diff --git a/lib/feed/conformance.sh b/lib/feed/conformance.sh index 6589a916..4560ec45 100755 --- a/lib/feed/conformance.sh +++ b/lib/feed/conformance.sh @@ -13,7 +13,7 @@ if [ ! -x "$SX_SERVER" ]; then exit 1 fi -SUITES=(basic) +SUITES=(basic fanout) OUT_JSON="lib/feed/scoreboard.json" OUT_MD="lib/feed/scoreboard.md" @@ -31,6 +31,8 @@ run_suite() { (load "lib/feed/normalize.sx") (load "lib/feed/stream.sx") (load "lib/feed/api.sx") +(load "lib/feed/fanout.sx") +(load "lib/feed/dedupe.sx") (epoch 2) (eval "(define feed-test-pass 0)") (eval "(define feed-test-fail 0)") diff --git a/lib/feed/dedupe.sx b/lib/feed/dedupe.sx new file mode 100644 index 00000000..fb850fb1 --- /dev/null +++ b/lib/feed/dedupe.sx @@ -0,0 +1,56 @@ +; feed/dedupe — collapse duplicate items, keeping first occurrence per key. +; Each verb may want its own key (see briefing): "alice posted X" keys on +; (actor verb object) — distinct per actor; "alice liked X / bob liked X" +; collapse on (verb object) so the cross-actor likes fold into one. +; +; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx +; (feed/-elem? lives in fanout.sx). + +; generic: dedupe a stream by key-fn, first occurrence wins (stable) +(define + feed/-dedup-by + (fn + (items key-fn) + (get + (reduce + (fn + (st x) + (let + ((k (key-fn x))) + (if (feed/-elem? k (get st :seen)) st {:seen (append (get st :seen) (list k)) :out (append (get st :out) (list x))}))) + {:seen (list) :out (list)} + items) + :out))) + +(define + feed/dedupe + (fn + (stream key-fn) + (feed/stream (feed/-dedup-by (feed/items stream) key-fn)))) + +; --- keys ------------------------------------------------------------------- + +(define + feed/activity-key + (fn (a) (list (get a :actor) (get a :verb) (get a :object)))) + +; collapse cross-actor duplicates of the same verb+object (e.g. likes) +(define feed/collapse-key (fn (a) (list (get a :verb) (get a :object)))) + +; per-receiver inbox key — one inbox event per (receiver, actor, verb, object) +(define + feed/event-key + (fn + (ev) + (let + ((a (get ev :activity))) + (list (get ev :to) (get a :actor) (get a :verb) (get a :object))))) + +; --- ready-made dedupers ---------------------------------------------------- + +(define feed/dedupe-activities (fn (s) (feed/dedupe s feed/activity-key))) + +(define feed/dedupe-collapse (fn (s) (feed/dedupe s feed/collapse-key))) + +; dedupe an inbox: at most one event per receiver per (actor verb object) +(define feed/dedupe-inbox (fn (inbox) (feed/dedupe inbox feed/event-key))) diff --git a/lib/feed/fanout.sx b/lib/feed/fanout.sx new file mode 100644 index 00000000..4d816e51 --- /dev/null +++ b/lib/feed/fanout.sx @@ -0,0 +1,114 @@ +; feed/fanout — THE SHOWCASE. Fan activities out to followers via the APL outer +; product (∘.×). activities ∘.× audience → an (activity × follower) matrix of +; inbox events; flatten to a vector; guard-keep only real follow edges. +; +; Requires: lib/apl/runtime.sx, lib/feed/normalize.sx, lib/feed/stream.sx. +; +; NOTE: apl-outer's combiner result is run through (if (scalar? r) (disclose r) r). +; A bare dict counts as a scalar (shape ()) and disclose nils it — so the combiner +; must (enclose ...) its event dict; apl-outer then discloses it back intact. + +; --- graph: {followee -> (list of followers)} ------------------------------- + +(define feed/followers (fn (graph user) (get graph user (list)))) + +; build a graph from (follower followee) edges: "follower follows followee" +(define + feed/follow-graph + (fn + (edges) + (reduce + (fn + (g e) + (let + ((follower (first e)) (followee (nth e 1))) + (assoc + g + followee + (append (feed/followers g followee) (list follower))))) + {} + edges))) + +; --- helpers ---------------------------------------------------------------- + +; unwrap an apl-scalar (has :ravel) back to its value; pass activities through +(define + feed/-val + (fn + (x) + (if (and (= (type-of x) "dict") (has-key? x :ravel)) (disclose x) x))) + +(define feed/-elem? (fn (x lst) (some (fn (y) (equal? x y)) lst))) + +(define + feed/-distinct + (fn + (lst) + (if + (= (len lst) 0) + (list) + (get (apl-unique (make-array (list (len lst)) lst)) :ravel)))) + +; rank-2 matrix -> rank-1 stream of its ravel +(define feed/-flatten (fn (arr) (feed/stream (get arr :ravel)))) + +; distinct receivers across the whole graph, sorted for determinism +; (dict key order is unspecified, so sort to pin audience/recipient ordering) +(define + feed/audience + (fn + (graph) + (sort + (feed/-distinct + (reduce + (fn (acc k) (append acc (feed/followers graph k))) + (list) + (keys graph)))))) + +; --- the outer product ------------------------------------------------------ + +; one (activity, follower) inbox event, enclosed so apl-outer keeps the dict +(define feed/-mk-event (fn (a f) (enclose {:activity (feed/-val a) :to (feed/-val f)}))) + +; keep events where :to actually follows the activity's actor +(define + feed/-edge? + (fn + (graph) + (fn + (ev) + (feed/-elem? + (get ev :to) + (feed/followers graph (get (get ev :activity) :actor)))))) + +; fanout — activities ∘.× audience, flatten, guard-keep real edges +(define + feed/fanout + (fn + (stream graph) + (let + ((matrix (apl-outer feed/-mk-event stream (feed/stream (feed/audience graph))))) + (feed/filter (feed/-flatten matrix) (feed/-edge? graph))))) + +; --- inbox queries ---------------------------------------------------------- + +(define + feed/inbox-for + (fn + (inbox user) + (feed/filter inbox (fn (ev) (equal? (get ev :to) user))))) + +(define + feed/recipients + (fn + (inbox) + (feed/-distinct (map (fn (ev) (get ev :to)) (feed/items inbox))))) + +; the activities (unwrapped) destined for a user +(define + feed/inbox-activities + (fn + (inbox user) + (map + (fn (ev) (get ev :activity)) + (feed/items (feed/inbox-for inbox user))))) diff --git a/lib/feed/scoreboard.json b/lib/feed/scoreboard.json index a9a81bff..d72fe5d9 100644 --- a/lib/feed/scoreboard.json +++ b/lib/feed/scoreboard.json @@ -1,8 +1,9 @@ { "suites": { - "basic": {"pass": 30, "fail": 0} + "basic": {"pass": 30, "fail": 0}, + "fanout": {"pass": 29, "fail": 0} }, - "total_pass": 30, + "total_pass": 59, "total_fail": 0, - "total": 30 + "total": 59 } diff --git a/lib/feed/scoreboard.md b/lib/feed/scoreboard.md index f517e734..c264255c 100644 --- a/lib/feed/scoreboard.md +++ b/lib/feed/scoreboard.md @@ -5,4 +5,5 @@ _Generated by `lib/feed/conformance.sh`_ | Suite | Pass | Fail | Total | |-------|-----:|-----:|------:| | basic | 30 | 0 | 30 | -| **Total** | **30** | **0** | **30** | +| fanout | 29 | 0 | 29 | +| **Total** | **59** | **0** | **59** | diff --git a/lib/feed/tests/fanout.sx b/lib/feed/tests/fanout.sx new file mode 100644 index 00000000..84bd7988 --- /dev/null +++ b/lib/feed/tests/fanout.sx @@ -0,0 +1,187 @@ +; Phase 2 — fanout via outer product + dedupe. (feed-test name got expected) + +; ---------- graph ---------- + +; edges: (follower followee). bob,carol follow alice; carol,dave follow bob. +(define + G + (feed/follow-graph + (list + (list "bob" "alice") + (list "carol" "alice") + (list "carol" "bob") + (list "dave" "bob")))) + +(feed-test "followers alice" (feed/followers G "alice") (list "bob" "carol")) +(feed-test "followers bob" (feed/followers G "bob") (list "carol" "dave")) +(feed-test "followers unknown" (feed/followers G "zzz") (list)) +(feed-test "audience distinct" (feed/audience G) (list "bob" "carol" "dave")) + +; ---------- fanout ---------- + +(define + S + (feed/stream + (list + (feed/activity "alice" "post" "p1" 10 (list)) + (feed/activity "alice" "post" "p2" 20 (list)) + (feed/activity "bob" "like" "p1" 30 (list))))) + +(define IB (feed/fanout S G)) + +(feed-test "fanout total edges" (feed/count IB) 6) +(feed-test + "inbox bob count" + (feed/count (feed/inbox-for IB "bob")) + 2) +(feed-test + "inbox carol count" + (feed/count (feed/inbox-for IB "carol")) + 3) +(feed-test + "inbox dave count" + (feed/count (feed/inbox-for IB "dave")) + 1) +(feed-test + "inbox alice (follows none)" + (feed/count (feed/inbox-for IB "alice")) + 0) +(feed-test + "recipients order" + (feed/recipients IB) + (list "bob" "carol" "dave")) +(feed-test + "bob inbox objects" + (map (fn (a) (get a :object)) (feed/inbox-activities IB "bob")) + (list "p1" "p2")) +(feed-test + "dave inbox objects" + (map (fn (a) (get a :object)) (feed/inbox-activities IB "dave")) + (list "p1")) +(feed-test + "dave inbox verb" + (map (fn (a) (get a :verb)) (feed/inbox-activities IB "dave")) + (list "like")) + +; empty graph → no audience → no edges +(feed-test + "empty graph fanout" + (feed/count (feed/fanout S {})) + 0) + +; actor nobody follows produces no edges +(define + Sghost + (feed/stream (list (feed/activity "ghost" "post" "g1" 5 (list))))) +(feed-test + "unfollowed actor fanout" + (feed/count (feed/fanout Sghost G)) + 0) + +; ---------- high fanout (popular actor) ---------- + +(define + Gstar + (feed/follow-graph + (list + (list "u1" "star") + (list "u2" "star") + (list "u3" "star") + (list "u4" "star") + (list "u5" "star")))) +(define + Sstar + (feed/stream (list (feed/activity "star" "post" "s1" 1 (list))))) +(feed-test + "star fanout count" + (feed/count (feed/fanout Sstar Gstar)) + 5) +(feed-test "star audience size" (len (feed/audience Gstar)) 5) + +; ---------- mutual follow ---------- + +(define Gmut (feed/follow-graph (list (list "a" "b") (list "b" "a")))) +(define + Smut + (feed/stream + (list + (feed/activity "a" "post" "pa" 1 (list)) + (feed/activity "b" "post" "pb" 2 (list))))) +(define IBmut (feed/fanout Smut Gmut)) +(feed-test "mutual total" (feed/count IBmut) 2) +(feed-test + "mutual a gets pb" + (map (fn (x) (get x :object)) (feed/inbox-activities IBmut "a")) + (list "pb")) +(feed-test + "mutual b gets pa" + (map (fn (x) (get x :object)) (feed/inbox-activities IBmut "b")) + (list "pa")) + +; ---------- dedupe ---------- + +(define + Sdup2 + (feed/stream + (list + (feed/activity "alice" "post" "p1" 1 (list)) + (feed/activity "alice" "post" "p1" 9 (list)) + (feed/activity "alice" "post" "p2" 2 (list))))) +(feed-test + "dedupe-activities collapses dup" + (feed/count (feed/dedupe-activities Sdup2)) + 2) +(feed-test + "dedupe-activities keeps distinct" + (map + (fn (a) (get a :object)) + (feed/items (feed/dedupe-activities Sdup2))) + (list "p1" "p2")) + +(define + Slikes + (feed/stream + (list + (feed/activity "alice" "like" "X" 1 (list)) + (feed/activity "bob" "like" "X" 2 (list)) + (feed/activity "carol" "like" "Y" 3 (list))))) +(feed-test + "collapse cross-actor likes" + (feed/count (feed/dedupe-collapse Slikes)) + 2) +(feed-test + "collapse keeps distinct objects" + (map + (fn (a) (get a :object)) + (feed/items (feed/dedupe-collapse Slikes))) + (list "X" "Y")) + +(feed-test + "activity-key shape" + (feed/activity-key (feed/activity "a" "post" "o" 0 (list))) + (list "a" "post" "o")) +(feed-test + "collapse-key shape" + (feed/collapse-key (feed/activity "a" "like" "o" 0 (list))) + (list "like" "o")) + +; cross-post: alice posts p1 twice → bob's inbox has it twice → dedupe-inbox → once +(define + Scross + (feed/stream + (list + (feed/activity "alice" "post" "p1" 1 (list)) + (feed/activity "alice" "post" "p1" 5 (list))))) +(define IBcross (feed/fanout Scross G)) +(feed-test + "cross-post raw bob count" + (feed/count (feed/inbox-for IBcross "bob")) + 2) +(feed-test + "cross-post deduped bob count" + (feed/count (feed/inbox-for (feed/dedupe-inbox IBcross) "bob")) + 1) +(feed-test + "dedupe-inbox keeps distinct receivers" + (feed/count (feed/dedupe-inbox IBcross)) + 2) diff --git a/plans/feed-on-sx.md b/plans/feed-on-sx.md index 1dbdeb0a..665f70a1 100644 --- a/plans/feed-on-sx.md +++ b/plans/feed-on-sx.md @@ -14,7 +14,7 @@ APL, ACL visibility filtering via `lib/acl/`, federation via fed-sx. ## Status (rolling) -`bash lib/feed/conformance.sh` → **30/30** (Phase 1 complete) +`bash lib/feed/conformance.sh` → **59/59** (Phases 1–2 complete) ## Ground rules @@ -69,13 +69,15 @@ lib/feed/api.sx lib/feed/fed.sx ## Phase 2 — Fanout via outer product -- [ ] follower graph: `followers user → vector of user ids` -- [ ] fanout: activities `∘.×` followers → matrix `(activity, follower)` pairs -- [ ] flatten to inbox events vector -- [ ] dedupe — group by `(actor, verb, object)` collapse to one inbox event per - receiver -- [ ] `lib/feed/tests/fanout.sx` — 20+ cases: small graph, mutual follow, popular - actor (high-fanout), cross-post dedupe +- [x] follower graph: `followers user → vector of user ids` (`feed/follow-graph`, + `feed/followers`; graph = `{followee -> (followers)}` dict) +- [x] fanout: activities `∘.×` audience → matrix via `apl-outer feed/-mk-event` +- [x] flatten to inbox events vector (`feed/-flatten` rank-2 → rank-1) +- [x] dedupe — `feed/dedupe-inbox` by `(to, actor, verb, object)`; also + `feed/dedupe-activities` `(actor verb object)` and `feed/dedupe-collapse` + `(verb object)` for cross-actor likes +- [x] `lib/feed/tests/fanout.sx` — 29 cases: small graph, mutual follow, star + (high-fanout), empty graph, unfollowed actor, cross-post dedupe ## Phase 3 — Aggregation + ranking @@ -104,8 +106,14 @@ lib/feed/api.sx lib/feed/fed.sx by-actor/verb/object/since predicates), `api.sx` (mutable log: post/all/reset!/size). Substrate: `apl-compress`, `apl-grade-up`, `apl-take`, `apl-reverse`, `make-array`. Grade-up returns 1-based indices (⎕IO=1), is stable on ties → deterministic sort. - -## Blockers +- **Phase 2 done (59/59 total).** `fanout.sx` (graph + `apl-outer` showcase), + `dedupe.sx` (per-key dedupe, first-wins stable). Key APL gotcha: `scalar?` is + true for ANY dict and `disclose` nils a non-array dict, so an apl-outer combiner + MUST `enclose` its event dict — apl-outer discloses it back intact. `apl-unique` + preserves first-occurrence order; dict `keys` order is NOT stable, so + `feed/audience` sorts (else recipient ordering flakes). `apl-compress` needs a + rank-1 array, so the (activity×follower) matrix is flattened to its ravel before + the edge-guard filter. (none)