Compare commits
20 Commits
loops/mod
...
loops/sear
| Author | SHA1 | Date | |
|---|---|---|---|
| 9d3b775b25 | |||
| 77ab827b91 | |||
| a3f9d4f6c9 | |||
| 4c84decc01 | |||
| 0f0da0319c | |||
| b8cf3eb1b8 | |||
| e2de5a4675 | |||
| 1e4cf25015 | |||
| 9c4a5d1913 | |||
| f91ac82434 | |||
| 5136249ae5 | |||
| 6fc61147a8 | |||
| 0122c41ecb | |||
| 58656b03e4 | |||
| b0feb7b01b | |||
| a979297959 | |||
| 37226cf6eb | |||
| 50a7f31a39 | |||
| 915f51b2b6 | |||
| e7501bdf8f |
38
lib/feed/acl.sx
Normal file
38
lib/feed/acl.sx
Normal file
@@ -0,0 +1,38 @@
|
||||
; feed/acl — per-viewer visibility filtering. The same candidate stream yields
|
||||
; different timelines for different viewers, so ACL is applied per request and
|
||||
; pre-ACL timelines are never cached.
|
||||
;
|
||||
; permit? is injected: (permit? viewer activity) -> bool. Wire a real acl-sx
|
||||
; predicate here; feed/permit-acl? is a self-contained default that reads an
|
||||
; optional :visible-to allowlist on the activity.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-elem?), lib/feed/rank.sx (feed/top).
|
||||
|
||||
; default permit: actor always sees own activity; absent/nil :visible-to is
|
||||
; public; otherwise viewer must be in the allowlist.
|
||||
(define
|
||||
feed/permit-acl?
|
||||
(fn
|
||||
(viewer a)
|
||||
(or
|
||||
(equal? viewer (get a :actor))
|
||||
(let
|
||||
((allowed (get a :visible-to nil)))
|
||||
(if (= allowed nil) true (feed/-elem? viewer allowed))))))
|
||||
|
||||
(define feed/permit-public? (fn (viewer a) true))
|
||||
|
||||
; filter a stream to what viewer may read
|
||||
(define
|
||||
feed/visible
|
||||
(fn
|
||||
(stream viewer permit?)
|
||||
(feed/filter stream (fn (a) (permit? viewer a)))))
|
||||
|
||||
; the capstone: candidate stream -> ACL for viewer -> rank -> top-N
|
||||
(define
|
||||
feed/timeline
|
||||
(fn
|
||||
(stream viewer permit? score-fn n)
|
||||
(feed/top (feed/visible stream viewer permit?) score-fn n)))
|
||||
62
lib/feed/aggregate.sx
Normal file
62
lib/feed/aggregate.sx
Normal file
@@ -0,0 +1,62 @@
|
||||
; feed/aggregate — group-by / counting via key-reduce. Keys must be strings
|
||||
; (dict keys), so composite keys (actor, day) are joined into one string.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx.
|
||||
|
||||
; group activities into a dict: key-string -> (list of activities), order-preserving
|
||||
(define
|
||||
feed/group-by
|
||||
(fn
|
||||
(stream key-fn)
|
||||
(reduce
|
||||
(fn
|
||||
(g a)
|
||||
(let
|
||||
((k (key-fn a)))
|
||||
(assoc g k (append (get g k (list)) (list a)))))
|
||||
{}
|
||||
(feed/items stream))))
|
||||
|
||||
; key-string -> count
|
||||
(define
|
||||
feed/group-count
|
||||
(fn
|
||||
(stream key-fn)
|
||||
(reduce
|
||||
(fn
|
||||
(g a)
|
||||
(let
|
||||
((k (key-fn a)))
|
||||
(assoc g k (+ (get g k 0) 1))))
|
||||
{}
|
||||
(feed/items stream))))
|
||||
|
||||
; --- composite keys ---------------------------------------------------------
|
||||
|
||||
(define feed/day (fn (at window) (floor (/ at window))))
|
||||
|
||||
; (actor, day-bucket) -> "actor#day"
|
||||
(define
|
||||
feed/actor-day-key
|
||||
(fn
|
||||
(window)
|
||||
(fn
|
||||
(a)
|
||||
(string-append
|
||||
(get a :actor)
|
||||
"#"
|
||||
(number->string (feed/day (get a :at) window))))))
|
||||
|
||||
(define
|
||||
feed/by-actor-day
|
||||
(fn (stream window) (feed/group-count stream (feed/actor-day-key window))))
|
||||
|
||||
; per-actor activity counts
|
||||
(define
|
||||
feed/actor-counts
|
||||
(fn (stream) (feed/group-count stream feed/actor)))
|
||||
|
||||
; per-object activity counts (engagement)
|
||||
(define
|
||||
feed/object-counts
|
||||
(fn (stream) (feed/group-count stream feed/object)))
|
||||
24
lib/feed/api.sx
Normal file
24
lib/feed/api.sx
Normal file
@@ -0,0 +1,24 @@
|
||||
; feed/api — ergonomic API over the stream layer for non-APL callers.
|
||||
; A single mutable activity log; post appends, all returns it as a stream.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx (loaded by harness).
|
||||
|
||||
(define feed/-log (list))
|
||||
|
||||
; post — normalize then append. Returns the stored activity.
|
||||
(define
|
||||
feed/post
|
||||
(fn
|
||||
(raw)
|
||||
(let
|
||||
((a (feed/normalize raw)))
|
||||
(begin (set! feed/-log (append feed/-log (list a))) a))))
|
||||
|
||||
; all — the whole log as a stream (insertion order)
|
||||
(define feed/all (fn () (feed/stream feed/-log)))
|
||||
|
||||
; reset! — clear the log (test hygiene)
|
||||
(define feed/reset! (fn () (begin (set! feed/-log (list)) nil)))
|
||||
|
||||
; size — number of posted activities
|
||||
(define feed/size (fn () (len feed/-log)))
|
||||
125
lib/feed/conformance.sh
Executable file
125
lib/feed/conformance.sh
Executable file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env bash
|
||||
# lib/feed/conformance.sh — run feed test suites, emit scoreboard.json + scoreboard.md.
|
||||
|
||||
set -uo pipefail
|
||||
cd "$(git rev-parse --show-toplevel)"
|
||||
|
||||
SX_SERVER="${SX_SERVER:-/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe}"
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
SX_SERVER="hosts/ocaml/_build/default/bin/sx_server.exe"
|
||||
fi
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
echo "ERROR: sx_server.exe not found." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
SUITES=(basic fanout rank integration content notify home dedupe trending mute page thread)
|
||||
|
||||
OUT_JSON="lib/feed/scoreboard.json"
|
||||
OUT_MD="lib/feed/scoreboard.md"
|
||||
|
||||
run_suite() {
|
||||
local suite=$1
|
||||
local file="lib/feed/tests/${suite}.sx"
|
||||
local TMP
|
||||
TMP=$(mktemp)
|
||||
cat > "$TMP" << EPOCHS
|
||||
(epoch 1)
|
||||
(load "spec/stdlib.sx")
|
||||
(load "lib/r7rs.sx")
|
||||
(load "lib/apl/runtime.sx")
|
||||
(load "lib/feed/normalize.sx")
|
||||
(load "lib/feed/stream.sx")
|
||||
(load "lib/feed/api.sx")
|
||||
(load "lib/feed/fanout.sx")
|
||||
(load "lib/feed/dedupe.sx")
|
||||
(load "lib/feed/aggregate.sx")
|
||||
(load "lib/feed/rank.sx")
|
||||
(load "lib/feed/acl.sx")
|
||||
(load "lib/feed/fed.sx")
|
||||
(load "lib/feed/content.sx")
|
||||
(load "lib/feed/notify.sx")
|
||||
(load "lib/feed/home.sx")
|
||||
(load "lib/feed/trending.sx")
|
||||
(load "lib/feed/mute.sx")
|
||||
(load "lib/feed/page.sx")
|
||||
(load "lib/feed/thread.sx")
|
||||
(epoch 2)
|
||||
(eval "(define feed-test-pass 0)")
|
||||
(eval "(define feed-test-fail 0)")
|
||||
(eval "(define feed-test (fn (name got expected) (if (= got expected) (set! feed-test-pass (+ feed-test-pass 1)) (set! feed-test-fail (+ feed-test-fail 1)))))")
|
||||
(epoch 3)
|
||||
(load "${file}")
|
||||
(epoch 4)
|
||||
(eval "(list feed-test-pass feed-test-fail)")
|
||||
EPOCHS
|
||||
|
||||
local OUTPUT
|
||||
OUTPUT=$(timeout 300 "$SX_SERVER" < "$TMP" 2>/dev/null)
|
||||
rm -f "$TMP"
|
||||
|
||||
local LINE
|
||||
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 4 / {getline; print; exit}')
|
||||
if [ -z "$LINE" ]; then
|
||||
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 4 \([0-9]+ [0-9]+\)\)' | tail -1 \
|
||||
| sed -E 's/^\(ok 4 //; s/\)$//')
|
||||
fi
|
||||
|
||||
local P F
|
||||
P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/')
|
||||
F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/')
|
||||
P=${P:-0}
|
||||
F=${F:-0}
|
||||
echo "${P} ${F}"
|
||||
}
|
||||
|
||||
declare -A SUITE_PASS
|
||||
declare -A SUITE_FAIL
|
||||
TOTAL_PASS=0
|
||||
TOTAL_FAIL=0
|
||||
|
||||
echo "Running feed conformance suite..." >&2
|
||||
for s in "${SUITES[@]}"; do
|
||||
read -r p f < <(run_suite "$s")
|
||||
SUITE_PASS[$s]=$p
|
||||
SUITE_FAIL[$s]=$f
|
||||
TOTAL_PASS=$((TOTAL_PASS + p))
|
||||
TOTAL_FAIL=$((TOTAL_FAIL + f))
|
||||
printf " %-12s %d/%d\n" "$s" "$p" "$((p+f))" >&2
|
||||
done
|
||||
|
||||
# scoreboard.json
|
||||
{
|
||||
printf '{\n'
|
||||
printf ' "suites": {\n'
|
||||
first=1
|
||||
for s in "${SUITES[@]}"; do
|
||||
if [ $first -eq 0 ]; then printf ',\n'; fi
|
||||
printf ' "%s": {"pass": %d, "fail": %d}' "$s" "${SUITE_PASS[$s]}" "${SUITE_FAIL[$s]}"
|
||||
first=0
|
||||
done
|
||||
printf '\n },\n'
|
||||
printf ' "total_pass": %d,\n' "$TOTAL_PASS"
|
||||
printf ' "total_fail": %d,\n' "$TOTAL_FAIL"
|
||||
printf ' "total": %d\n' "$((TOTAL_PASS + TOTAL_FAIL))"
|
||||
printf '}\n'
|
||||
} > "$OUT_JSON"
|
||||
|
||||
# scoreboard.md
|
||||
{
|
||||
printf '# feed Conformance Scoreboard\n\n'
|
||||
printf '_Generated by `lib/feed/conformance.sh`_\n\n'
|
||||
printf '| Suite | Pass | Fail | Total |\n'
|
||||
printf '|-------|-----:|-----:|------:|\n'
|
||||
for s in "${SUITES[@]}"; do
|
||||
p=${SUITE_PASS[$s]}
|
||||
f=${SUITE_FAIL[$s]}
|
||||
printf '| %s | %d | %d | %d |\n' "$s" "$p" "$f" "$((p+f))"
|
||||
done
|
||||
printf '| **Total** | **%d** | **%d** | **%d** |\n' "$TOTAL_PASS" "$TOTAL_FAIL" "$((TOTAL_PASS + TOTAL_FAIL))"
|
||||
} > "$OUT_MD"
|
||||
|
||||
echo "Wrote $OUT_JSON and $OUT_MD" >&2
|
||||
echo "Total: $TOTAL_PASS pass, $TOTAL_FAIL fail" >&2
|
||||
|
||||
[ "$TOTAL_FAIL" -eq 0 ]
|
||||
68
lib/feed/content.sx
Normal file
68
lib/feed/content.sx
Normal file
@@ -0,0 +1,68 @@
|
||||
; feed/content — TF-IDF relevance over activity :tags. Rare tags carry more
|
||||
; signal, so an activity matching an uncommon tag ranks above one matching a
|
||||
; common tag. Composes with rank.sx: feed/tfidf-score is just another scorer.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-distinct), lib/feed/rank.sx (feed/rank).
|
||||
|
||||
; document frequency: tag -> number of activities whose :tags contain it
|
||||
; (a tag repeated within one activity counts once toward df)
|
||||
(define
|
||||
feed/tag-df
|
||||
(fn
|
||||
(stream)
|
||||
(reduce
|
||||
(fn
|
||||
(df a)
|
||||
(reduce
|
||||
(fn (d t) (assoc d t (+ (get d t 0) 1)))
|
||||
df
|
||||
(feed/-distinct (get a :tags))))
|
||||
{}
|
||||
(feed/items stream))))
|
||||
|
||||
; inverse document frequency: tag -> log(N / df)
|
||||
(define
|
||||
feed/tag-idf
|
||||
(fn
|
||||
(stream)
|
||||
(let
|
||||
((n (feed/count stream)) (df (feed/tag-df stream)))
|
||||
(reduce
|
||||
(fn (idf t) (assoc idf t (log (/ n (get df t)))))
|
||||
{}
|
||||
(keys df)))))
|
||||
|
||||
; term frequency within one activity: tag -> occurrence count
|
||||
(define
|
||||
feed/-tf
|
||||
(fn
|
||||
(a)
|
||||
(reduce
|
||||
(fn (tf t) (assoc tf t (+ (get tf t 0) 1)))
|
||||
{}
|
||||
(get a :tags))))
|
||||
|
||||
; relevance of an activity to a query (list of tags) given precomputed idf:
|
||||
; sum over query tags of tf(tag in activity) * idf(tag in corpus)
|
||||
(define
|
||||
feed/tfidf-score
|
||||
(fn
|
||||
(idf query)
|
||||
(fn
|
||||
(a)
|
||||
(let
|
||||
((tf (feed/-tf a)))
|
||||
(reduce
|
||||
(fn
|
||||
(acc t)
|
||||
(+ acc (* (get tf t 0) (get idf t 0))))
|
||||
0
|
||||
query)))))
|
||||
|
||||
; rank a stream by relevance to query tags (idf computed over the stream itself)
|
||||
(define
|
||||
feed/by-relevance
|
||||
(fn
|
||||
(stream query)
|
||||
(feed/rank stream (feed/tfidf-score (feed/tag-idf stream) query))))
|
||||
76
lib/feed/dedupe.sx
Normal file
76
lib/feed/dedupe.sx
Normal file
@@ -0,0 +1,76 @@
|
||||
; feed/dedupe — collapse duplicate items, keeping first occurrence per key.
|
||||
; Each verb may want its own key (see briefing): "alice posted X" keys on
|
||||
; (actor verb object) — distinct per actor; "alice liked X / bob liked X"
|
||||
; collapse on (verb object) so the cross-actor likes fold into one.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-elem? lives in fanout.sx).
|
||||
|
||||
; generic: dedupe a stream by key-fn, first occurrence wins (stable)
|
||||
(define
|
||||
feed/-dedup-by
|
||||
(fn
|
||||
(items key-fn)
|
||||
(get
|
||||
(reduce
|
||||
(fn
|
||||
(st x)
|
||||
(let
|
||||
((k (key-fn x)))
|
||||
(if (feed/-elem? k (get st :seen)) st {:seen (append (get st :seen) (list k)) :out (append (get st :out) (list x))})))
|
||||
{:seen (list) :out (list)}
|
||||
items)
|
||||
:out)))
|
||||
|
||||
(define
|
||||
feed/dedupe
|
||||
(fn
|
||||
(stream key-fn)
|
||||
(feed/stream (feed/-dedup-by (feed/items stream) key-fn))))
|
||||
|
||||
; --- keys -------------------------------------------------------------------
|
||||
|
||||
(define
|
||||
feed/activity-key
|
||||
(fn (a) (list (get a :actor) (get a :verb) (get a :object))))
|
||||
|
||||
; collapse cross-actor duplicates of the same verb+object (e.g. likes)
|
||||
(define feed/collapse-key (fn (a) (list (get a :verb) (get a :object))))
|
||||
|
||||
; per-receiver inbox key — one inbox event per (receiver, actor, verb, object)
|
||||
(define
|
||||
feed/event-key
|
||||
(fn
|
||||
(ev)
|
||||
(let
|
||||
((a (get ev :activity)))
|
||||
(list (get ev :to) (get a :actor) (get a :verb) (get a :object)))))
|
||||
|
||||
; verbs whose duplicates collapse across actors (reactions, not authorship).
|
||||
; rebindable: callers can (set! feed/collapse-verbs ...) to tune the policy.
|
||||
(define
|
||||
feed/collapse-verbs
|
||||
(list "like" "favourite" "follow" "boost" "repost"))
|
||||
|
||||
; per-verb key: collapse-verbs fold on (verb object); the rest key on
|
||||
; (actor verb object).
|
||||
(define
|
||||
feed/smart-key
|
||||
(fn
|
||||
(a)
|
||||
(if
|
||||
(feed/-elem? (get a :verb) feed/collapse-verbs)
|
||||
(feed/collapse-key a)
|
||||
(feed/activity-key a))))
|
||||
|
||||
; --- ready-made dedupers ----------------------------------------------------
|
||||
|
||||
(define feed/dedupe-activities (fn (s) (feed/dedupe s feed/activity-key)))
|
||||
|
||||
(define feed/dedupe-collapse (fn (s) (feed/dedupe s feed/collapse-key)))
|
||||
|
||||
; verb-aware: reactions collapse cross-actor, posts stay distinct per actor
|
||||
(define feed/dedupe-smart (fn (s) (feed/dedupe s feed/smart-key)))
|
||||
|
||||
; dedupe an inbox: at most one event per receiver per (actor verb object)
|
||||
(define feed/dedupe-inbox (fn (inbox) (feed/dedupe inbox feed/event-key)))
|
||||
114
lib/feed/fanout.sx
Normal file
114
lib/feed/fanout.sx
Normal file
@@ -0,0 +1,114 @@
|
||||
; feed/fanout — THE SHOWCASE. Fan activities out to followers via the APL outer
|
||||
; product (∘.×). activities ∘.× audience → an (activity × follower) matrix of
|
||||
; inbox events; flatten to a vector; guard-keep only real follow edges.
|
||||
;
|
||||
; Requires: lib/apl/runtime.sx, lib/feed/normalize.sx, lib/feed/stream.sx.
|
||||
;
|
||||
; NOTE: apl-outer's combiner result is run through (if (scalar? r) (disclose r) r).
|
||||
; A bare dict counts as a scalar (shape ()) and disclose nils it — so the combiner
|
||||
; must (enclose ...) its event dict; apl-outer then discloses it back intact.
|
||||
|
||||
; --- graph: {followee -> (list of followers)} -------------------------------
|
||||
|
||||
(define feed/followers (fn (graph user) (get graph user (list))))
|
||||
|
||||
; build a graph from (follower followee) edges: "follower follows followee"
|
||||
(define
|
||||
feed/follow-graph
|
||||
(fn
|
||||
(edges)
|
||||
(reduce
|
||||
(fn
|
||||
(g e)
|
||||
(let
|
||||
((follower (first e)) (followee (nth e 1)))
|
||||
(assoc
|
||||
g
|
||||
followee
|
||||
(append (feed/followers g followee) (list follower)))))
|
||||
{}
|
||||
edges)))
|
||||
|
||||
; --- helpers ----------------------------------------------------------------
|
||||
|
||||
; unwrap an apl-scalar (has :ravel) back to its value; pass activities through
|
||||
(define
|
||||
feed/-val
|
||||
(fn
|
||||
(x)
|
||||
(if (and (= (type-of x) "dict") (has-key? x :ravel)) (disclose x) x)))
|
||||
|
||||
(define feed/-elem? (fn (x lst) (some (fn (y) (equal? x y)) lst)))
|
||||
|
||||
(define
|
||||
feed/-distinct
|
||||
(fn
|
||||
(lst)
|
||||
(if
|
||||
(= (len lst) 0)
|
||||
(list)
|
||||
(get (apl-unique (make-array (list (len lst)) lst)) :ravel))))
|
||||
|
||||
; rank-2 matrix -> rank-1 stream of its ravel
|
||||
(define feed/-flatten (fn (arr) (feed/stream (get arr :ravel))))
|
||||
|
||||
; distinct receivers across the whole graph, sorted for determinism
|
||||
; (dict key order is unspecified, so sort to pin audience/recipient ordering)
|
||||
(define
|
||||
feed/audience
|
||||
(fn
|
||||
(graph)
|
||||
(sort
|
||||
(feed/-distinct
|
||||
(reduce
|
||||
(fn (acc k) (append acc (feed/followers graph k)))
|
||||
(list)
|
||||
(keys graph))))))
|
||||
|
||||
; --- the outer product ------------------------------------------------------
|
||||
|
||||
; one (activity, follower) inbox event, enclosed so apl-outer keeps the dict
|
||||
(define feed/-mk-event (fn (a f) (enclose {:activity (feed/-val a) :to (feed/-val f)})))
|
||||
|
||||
; keep events where :to actually follows the activity's actor
|
||||
(define
|
||||
feed/-edge?
|
||||
(fn
|
||||
(graph)
|
||||
(fn
|
||||
(ev)
|
||||
(feed/-elem?
|
||||
(get ev :to)
|
||||
(feed/followers graph (get (get ev :activity) :actor))))))
|
||||
|
||||
; fanout — activities ∘.× audience, flatten, guard-keep real edges
|
||||
(define
|
||||
feed/fanout
|
||||
(fn
|
||||
(stream graph)
|
||||
(let
|
||||
((matrix (apl-outer feed/-mk-event stream (feed/stream (feed/audience graph)))))
|
||||
(feed/filter (feed/-flatten matrix) (feed/-edge? graph)))))
|
||||
|
||||
; --- inbox queries ----------------------------------------------------------
|
||||
|
||||
(define
|
||||
feed/inbox-for
|
||||
(fn
|
||||
(inbox user)
|
||||
(feed/filter inbox (fn (ev) (equal? (get ev :to) user)))))
|
||||
|
||||
(define
|
||||
feed/recipients
|
||||
(fn
|
||||
(inbox)
|
||||
(feed/-distinct (map (fn (ev) (get ev :to)) (feed/items inbox)))))
|
||||
|
||||
; the activities (unwrapped) destined for a user
|
||||
(define
|
||||
feed/inbox-activities
|
||||
(fn
|
||||
(inbox user)
|
||||
(map
|
||||
(fn (ev) (get ev :activity))
|
||||
(feed/items (feed/inbox-for inbox user)))))
|
||||
60
lib/feed/fed.sx
Normal file
60
lib/feed/fed.sx
Normal file
@@ -0,0 +1,60 @@
|
||||
; feed/fed — federation. Outbound: a local post fans out, then splits into local
|
||||
; vs remote inboxes; remote events are handed to an injected send-fn. Inbound:
|
||||
; peer activities merge into the local stream, deduped. Backfill: pull peer
|
||||
; history via an injected fetch-fn and merge.
|
||||
;
|
||||
; remote? / send-fn / fetch-fn are injected so real fed-sx transport wires in here
|
||||
; without feed depending on it.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx,
|
||||
; lib/feed/dedupe.sx.
|
||||
|
||||
; --- merge / ingest ---------------------------------------------------------
|
||||
|
||||
(define
|
||||
feed/merge
|
||||
(fn (s1 s2) (feed/stream (append (feed/items s1) (feed/items s2)))))
|
||||
|
||||
; merge a peer stream into local, dropping (actor verb object) duplicates
|
||||
(define
|
||||
feed/ingest
|
||||
(fn (local peer) (feed/dedupe-activities (feed/merge local peer))))
|
||||
|
||||
; --- inbound ----------------------------------------------------------------
|
||||
|
||||
; peer pushes raw activities to the local inbox; normalize + ingest
|
||||
(define
|
||||
feed/inbound
|
||||
(fn
|
||||
(local raw-activities)
|
||||
(feed/ingest local (feed/stream (map feed/normalize raw-activities)))))
|
||||
|
||||
; backfill on subscribe: pull peer history via fetch-fn, normalize, ingest
|
||||
(define
|
||||
feed/backfill
|
||||
(fn (local fetch-fn peer-id) (feed/inbound local (fetch-fn peer-id))))
|
||||
|
||||
; --- outbound ---------------------------------------------------------------
|
||||
|
||||
; split an inbox into local vs remote deliveries by viewer-id predicate
|
||||
(define feed/partition-inbox (fn (inbox remote?) {:local (feed/filter inbox (fn (ev) (not (remote? (get ev :to))))) :remote (feed/filter inbox (fn (ev) (remote? (get ev :to))))}))
|
||||
|
||||
; fan a stream out over the graph, then partition by locality
|
||||
(define
|
||||
feed/federate
|
||||
(fn
|
||||
(stream graph remote?)
|
||||
(feed/partition-inbox (feed/fanout stream graph) remote?)))
|
||||
|
||||
; deliver: hand each remote event to send-fn, return the local inbox to enqueue
|
||||
(define
|
||||
feed/deliver
|
||||
(fn
|
||||
(stream graph remote? send-fn)
|
||||
(let
|
||||
((parts (feed/federate stream graph remote?)))
|
||||
(begin
|
||||
(for-each
|
||||
(fn (ev) (send-fn (get ev :to) (get ev :activity)))
|
||||
(feed/items (get parts :remote)))
|
||||
(get parts :local)))))
|
||||
23
lib/feed/home.sx
Normal file
23
lib/feed/home.sx
Normal file
@@ -0,0 +1,23 @@
|
||||
; feed/home — the capstone. A user's home timeline is the whole pipeline as one
|
||||
; line: fan all activities out over the follow graph, take the events landing in
|
||||
; the viewer's inbox, dedupe cross-posts, apply the viewer's ACL, rank, take N.
|
||||
;
|
||||
; Requires: fanout.sx, dedupe.sx, acl.sx (feed/timeline), rank.sx, stream.sx.
|
||||
|
||||
; the activities in a user's inbox, as a stream
|
||||
(define
|
||||
feed/inbox-stream
|
||||
(fn (inbox user) (feed/stream (feed/inbox-activities inbox user))))
|
||||
|
||||
; fanout ∘ inbox ∘ dedupe ∘ ACL ∘ rank ∘ take
|
||||
(define
|
||||
feed/home
|
||||
(fn
|
||||
(stream graph viewer permit? score-fn n)
|
||||
(feed/timeline
|
||||
(feed/dedupe-activities
|
||||
(feed/inbox-stream (feed/fanout stream graph) viewer))
|
||||
viewer
|
||||
permit?
|
||||
score-fn
|
||||
n)))
|
||||
44
lib/feed/mute.sx
Normal file
44
lib/feed/mute.sx
Normal file
@@ -0,0 +1,44 @@
|
||||
; feed/mute — viewer-controlled filtering. ACL (acl.sx) is author-controlled
|
||||
; visibility; mute is the reader's own preference: hide muted actors or tags.
|
||||
; Like ACL it is per-viewer and applied per request, never cached.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-elem?).
|
||||
|
||||
; drop activities authored by a muted actor
|
||||
(define
|
||||
feed/mute-actors
|
||||
(fn
|
||||
(stream actors)
|
||||
(feed/filter
|
||||
stream
|
||||
(fn (a) (not (feed/-elem? (get a :actor) actors))))))
|
||||
|
||||
; drop activities carrying any muted tag
|
||||
(define
|
||||
feed/mute-tags
|
||||
(fn
|
||||
(stream tags)
|
||||
(feed/filter
|
||||
stream
|
||||
(fn (a) (not (some (fn (t) (feed/-elem? t tags)) (get a :tags)))))))
|
||||
|
||||
; drop activities about a muted object (thread mute)
|
||||
(define
|
||||
feed/mute-objects
|
||||
(fn
|
||||
(stream objects)
|
||||
(feed/filter
|
||||
stream
|
||||
(fn (a) (not (feed/-elem? (get a :object) objects))))))
|
||||
|
||||
; apply a viewer preference bag: {:mute-actors (...) :mute-tags (...) :mute-objects (...)}
|
||||
(define
|
||||
feed/apply-prefs
|
||||
(fn
|
||||
(stream prefs)
|
||||
(feed/mute-objects
|
||||
(feed/mute-tags
|
||||
(feed/mute-actors stream (get prefs :mute-actors (list)))
|
||||
(get prefs :mute-tags (list)))
|
||||
(get prefs :mute-objects (list)))))
|
||||
31
lib/feed/normalize.sx
Normal file
31
lib/feed/normalize.sx
Normal file
@@ -0,0 +1,31 @@
|
||||
; feed/normalize — coerce arbitrary input into the canonical activity record.
|
||||
; An activity is a small dict {:actor :verb :object :at :tags}; a stream is an
|
||||
; APL vector of such dicts (see stream.sx). Extra keys on the raw input survive
|
||||
; (e.g. :visible-to for ACL, peer metadata for federation) — :tags is the
|
||||
; flexible bag but the record is not closed.
|
||||
|
||||
(define feed/activity-keys (list :actor :verb :object :at :tags))
|
||||
|
||||
(define
|
||||
feed/normalize
|
||||
(fn
|
||||
(raw)
|
||||
(let
|
||||
((d (if (= (type-of raw) "dict") raw {})))
|
||||
(merge d {:actor (get d :actor "") :object (get d :object nil) :at (get d :at 0) :tags (let ((t (get d :tags (list)))) (if (list? t) t (list t))) :verb (get d :verb "post")}))))
|
||||
|
||||
(define
|
||||
feed/activity
|
||||
(fn (actor verb object at tags) (feed/normalize {:actor actor :object object :at at :tags tags :verb verb})))
|
||||
|
||||
(define feed/actor (fn (a) (get a :actor)))
|
||||
(define feed/verb (fn (a) (get a :verb)))
|
||||
(define feed/object (fn (a) (get a :object)))
|
||||
(define feed/at (fn (a) (get a :at)))
|
||||
(define feed/tags (fn (a) (get a :tags)))
|
||||
|
||||
(define
|
||||
feed/activity?
|
||||
(fn
|
||||
(a)
|
||||
(and (= (type-of a) "dict") (has-key? a :actor) (has-key? a :verb))))
|
||||
45
lib/feed/notify.sx
Normal file
45
lib/feed/notify.sx
Normal file
@@ -0,0 +1,45 @@
|
||||
; feed/notify — a notification feed is a thin layer over a recipient's inbox:
|
||||
; the events directed at a user, optionally verb-filtered, and a digest that
|
||||
; collapses "alice, bob and 1 other liked X" by (verb, object).
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/inbox-for, feed/-elem?).
|
||||
|
||||
; all inbox events for a user (their raw notifications)
|
||||
(define feed/notifications (fn (inbox user) (feed/inbox-for inbox user)))
|
||||
|
||||
; restrict to notification-worthy verbs (e.g. (list "like" "reply" "follow"))
|
||||
(define
|
||||
feed/notify-verbs
|
||||
(fn
|
||||
(inbox user verbs)
|
||||
(feed/filter
|
||||
(feed/inbox-for inbox user)
|
||||
(fn (ev) (feed/-elem? (get (get ev :activity) :verb) verbs)))))
|
||||
|
||||
; group key "verb|object" — deterministic, sortable
|
||||
(define
|
||||
feed/-notify-key
|
||||
(fn
|
||||
(ev)
|
||||
(let
|
||||
((a (get ev :activity)))
|
||||
(string-append (get a :verb) "|" (get a :object)))))
|
||||
|
||||
; digest: one entry per (verb, object) with the distinct actors and a count,
|
||||
; ordered by key for determinism.
|
||||
(define
|
||||
feed/notify-digest
|
||||
(fn
|
||||
(inbox user)
|
||||
(let
|
||||
((events (feed/items (feed/inbox-for inbox user))))
|
||||
(let
|
||||
((groups (reduce (fn (g ev) (let ((a (get ev :activity)) (k (feed/-notify-key ev))) (let ((cur (get g k {:object (get a :object) :actors (list) :verb (get a :verb)}))) (assoc g k (assoc cur :actors (append (get cur :actors) (list (get a :actor)))))))) {} events)))
|
||||
(map
|
||||
(fn
|
||||
(k)
|
||||
(let
|
||||
((grp (get groups k)))
|
||||
(assoc grp :count (len (get grp :actors)))))
|
||||
(sort (keys groups)))))))
|
||||
50
lib/feed/page.sx
Normal file
50
lib/feed/page.sx
Normal file
@@ -0,0 +1,50 @@
|
||||
; feed/page — pagination. Offset/limit for indexed access, and cursor-based
|
||||
; (by :at) for recency feeds, which is stable under inserts: a cursor is the
|
||||
; :at of the last item seen, and the next page is the newest items older than it.
|
||||
;
|
||||
; Requires: lib/feed/stream.sx (feed/recent, feed/take, feed/filter).
|
||||
|
||||
; --- offset / limit ---------------------------------------------------------
|
||||
|
||||
(define
|
||||
feed/page
|
||||
(fn
|
||||
(stream offset limit)
|
||||
(feed/stream (take (drop (feed/items stream) offset) limit))))
|
||||
|
||||
(define
|
||||
feed/page-count
|
||||
(fn (stream limit) (ceil (/ (feed/count stream) limit))))
|
||||
|
||||
; --- cursor (recency feeds) -------------------------------------------------
|
||||
|
||||
; activities strictly older than cursor (scroll down / load older)
|
||||
(define
|
||||
feed/before
|
||||
(fn
|
||||
(stream cursor)
|
||||
(feed/filter stream (fn (a) (< (get a :at) cursor)))))
|
||||
|
||||
; activities strictly newer than cursor (load newer / "N new posts")
|
||||
(define
|
||||
feed/after
|
||||
(fn
|
||||
(stream cursor)
|
||||
(feed/filter stream (fn (a) (> (get a :at) cursor)))))
|
||||
|
||||
; one page: the `limit` newest activities older than cursor, newest first
|
||||
(define
|
||||
feed/page-before
|
||||
(fn
|
||||
(stream cursor limit)
|
||||
(feed/take (feed/recent (feed/before stream cursor)) limit)))
|
||||
|
||||
; cursor to fetch the next (older) page: :at of the last item of a page,
|
||||
; or nil when the page is empty (end of feed)
|
||||
(define
|
||||
feed/next-cursor
|
||||
(fn
|
||||
(page)
|
||||
(let
|
||||
((items (feed/items page)))
|
||||
(if (= (len items) 0) nil (get (last items) :at)))))
|
||||
92
lib/feed/rank.sx
Normal file
92
lib/feed/rank.sx
Normal file
@@ -0,0 +1,92 @@
|
||||
; feed/rank — scoring + ranking. Scorers are (activity -> number). Ranking is a
|
||||
; stable two-pass grade-down: first by :at descending (the tiebreak), then by
|
||||
; score descending — so ties resolve by recency, then by input order. Fully
|
||||
; deterministic on ties.
|
||||
;
|
||||
; Requires: lib/apl/runtime.sx, lib/feed/normalize.sx, lib/feed/stream.sx.
|
||||
|
||||
; --- scorers ----------------------------------------------------------------
|
||||
|
||||
; recency: half-life decay. score = 0.5 ^ (age / half-life). at==now -> 1.0.
|
||||
(define
|
||||
feed/recency
|
||||
(fn
|
||||
(now half-life)
|
||||
(fn (a) (expt 0.5 (/ (- now (get a :at)) half-life)))))
|
||||
|
||||
; velocity: how many of this actor's activities fall in (at-window, at] —
|
||||
; a burst of recent activity scores higher.
|
||||
(define
|
||||
feed/velocity
|
||||
(fn
|
||||
(stream window)
|
||||
(fn
|
||||
(a)
|
||||
(len
|
||||
(filter
|
||||
(fn
|
||||
(b)
|
||||
(and
|
||||
(equal? (get b :actor) (get a :actor))
|
||||
(<= (get b :at) (get a :at))
|
||||
(> (get b :at) (- (get a :at) window))))
|
||||
(feed/items stream))))))
|
||||
|
||||
; engagement: how many activities in the stream touch this activity's :object
|
||||
(define
|
||||
feed/engagement
|
||||
(fn
|
||||
(stream)
|
||||
(fn
|
||||
(a)
|
||||
(len
|
||||
(filter
|
||||
(fn (b) (equal? (get b :object) (get a :object)))
|
||||
(feed/items stream))))))
|
||||
|
||||
; composite: weighted sum. parts = (list (list weight scorer) ...)
|
||||
(define
|
||||
feed/composite
|
||||
(fn
|
||||
(parts)
|
||||
(fn
|
||||
(a)
|
||||
(reduce
|
||||
(fn (acc p) (+ acc (* (first p) ((nth p 1) a))))
|
||||
0
|
||||
parts))))
|
||||
|
||||
; --- ranking ----------------------------------------------------------------
|
||||
|
||||
; stable reorder of items by key-fn, descending (grade-down is stable)
|
||||
(define
|
||||
feed/-desc-by
|
||||
(fn
|
||||
(items key-fn)
|
||||
(let
|
||||
((keys (make-array (list (len items)) (map key-fn items))))
|
||||
(let
|
||||
((order (get (apl-grade-down keys) :ravel)))
|
||||
(map (fn (i) (nth items (- i 1))) order)))))
|
||||
|
||||
; rank by score descending; ties -> :at descending -> input order
|
||||
(define
|
||||
feed/rank
|
||||
(fn
|
||||
(stream score-fn)
|
||||
(let
|
||||
((by-at (feed/-desc-by (feed/items stream) feed/at)))
|
||||
(feed/stream (feed/-desc-by by-at score-fn)))))
|
||||
|
||||
; attach a :score to each activity (for inspection / debugging)
|
||||
(define
|
||||
feed/with-scores
|
||||
(fn
|
||||
(stream score-fn)
|
||||
(feed/stream
|
||||
(map (fn (a) (assoc a :score (score-fn a))) (feed/items stream)))))
|
||||
|
||||
; top-N ranked timeline
|
||||
(define
|
||||
feed/top
|
||||
(fn (stream score-fn n) (feed/take (feed/rank stream score-fn) n)))
|
||||
19
lib/feed/scoreboard.json
Normal file
19
lib/feed/scoreboard.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"suites": {
|
||||
"basic": {"pass": 30, "fail": 0},
|
||||
"fanout": {"pass": 29, "fail": 0},
|
||||
"rank": {"pass": 24, "fail": 0},
|
||||
"integration": {"pass": 22, "fail": 0},
|
||||
"content": {"pass": 15, "fail": 0},
|
||||
"notify": {"pass": 8, "fail": 0},
|
||||
"home": {"pass": 6, "fail": 0},
|
||||
"dedupe": {"pass": 9, "fail": 0},
|
||||
"trending": {"pass": 11, "fail": 0},
|
||||
"mute": {"pass": 9, "fail": 0},
|
||||
"page": {"pass": 14, "fail": 0},
|
||||
"thread": {"pass": 12, "fail": 0}
|
||||
},
|
||||
"total_pass": 189,
|
||||
"total_fail": 0,
|
||||
"total": 189
|
||||
}
|
||||
19
lib/feed/scoreboard.md
Normal file
19
lib/feed/scoreboard.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# feed Conformance Scoreboard
|
||||
|
||||
_Generated by `lib/feed/conformance.sh`_
|
||||
|
||||
| Suite | Pass | Fail | Total |
|
||||
|-------|-----:|-----:|------:|
|
||||
| basic | 30 | 0 | 30 |
|
||||
| fanout | 29 | 0 | 29 |
|
||||
| rank | 24 | 0 | 24 |
|
||||
| integration | 22 | 0 | 22 |
|
||||
| content | 15 | 0 | 15 |
|
||||
| notify | 8 | 0 | 8 |
|
||||
| home | 6 | 0 | 6 |
|
||||
| dedupe | 9 | 0 | 9 |
|
||||
| trending | 11 | 0 | 11 |
|
||||
| mute | 9 | 0 | 9 |
|
||||
| page | 14 | 0 | 14 |
|
||||
| thread | 12 | 0 | 12 |
|
||||
| **Total** | **189** | **0** | **189** |
|
||||
75
lib/feed/stream.sx
Normal file
75
lib/feed/stream.sx
Normal file
@@ -0,0 +1,75 @@
|
||||
; feed/stream — a stream is an APL vector (rank-1 array) whose ravel holds
|
||||
; activity dicts. Operations lift APL primitives onto this shape: filter via
|
||||
; compress (/), sort via grade (⍋), take via ↑, reverse via ⌽.
|
||||
;
|
||||
; Requires: lib/apl/runtime.sx, lib/feed/normalize.sx (loaded by harness).
|
||||
|
||||
(define feed/stream (fn (acts) (make-array (list (len acts)) acts)))
|
||||
|
||||
(define feed/items (fn (s) (get s :ravel)))
|
||||
|
||||
(define feed/count (fn (s) (len (get s :ravel))))
|
||||
|
||||
(define feed/empty (feed/stream (list)))
|
||||
|
||||
(define feed/empty? (fn (s) (= (feed/count s) 0)))
|
||||
|
||||
; filter — bool mask ∘ compress. pred : activity -> truthy
|
||||
(define
|
||||
feed/filter
|
||||
(fn
|
||||
(s pred)
|
||||
(let
|
||||
((items (get s :ravel)))
|
||||
(let
|
||||
((mask (make-array (list (len items)) (map (fn (a) (if (pred a) 1 0)) items))))
|
||||
(apl-compress mask s)))))
|
||||
|
||||
; sort-by — ascending, stable on ties (grade-up is stable). key-fn : activity -> number
|
||||
(define
|
||||
feed/sort-by
|
||||
(fn
|
||||
(s key-fn)
|
||||
(let
|
||||
((items (get s :ravel)))
|
||||
(let
|
||||
((keys (make-array (list (len items)) (map key-fn items))))
|
||||
(let
|
||||
((order (get (apl-grade-up keys) :ravel)))
|
||||
(feed/stream (map (fn (i) (nth items (- i 1))) order)))))))
|
||||
|
||||
(define feed/sort-by-at (fn (s) (feed/sort-by s feed/at)))
|
||||
|
||||
; newest-first: ascending sort then reverse (⌽)
|
||||
(define feed/recent (fn (s) (apl-reverse (feed/sort-by-at s))))
|
||||
|
||||
; take N (↑), clamped to stream length so it never over-takes/pads
|
||||
(define
|
||||
feed/take
|
||||
(fn
|
||||
(s n)
|
||||
(let
|
||||
((c (feed/count s)))
|
||||
(if (>= n c) s (apl-take (apl-scalar n) s)))))
|
||||
|
||||
(define feed/reverse (fn (s) (apl-reverse s)))
|
||||
|
||||
; common predicates
|
||||
(define
|
||||
feed/by-actor
|
||||
(fn (s actor) (feed/filter s (fn (a) (equal? (get a :actor) actor)))))
|
||||
|
||||
(define
|
||||
feed/by-verb
|
||||
(fn (s verb) (feed/filter s (fn (a) (equal? (get a :verb) verb)))))
|
||||
|
||||
(define
|
||||
feed/by-object
|
||||
(fn
|
||||
(s object)
|
||||
(feed/filter s (fn (a) (equal? (get a :object) object)))))
|
||||
|
||||
; activities at or after timestamp t
|
||||
(define
|
||||
feed/since
|
||||
(fn (s t) (feed/filter s (fn (a) (>= (get a :at) t)))))
|
||||
118
lib/feed/tests/basic.sx
Normal file
118
lib/feed/tests/basic.sx
Normal file
@@ -0,0 +1,118 @@
|
||||
; Phase 1 — normalize, stream ops, api. Uses the feed-test harness
|
||||
; (feed-test name got expected) provided by conformance.sh.
|
||||
|
||||
; ---------- normalize ----------
|
||||
|
||||
(feed-test
|
||||
"normalize default actor"
|
||||
(feed/actor (feed/normalize {}))
|
||||
"")
|
||||
(feed-test
|
||||
"normalize default verb"
|
||||
(feed/verb (feed/normalize {}))
|
||||
"post")
|
||||
(feed-test
|
||||
"normalize default at"
|
||||
(feed/at (feed/normalize {}))
|
||||
0)
|
||||
(feed-test
|
||||
"normalize default object"
|
||||
(feed/object (feed/normalize {}))
|
||||
nil)
|
||||
(feed-test
|
||||
"normalize default tags"
|
||||
(feed/tags (feed/normalize {}))
|
||||
(list))
|
||||
(feed-test
|
||||
"normalize keeps actor"
|
||||
(feed/actor (feed/normalize {:actor "alice"}))
|
||||
"alice")
|
||||
(feed-test
|
||||
"normalize keeps verb"
|
||||
(feed/verb (feed/normalize {:verb "like"}))
|
||||
"like")
|
||||
(feed-test
|
||||
"normalize scalar tag -> list"
|
||||
(feed/tags (feed/normalize {:tags "x"}))
|
||||
(list "x"))
|
||||
(feed-test
|
||||
"normalize list tags kept"
|
||||
(feed/tags (feed/normalize {:tags (list "a" "b")}))
|
||||
(list "a" "b"))
|
||||
(feed-test
|
||||
"activity constructor at"
|
||||
(feed/at (feed/activity "a" "post" "o" 5 (list)))
|
||||
5)
|
||||
(feed-test
|
||||
"activity? on activity"
|
||||
(feed/activity? (feed/normalize {:actor "a"}))
|
||||
true)
|
||||
(feed-test "activity? on number" (feed/activity? 5) false)
|
||||
(feed-test "activity? on bare dict" (feed/activity? {:foo 1}) false)
|
||||
|
||||
; ---------- stream ----------
|
||||
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 30 (list))
|
||||
(feed/activity "bob" "like" "p1" 10 (list))
|
||||
(feed/activity "alice" "post" "p2" 20 (list)))))
|
||||
|
||||
(feed-test "stream count" (feed/count S) 3)
|
||||
(feed-test "stream items len" (len (feed/items S)) 3)
|
||||
(feed-test
|
||||
"sort-by-at actors asc"
|
||||
(map feed/actor (feed/items (feed/sort-by-at S)))
|
||||
(list "bob" "alice" "alice"))
|
||||
(feed-test
|
||||
"recent newest first"
|
||||
(map feed/at (feed/items (feed/recent S)))
|
||||
(list 30 20 10))
|
||||
(feed-test
|
||||
"take 2 of recent"
|
||||
(feed/count (feed/take (feed/recent S) 2))
|
||||
2)
|
||||
(feed-test
|
||||
"take clamps past end"
|
||||
(feed/count (feed/take S 10))
|
||||
3)
|
||||
(feed-test
|
||||
"by-actor alice count"
|
||||
(feed/count (feed/by-actor S "alice"))
|
||||
2)
|
||||
(feed-test
|
||||
"by-verb like actor"
|
||||
(map feed/actor (feed/items (feed/by-verb S "like")))
|
||||
(list "bob"))
|
||||
(feed-test
|
||||
"by-object p1 count"
|
||||
(feed/count (feed/by-object S "p1"))
|
||||
2)
|
||||
(feed-test
|
||||
"since 20 count"
|
||||
(feed/count (feed/since S 20))
|
||||
2)
|
||||
(feed-test
|
||||
"reverse ats"
|
||||
(map feed/at (feed/items (feed/reverse S)))
|
||||
(list 20 10 30))
|
||||
(feed-test "empty? on empty" (feed/empty? feed/empty) true)
|
||||
(feed-test
|
||||
"empty? on filtered-out"
|
||||
(feed/empty? (feed/by-actor S "zzz"))
|
||||
true)
|
||||
|
||||
; ---------- api ----------
|
||||
|
||||
(feed/reset!)
|
||||
(feed/post {:actor "x" :at 1 :verb "post"})
|
||||
(feed/post {:actor "y" :at 2 :verb "like"})
|
||||
(feed-test "api size after posts" (feed/size) 2)
|
||||
(feed-test "api all count" (feed/count (feed/all)) 2)
|
||||
(feed-test
|
||||
"post returns normalized verb"
|
||||
(feed/verb (feed/post {:actor "z"}))
|
||||
"post")
|
||||
(feed-test "api size after third post" (feed/size) 3)
|
||||
85
lib/feed/tests/content.sx
Normal file
85
lib/feed/tests/content.sx
Normal file
@@ -0,0 +1,85 @@
|
||||
; Follow-up — TF-IDF content ranking over :tags. (feed-test name got expected)
|
||||
|
||||
(define
|
||||
corpus
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "u" :object "o1" :at 10 :tags (list "cats" "funny")})
|
||||
(feed/normalize {:actor "u" :object "o2" :at 20 :tags (list "cats" "news")})
|
||||
(feed/normalize {:actor "u" :object "o3" :at 30 :tags (list "politics" "news")})
|
||||
(feed/normalize {:actor "u" :object "o4" :at 40 :tags (list "cats")}))))
|
||||
|
||||
; ---------- document frequency ----------
|
||||
|
||||
(feed-test "df cats" (get (feed/tag-df corpus) "cats") 3)
|
||||
(feed-test "df news" (get (feed/tag-df corpus) "news") 2)
|
||||
(feed-test "df funny" (get (feed/tag-df corpus) "funny") 1)
|
||||
(feed-test "df politics" (get (feed/tag-df corpus) "politics") 1)
|
||||
(feed-test "df full" (feed/tag-df corpus) {:news 2 :funny 1 :politics 1 :cats 3})
|
||||
|
||||
; ---------- inverse document frequency ----------
|
||||
|
||||
(feed-test
|
||||
"idf news = log(4/2)"
|
||||
(get (feed/tag-idf corpus) "news")
|
||||
(log 2))
|
||||
(feed-test
|
||||
"idf funny = log(4/1)"
|
||||
(get (feed/tag-idf corpus) "funny")
|
||||
(log 4))
|
||||
(feed-test
|
||||
"rarer tag has higher idf"
|
||||
(>
|
||||
(get (feed/tag-idf corpus) "funny")
|
||||
(get (feed/tag-idf corpus) "cats"))
|
||||
true)
|
||||
|
||||
; ---------- tf-idf scoring ----------
|
||||
|
||||
(define idf (feed/tag-idf corpus))
|
||||
|
||||
(feed-test
|
||||
"score query funny on o1"
|
||||
((feed/tfidf-score idf (list "funny")) (feed/normalize {:actor "u" :object "x" :tags (list "cats" "funny")}))
|
||||
(log 4))
|
||||
(feed-test
|
||||
"score query funny on non-match"
|
||||
((feed/tfidf-score idf (list "funny")) (feed/normalize {:actor "u" :object "x" :tags (list "cats")}))
|
||||
0)
|
||||
(feed-test
|
||||
"unknown query tag scores 0"
|
||||
((feed/tfidf-score idf (list "zzz")) (feed/normalize {:actor "u" :object "x" :tags (list "cats")}))
|
||||
0)
|
||||
|
||||
; ---------- ranking by relevance ----------
|
||||
|
||||
; query news: o2,o3 match (score log2), o1,o4 don't (0); ties break by :at desc
|
||||
(feed-test
|
||||
"by-relevance news order"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/by-relevance corpus (list "news"))))
|
||||
(list "o3" "o2" "o4" "o1"))
|
||||
|
||||
; query funny: only o1 matches -> ranks first
|
||||
(feed-test
|
||||
"by-relevance funny first"
|
||||
(get
|
||||
(nth (feed/items (feed/by-relevance corpus (list "funny"))) 0)
|
||||
:object)
|
||||
"o1")
|
||||
|
||||
; query (cats news): o2 carries both tags -> highest combined tf-idf
|
||||
(feed-test
|
||||
"by-relevance cats+news top"
|
||||
(get
|
||||
(nth
|
||||
(feed/items (feed/by-relevance corpus (list "cats" "news")))
|
||||
0)
|
||||
:object)
|
||||
"o2")
|
||||
|
||||
(feed-test
|
||||
"by-relevance preserves count"
|
||||
(feed/count (feed/by-relevance corpus (list "cats")))
|
||||
4)
|
||||
56
lib/feed/tests/dedupe.sx
Normal file
56
lib/feed/tests/dedupe.sx
Normal file
@@ -0,0 +1,56 @@
|
||||
; Follow-up — verb-aware (smart) dedupe. (feed-test name got expected)
|
||||
|
||||
; reactions (like/follow) collapse cross-actor; posts stay distinct per actor
|
||||
(define
|
||||
M
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "like" "X" 1 (list))
|
||||
(feed/activity "bob" "like" "X" 2 (list))
|
||||
(feed/activity "alice" "post" "P" 3 (list))
|
||||
(feed/activity "bob" "post" "P" 4 (list))
|
||||
(feed/activity "alice" "follow" "C" 5 (list))
|
||||
(feed/activity "bob" "follow" "C" 6 (list))))) ; collapses
|
||||
|
||||
(feed-test
|
||||
"smart dedupe total"
|
||||
(feed/count (feed/dedupe-smart M))
|
||||
4)
|
||||
(feed-test
|
||||
"smart keeps both posts"
|
||||
(feed/count (feed/by-verb (feed/dedupe-smart M) "post"))
|
||||
2)
|
||||
(feed-test
|
||||
"smart collapses likes to one"
|
||||
(feed/count (feed/by-verb (feed/dedupe-smart M) "like"))
|
||||
1)
|
||||
(feed-test
|
||||
"smart collapses follows to one"
|
||||
(feed/count (feed/by-verb (feed/dedupe-smart M) "follow"))
|
||||
1)
|
||||
(feed-test
|
||||
"collapsed like keeps first actor"
|
||||
(map feed/actor (feed/items (feed/by-verb (feed/dedupe-smart M) "like")))
|
||||
(list "alice"))
|
||||
|
||||
; contrast: plain activity dedupe keeps cross-actor likes distinct
|
||||
(feed-test
|
||||
"activity dedupe keeps both likes"
|
||||
(feed/count (feed/by-verb (feed/dedupe-activities M) "like"))
|
||||
2)
|
||||
|
||||
; contrast: blanket collapse folds the two posts (same verb+object) too
|
||||
(feed-test
|
||||
"collapse dedupe folds posts"
|
||||
(feed/count (feed/by-verb (feed/dedupe-collapse M) "post"))
|
||||
1)
|
||||
|
||||
; smart-key dispatch
|
||||
(feed-test
|
||||
"smart-key reaction -> (verb object)"
|
||||
(feed/smart-key (feed/activity "alice" "like" "X" 0 (list)))
|
||||
(list "like" "X"))
|
||||
(feed-test
|
||||
"smart-key post -> (actor verb object)"
|
||||
(feed/smart-key (feed/activity "alice" "post" "P" 0 (list)))
|
||||
(list "alice" "post" "P"))
|
||||
187
lib/feed/tests/fanout.sx
Normal file
187
lib/feed/tests/fanout.sx
Normal file
@@ -0,0 +1,187 @@
|
||||
; Phase 2 — fanout via outer product + dedupe. (feed-test name got expected)
|
||||
|
||||
; ---------- graph ----------
|
||||
|
||||
; edges: (follower followee). bob,carol follow alice; carol,dave follow bob.
|
||||
(define
|
||||
G
|
||||
(feed/follow-graph
|
||||
(list
|
||||
(list "bob" "alice")
|
||||
(list "carol" "alice")
|
||||
(list "carol" "bob")
|
||||
(list "dave" "bob"))))
|
||||
|
||||
(feed-test "followers alice" (feed/followers G "alice") (list "bob" "carol"))
|
||||
(feed-test "followers bob" (feed/followers G "bob") (list "carol" "dave"))
|
||||
(feed-test "followers unknown" (feed/followers G "zzz") (list))
|
||||
(feed-test "audience distinct" (feed/audience G) (list "bob" "carol" "dave"))
|
||||
|
||||
; ---------- fanout ----------
|
||||
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 10 (list))
|
||||
(feed/activity "alice" "post" "p2" 20 (list))
|
||||
(feed/activity "bob" "like" "p1" 30 (list)))))
|
||||
|
||||
(define IB (feed/fanout S G))
|
||||
|
||||
(feed-test "fanout total edges" (feed/count IB) 6)
|
||||
(feed-test
|
||||
"inbox bob count"
|
||||
(feed/count (feed/inbox-for IB "bob"))
|
||||
2)
|
||||
(feed-test
|
||||
"inbox carol count"
|
||||
(feed/count (feed/inbox-for IB "carol"))
|
||||
3)
|
||||
(feed-test
|
||||
"inbox dave count"
|
||||
(feed/count (feed/inbox-for IB "dave"))
|
||||
1)
|
||||
(feed-test
|
||||
"inbox alice (follows none)"
|
||||
(feed/count (feed/inbox-for IB "alice"))
|
||||
0)
|
||||
(feed-test
|
||||
"recipients order"
|
||||
(feed/recipients IB)
|
||||
(list "bob" "carol" "dave"))
|
||||
(feed-test
|
||||
"bob inbox objects"
|
||||
(map (fn (a) (get a :object)) (feed/inbox-activities IB "bob"))
|
||||
(list "p1" "p2"))
|
||||
(feed-test
|
||||
"dave inbox objects"
|
||||
(map (fn (a) (get a :object)) (feed/inbox-activities IB "dave"))
|
||||
(list "p1"))
|
||||
(feed-test
|
||||
"dave inbox verb"
|
||||
(map (fn (a) (get a :verb)) (feed/inbox-activities IB "dave"))
|
||||
(list "like"))
|
||||
|
||||
; empty graph → no audience → no edges
|
||||
(feed-test
|
||||
"empty graph fanout"
|
||||
(feed/count (feed/fanout S {}))
|
||||
0)
|
||||
|
||||
; actor nobody follows produces no edges
|
||||
(define
|
||||
Sghost
|
||||
(feed/stream (list (feed/activity "ghost" "post" "g1" 5 (list)))))
|
||||
(feed-test
|
||||
"unfollowed actor fanout"
|
||||
(feed/count (feed/fanout Sghost G))
|
||||
0)
|
||||
|
||||
; ---------- high fanout (popular actor) ----------
|
||||
|
||||
(define
|
||||
Gstar
|
||||
(feed/follow-graph
|
||||
(list
|
||||
(list "u1" "star")
|
||||
(list "u2" "star")
|
||||
(list "u3" "star")
|
||||
(list "u4" "star")
|
||||
(list "u5" "star"))))
|
||||
(define
|
||||
Sstar
|
||||
(feed/stream (list (feed/activity "star" "post" "s1" 1 (list)))))
|
||||
(feed-test
|
||||
"star fanout count"
|
||||
(feed/count (feed/fanout Sstar Gstar))
|
||||
5)
|
||||
(feed-test "star audience size" (len (feed/audience Gstar)) 5)
|
||||
|
||||
; ---------- mutual follow ----------
|
||||
|
||||
(define Gmut (feed/follow-graph (list (list "a" "b") (list "b" "a"))))
|
||||
(define
|
||||
Smut
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "a" "post" "pa" 1 (list))
|
||||
(feed/activity "b" "post" "pb" 2 (list)))))
|
||||
(define IBmut (feed/fanout Smut Gmut))
|
||||
(feed-test "mutual total" (feed/count IBmut) 2)
|
||||
(feed-test
|
||||
"mutual a gets pb"
|
||||
(map (fn (x) (get x :object)) (feed/inbox-activities IBmut "a"))
|
||||
(list "pb"))
|
||||
(feed-test
|
||||
"mutual b gets pa"
|
||||
(map (fn (x) (get x :object)) (feed/inbox-activities IBmut "b"))
|
||||
(list "pa"))
|
||||
|
||||
; ---------- dedupe ----------
|
||||
|
||||
(define
|
||||
Sdup2
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 1 (list))
|
||||
(feed/activity "alice" "post" "p1" 9 (list))
|
||||
(feed/activity "alice" "post" "p2" 2 (list)))))
|
||||
(feed-test
|
||||
"dedupe-activities collapses dup"
|
||||
(feed/count (feed/dedupe-activities Sdup2))
|
||||
2)
|
||||
(feed-test
|
||||
"dedupe-activities keeps distinct"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/dedupe-activities Sdup2)))
|
||||
(list "p1" "p2"))
|
||||
|
||||
(define
|
||||
Slikes
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "like" "X" 1 (list))
|
||||
(feed/activity "bob" "like" "X" 2 (list))
|
||||
(feed/activity "carol" "like" "Y" 3 (list)))))
|
||||
(feed-test
|
||||
"collapse cross-actor likes"
|
||||
(feed/count (feed/dedupe-collapse Slikes))
|
||||
2)
|
||||
(feed-test
|
||||
"collapse keeps distinct objects"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/dedupe-collapse Slikes)))
|
||||
(list "X" "Y"))
|
||||
|
||||
(feed-test
|
||||
"activity-key shape"
|
||||
(feed/activity-key (feed/activity "a" "post" "o" 0 (list)))
|
||||
(list "a" "post" "o"))
|
||||
(feed-test
|
||||
"collapse-key shape"
|
||||
(feed/collapse-key (feed/activity "a" "like" "o" 0 (list)))
|
||||
(list "like" "o"))
|
||||
|
||||
; cross-post: alice posts p1 twice → bob's inbox has it twice → dedupe-inbox → once
|
||||
(define
|
||||
Scross
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 1 (list))
|
||||
(feed/activity "alice" "post" "p1" 5 (list)))))
|
||||
(define IBcross (feed/fanout Scross G))
|
||||
(feed-test
|
||||
"cross-post raw bob count"
|
||||
(feed/count (feed/inbox-for IBcross "bob"))
|
||||
2)
|
||||
(feed-test
|
||||
"cross-post deduped bob count"
|
||||
(feed/count (feed/inbox-for (feed/dedupe-inbox IBcross) "bob"))
|
||||
1)
|
||||
(feed-test
|
||||
"dedupe-inbox keeps distinct receivers"
|
||||
(feed/count (feed/dedupe-inbox IBcross))
|
||||
2)
|
||||
73
lib/feed/tests/home.sx
Normal file
73
lib/feed/tests/home.sx
Normal file
@@ -0,0 +1,73 @@
|
||||
; Follow-up — feed/home capstone pipeline. (feed-test name got expected)
|
||||
|
||||
; alice follows star and bob (edges: follower followee)
|
||||
(define
|
||||
G
|
||||
(feed/follow-graph (list (list "alice" "star") (list "alice" "bob"))))
|
||||
|
||||
; star posts s1 then s2; bob posts b1; star re-posts s1 (cross-post dup);
|
||||
; zoe posts z1 (alice does NOT follow zoe)
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "star" "post" "s1" 10 (list))
|
||||
(feed/activity "star" "post" "s2" 20 (list))
|
||||
(feed/activity "bob" "post" "b1" 15 (list))
|
||||
(feed/activity "star" "post" "s1" 5 (list))
|
||||
(feed/activity "zoe" "post" "z1" 30 (list)))))
|
||||
|
||||
(define rec (feed/recency 100 10))
|
||||
|
||||
(feed-test
|
||||
"home count (deduped, followed only)"
|
||||
(feed/count (feed/home S G "alice" feed/permit-public? rec 10))
|
||||
3)
|
||||
|
||||
(feed-test
|
||||
"home order by recency"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/home S G "alice" feed/permit-public? rec 10)))
|
||||
(list "s2" "b1" "s1"))
|
||||
|
||||
(feed-test
|
||||
"home excludes unfollowed zoe"
|
||||
(feed/-elem?
|
||||
"z1"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/home S G "alice" feed/permit-public? rec 10))))
|
||||
false)
|
||||
|
||||
(feed-test
|
||||
"home top-2"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/home S G "alice" feed/permit-public? rec 2)))
|
||||
(list "s2" "b1"))
|
||||
|
||||
(feed-test
|
||||
"home dedupes cross-post (one s1)"
|
||||
(len
|
||||
(filter
|
||||
(fn (o) (equal? o "s1"))
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items
|
||||
(feed/home S G "alice" feed/permit-public? rec 10)))))
|
||||
1)
|
||||
|
||||
; ACL applied per-viewer in the home pipeline
|
||||
(define
|
||||
Sacl
|
||||
(feed/stream
|
||||
(list (feed/normalize {:actor "star" :object "pub" :at 20}) (feed/normalize {:actor "star" :object "sec" :visible-to (list "carol") :at 25}))))
|
||||
(define Gacl (feed/follow-graph (list (list "alice" "star"))))
|
||||
|
||||
(feed-test
|
||||
"home hides activity alice not permitted"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/home Sacl Gacl "alice" feed/permit-acl? rec 10)))
|
||||
(list "pub"))
|
||||
155
lib/feed/tests/integration.sx
Normal file
155
lib/feed/tests/integration.sx
Normal file
@@ -0,0 +1,155 @@
|
||||
; Phase 4 — visibility (ACL) + federation, and the end-to-end timeline.
|
||||
; (feed-test name got expected)
|
||||
|
||||
; ---------- ACL visibility ----------
|
||||
; pub: public. sec: bob, allows carol. dm: frank, allows dave.
|
||||
|
||||
(define
|
||||
C
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "alice" :object "pub" :at 10})
|
||||
(feed/normalize {:actor "bob" :object "sec" :visible-to (list "carol") :at 20})
|
||||
(feed/normalize {:actor "frank" :object "dm" :visible-to (list "dave") :at 30}))))
|
||||
|
||||
(feed-test
|
||||
"public visible to anyone"
|
||||
(feed/count (feed/visible C "zoe" feed/permit-acl?))
|
||||
1)
|
||||
(feed-test
|
||||
"carol sees allowlisted + public"
|
||||
(feed/count (feed/visible C "carol" feed/permit-acl?))
|
||||
2)
|
||||
(feed-test
|
||||
"dave sees dm + public"
|
||||
(feed/count (feed/visible C "dave" feed/permit-acl?))
|
||||
2)
|
||||
(feed-test
|
||||
"author always sees own private"
|
||||
(feed/count (feed/visible C "frank" feed/permit-acl?))
|
||||
2)
|
||||
(feed-test
|
||||
"permit-public? lets all through"
|
||||
(feed/count (feed/visible C "zoe" feed/permit-public?))
|
||||
3)
|
||||
(feed-test
|
||||
"visible objects for dave"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/visible C "dave" feed/permit-acl?)))
|
||||
(list "pub" "dm"))
|
||||
|
||||
; per-viewer: same stream, different timelines
|
||||
(feed-test
|
||||
"zoe timeline differs from carol"
|
||||
(not
|
||||
(=
|
||||
(feed/count (feed/visible C "zoe" feed/permit-acl?))
|
||||
(feed/count (feed/visible C "carol" feed/permit-acl?))))
|
||||
true)
|
||||
|
||||
; ---------- federation: merge / ingest ----------
|
||||
|
||||
(define
|
||||
L
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 10 (list))
|
||||
(feed/activity "alice" "post" "p2" 20 (list)))))
|
||||
(define
|
||||
P
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p2" 20 (list))
|
||||
(feed/activity "peer" "post" "p9" 25 (list)))))
|
||||
|
||||
(feed-test "merge concatenates" (feed/count (feed/merge L P)) 4)
|
||||
(feed-test
|
||||
"ingest dedupes overlap"
|
||||
(feed/count (feed/ingest L P))
|
||||
3)
|
||||
|
||||
(feed-test
|
||||
"inbound normalizes + ingests"
|
||||
(feed/count (feed/inbound L (list {:actor "peer" :object "p9" :at 25} {:actor "alice" :object "p1" :at 10})))
|
||||
3)
|
||||
|
||||
; backfill via injected fetch-fn
|
||||
(define peer-history (fn (peer-id) (list {:actor peer-id :object "h1" :at 1} {:actor peer-id :object "h2" :at 2})))
|
||||
(feed-test
|
||||
"backfill merges peer history"
|
||||
(feed/count (feed/backfill L peer-history "remote"))
|
||||
4)
|
||||
(feed-test
|
||||
"backfill objects present"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items
|
||||
(feed/by-actor (feed/backfill L peer-history "remote") "remote")))
|
||||
(list "h1" "h2"))
|
||||
|
||||
; ---------- federation: outbound partition ----------
|
||||
|
||||
; bob (local), alice@remote + carol@remote (remote) follow star
|
||||
(define
|
||||
Gf
|
||||
(feed/follow-graph
|
||||
(list
|
||||
(list "bob" "star")
|
||||
(list "alice@remote" "star")
|
||||
(list "carol@remote" "star"))))
|
||||
(define
|
||||
Sf
|
||||
(feed/stream (list (feed/activity "star" "post" "s1" 1 (list)))))
|
||||
(define
|
||||
remote?
|
||||
(fn (id) (feed/-elem? id (list "alice@remote" "carol@remote"))))
|
||||
(define parts (feed/federate Sf Gf remote?))
|
||||
|
||||
(feed-test "local deliveries" (feed/count (get parts :local)) 1)
|
||||
(feed-test "remote deliveries" (feed/count (get parts :remote)) 2)
|
||||
(feed-test
|
||||
"local recipient is bob"
|
||||
(feed/recipients (get parts :local))
|
||||
(list "bob"))
|
||||
|
||||
; deliver: send-fn receives each remote event, local inbox returned
|
||||
(define sent (list))
|
||||
(define send-fn (fn (to act) (set! sent (append sent (list to)))))
|
||||
(define local-inbox (feed/deliver Sf Gf remote? send-fn))
|
||||
(feed-test "deliver returns local inbox" (feed/count local-inbox) 1)
|
||||
(feed-test "deliver sent to both remotes" (len sent) 2)
|
||||
(feed-test "deliver remote targets" sent (list "alice@remote" "carol@remote"))
|
||||
|
||||
; ---------- end-to-end: federated, ACL-filtered, ranked timeline ----------
|
||||
|
||||
(define
|
||||
base
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "alice" :object "a1" :at 100})
|
||||
(feed/normalize {:actor "bob" :object "b1" :visible-to (list "carol") :at 90})
|
||||
(feed/normalize {:actor "eve" :object "e1" :visible-to (list "dave") :at 80}))))
|
||||
(define federated (feed/inbound base (list {:actor "peer" :object "x1" :at 110})))
|
||||
(define rec (feed/recency 120 10))
|
||||
(define
|
||||
carol-tl
|
||||
(feed/timeline federated "carol" feed/permit-acl? rec 3))
|
||||
|
||||
; eve's :visible-to excludes carol -> filtered out; peer/alice public, bob allows carol
|
||||
(feed-test "carol federated timeline count" (feed/count carol-tl) 3)
|
||||
(feed-test
|
||||
"carol timeline order (recency)"
|
||||
(map (fn (a) (get a :object)) (feed/items carol-tl))
|
||||
(list "x1" "a1" "b1"))
|
||||
(feed-test
|
||||
"eve dm excluded from carol"
|
||||
(feed/-elem? "e1" (map (fn (a) (get a :object)) (feed/items carol-tl)))
|
||||
false)
|
||||
(feed-test
|
||||
"dave sees eve dm not bob"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items
|
||||
(feed/timeline federated "dave" feed/permit-acl? rec 5)))
|
||||
(list "x1" "a1" "e1"))
|
||||
68
lib/feed/tests/mute.sx
Normal file
68
lib/feed/tests/mute.sx
Normal file
@@ -0,0 +1,68 @@
|
||||
; Follow-up — viewer mute/block filtering. (feed-test name got expected)
|
||||
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "alice" :object "P1" :at 1 :tags (list "news")})
|
||||
(feed/normalize {:actor "bob" :object "P2" :at 2 :tags (list "spam")})
|
||||
(feed/normalize {:actor "alice" :object "P3" :at 3 :tags (list "cats")})
|
||||
(feed/normalize {:actor "carol" :object "P4" :at 4 :tags (list "news" "spam")}))))
|
||||
|
||||
; ---------- mute actors ----------
|
||||
|
||||
(feed-test
|
||||
"mute bob drops his post"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/mute-actors S (list "bob"))))
|
||||
(list "P1" "P3" "P4"))
|
||||
(feed-test
|
||||
"mute alice drops two"
|
||||
(feed/count (feed/mute-actors S (list "alice")))
|
||||
2)
|
||||
(feed-test
|
||||
"mute nobody keeps all"
|
||||
(feed/count (feed/mute-actors S (list)))
|
||||
4)
|
||||
|
||||
; ---------- mute tags ----------
|
||||
|
||||
(feed-test
|
||||
"mute spam tag drops two"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/mute-tags S (list "spam"))))
|
||||
(list "P1" "P3"))
|
||||
(feed-test
|
||||
"mute news+cats leaves spam-only"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/mute-tags S (list "news" "cats"))))
|
||||
(list "P2"))
|
||||
|
||||
; ---------- mute objects ----------
|
||||
|
||||
(feed-test
|
||||
"mute object P3 (thread mute)"
|
||||
(feed/count (feed/mute-objects S (list "P3")))
|
||||
3)
|
||||
|
||||
; ---------- combined prefs ----------
|
||||
|
||||
(feed-test
|
||||
"apply-prefs actors + tags"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/apply-prefs S {:mute-actors (list "bob") :mute-tags (list "cats")})))
|
||||
(list "P1" "P4"))
|
||||
(feed-test
|
||||
"apply-prefs empty keeps all"
|
||||
(feed/count (feed/apply-prefs S {}))
|
||||
4)
|
||||
(feed-test
|
||||
"apply-prefs all three filters"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/apply-prefs S {:mute-objects (list "P3") :mute-actors (list "carol") :mute-tags (list "spam")})))
|
||||
(list "P1"))
|
||||
69
lib/feed/tests/notify.sx
Normal file
69
lib/feed/tests/notify.sx
Normal file
@@ -0,0 +1,69 @@
|
||||
; Follow-up — notification feed over an inbox. (feed-test name got expected)
|
||||
|
||||
; an inbox is a stream of {:to receiver :activity act} events
|
||||
(define mk-ev (fn (to act) {:activity act :to to}))
|
||||
|
||||
(define
|
||||
IB
|
||||
(feed/stream
|
||||
(list
|
||||
(mk-ev "alice" (feed/activity "bob" "like" "P" 10 (list)))
|
||||
(mk-ev "alice" (feed/activity "carol" "like" "P" 20 (list)))
|
||||
(mk-ev "alice" (feed/activity "dave" "reply" "Q" 30 (list)))
|
||||
(mk-ev "bob" (feed/activity "eve" "like" "R" 40 (list))))))
|
||||
|
||||
; ---------- raw notifications ----------
|
||||
|
||||
(feed-test
|
||||
"alice notification count"
|
||||
(feed/count (feed/notifications IB "alice"))
|
||||
3)
|
||||
(feed-test
|
||||
"bob notification count"
|
||||
(feed/count (feed/notifications IB "bob"))
|
||||
1)
|
||||
(feed-test
|
||||
"zoe no notifications"
|
||||
(feed/count (feed/notifications IB "zoe"))
|
||||
0)
|
||||
|
||||
; ---------- verb filtering ----------
|
||||
|
||||
(feed-test
|
||||
"alice likes only"
|
||||
(feed/count (feed/notify-verbs IB "alice" (list "like")))
|
||||
2)
|
||||
(feed-test
|
||||
"alice replies only"
|
||||
(feed/count (feed/notify-verbs IB "alice" (list "reply")))
|
||||
1)
|
||||
(feed-test
|
||||
"alice like+reply"
|
||||
(feed/count (feed/notify-verbs IB "alice" (list "like" "reply")))
|
||||
3)
|
||||
(feed-test
|
||||
"alice follow (none)"
|
||||
(feed/count (feed/notify-verbs IB "alice" (list "follow")))
|
||||
0)
|
||||
|
||||
; ---------- digest ----------
|
||||
|
||||
(define dig (feed/notify-digest IB "alice"))
|
||||
|
||||
(feed-test "digest group count" (len dig) 2)
|
||||
(feed-test
|
||||
"digest sorted by key (like|P before reply|Q)"
|
||||
(map (fn (g) (get g :object)) dig)
|
||||
(list "P" "Q"))
|
||||
(feed-test
|
||||
"like group actors"
|
||||
(get (nth dig 0) :actors)
|
||||
(list "bob" "carol"))
|
||||
(feed-test "like group count" (get (nth dig 0) :count) 2)
|
||||
(feed-test "like group verb" (get (nth dig 0) :verb) "like")
|
||||
(feed-test "reply group count" (get (nth dig 1) :count) 1)
|
||||
(feed-test
|
||||
"reply group actors"
|
||||
(get (nth dig 1) :actors)
|
||||
(list "dave"))
|
||||
(feed-test "empty digest for zoe" (feed/notify-digest IB "zoe") (list))
|
||||
86
lib/feed/tests/page.sx
Normal file
86
lib/feed/tests/page.sx
Normal file
@@ -0,0 +1,86 @@
|
||||
; Follow-up — pagination (offset + cursor). (feed-test name got expected)
|
||||
|
||||
; ---------- offset / limit ----------
|
||||
|
||||
(define
|
||||
O
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "o1" 1 (list))
|
||||
(feed/activity "u" "post" "o2" 2 (list))
|
||||
(feed/activity "u" "post" "o3" 3 (list))
|
||||
(feed/activity "u" "post" "o4" 4 (list))
|
||||
(feed/activity "u" "post" "o5" 5 (list)))))
|
||||
|
||||
(feed-test
|
||||
"page 1"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/page O 0 2)))
|
||||
(list "o1" "o2"))
|
||||
(feed-test
|
||||
"page 2"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/page O 2 2)))
|
||||
(list "o3" "o4"))
|
||||
(feed-test
|
||||
"page 3 (partial)"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/page O 4 2)))
|
||||
(list "o5"))
|
||||
(feed-test
|
||||
"page past end empty"
|
||||
(feed/count (feed/page O 10 2))
|
||||
0)
|
||||
(feed-test "page-count 5/2 = 3" (feed/page-count O 2) 3)
|
||||
(feed-test "page-count 5/5 = 1" (feed/page-count O 5) 1)
|
||||
|
||||
; ---------- cursor (recency) ----------
|
||||
|
||||
(define
|
||||
R
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "a" 50 (list))
|
||||
(feed/activity "u" "post" "b" 40 (list))
|
||||
(feed/activity "u" "post" "c" 30 (list))
|
||||
(feed/activity "u" "post" "d" 20 (list))
|
||||
(feed/activity "u" "post" "e" 10 (list)))))
|
||||
|
||||
(define p1 (feed/page-before R 100 2))
|
||||
(feed-test
|
||||
"cursor page 1 newest first"
|
||||
(map (fn (a) (get a :object)) (feed/items p1))
|
||||
(list "a" "b"))
|
||||
(feed-test "next cursor after page 1" (feed/next-cursor p1) 40)
|
||||
|
||||
(define p2 (feed/page-before R (feed/next-cursor p1) 2))
|
||||
(feed-test
|
||||
"cursor page 2"
|
||||
(map (fn (a) (get a :object)) (feed/items p2))
|
||||
(list "c" "d"))
|
||||
(feed-test "next cursor after page 2" (feed/next-cursor p2) 20)
|
||||
|
||||
(define p3 (feed/page-before R (feed/next-cursor p2) 2))
|
||||
(feed-test
|
||||
"cursor page 3 (partial)"
|
||||
(map (fn (a) (get a :object)) (feed/items p3))
|
||||
(list "e"))
|
||||
|
||||
(feed-test
|
||||
"empty page nil cursor"
|
||||
(feed/next-cursor (feed/page-before R 5 2))
|
||||
nil)
|
||||
|
||||
(feed-test
|
||||
"after cursor loads newer"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/recent (feed/after R 30))))
|
||||
(list "a" "b"))
|
||||
(feed-test
|
||||
"before cursor count"
|
||||
(feed/count (feed/before R 30))
|
||||
2)
|
||||
160
lib/feed/tests/rank.sx
Normal file
160
lib/feed/tests/rank.sx
Normal file
@@ -0,0 +1,160 @@
|
||||
; Phase 3 — aggregation + ranking. (feed-test name got expected)
|
||||
|
||||
; ---------- aggregation ----------
|
||||
|
||||
(define
|
||||
A
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 5 (list))
|
||||
(feed/activity "alice" "post" "p2" 15 (list))
|
||||
(feed/activity "bob" "post" "p3" 25 (list))
|
||||
(feed/activity "alice" "like" "p1" 35 (list)))))
|
||||
|
||||
(feed-test "actor-counts" (feed/actor-counts A) {:alice 3 :bob 1})
|
||||
(feed-test "object-counts" (feed/object-counts A) {:p2 1 :p3 1 :p1 2})
|
||||
(feed-test
|
||||
"group-by actor alice len"
|
||||
(len (get (feed/group-by A feed/actor) "alice"))
|
||||
3)
|
||||
(feed-test
|
||||
"group-count empty"
|
||||
(feed/group-count feed/empty feed/actor)
|
||||
{})
|
||||
|
||||
; day bucketing
|
||||
(define
|
||||
D
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 5 (list))
|
||||
(feed/activity "alice" "post" "p2" 8 (list))
|
||||
(feed/activity "alice" "post" "p3" 12 (list)))))
|
||||
|
||||
(feed-test "feed/day floor" (feed/day 12 10) 1)
|
||||
(feed-test "feed/day same bucket" (feed/day 8 10) 0)
|
||||
(feed-test "by-actor-day" (feed/by-actor-day D 10) {:alice#0 2 :alice#1 1})
|
||||
|
||||
; ---------- recency ----------
|
||||
|
||||
(define rec (feed/recency 100 10))
|
||||
(feed-test
|
||||
"recency at=now -> 1"
|
||||
(rec (feed/activity "x" "post" "o" 100 (list)))
|
||||
1)
|
||||
(feed-test
|
||||
"recency age=hl -> .5"
|
||||
(rec (feed/activity "x" "post" "o" 90 (list)))
|
||||
0.5)
|
||||
(feed-test
|
||||
"recency age=2hl -> .25"
|
||||
(rec (feed/activity "x" "post" "o" 80 (list)))
|
||||
0.25)
|
||||
|
||||
; ---------- velocity ----------
|
||||
|
||||
(define vel (feed/velocity D 10))
|
||||
(feed-test
|
||||
"velocity burst (at=12)"
|
||||
(vel (feed/activity "alice" "post" "z" 12 (list)))
|
||||
3)
|
||||
(feed-test
|
||||
"velocity mid (at=8)"
|
||||
(vel (feed/activity "alice" "post" "z" 8 (list)))
|
||||
2)
|
||||
(feed-test
|
||||
"velocity first (at=5)"
|
||||
(vel (feed/activity "alice" "post" "z" 5 (list)))
|
||||
1)
|
||||
(feed-test
|
||||
"velocity other actor"
|
||||
(vel (feed/activity "bob" "post" "z" 12 (list)))
|
||||
0)
|
||||
|
||||
; ---------- engagement ----------
|
||||
|
||||
(define eng (feed/engagement A))
|
||||
(feed-test
|
||||
"engagement p1"
|
||||
(eng (feed/activity "x" "post" "p1" 0 (list)))
|
||||
2)
|
||||
(feed-test
|
||||
"engagement p2"
|
||||
(eng (feed/activity "x" "post" "p2" 0 (list)))
|
||||
1)
|
||||
|
||||
; ---------- composite ----------
|
||||
|
||||
(define
|
||||
cmp1
|
||||
(feed/composite (list (list 2 (fn (a) (get a :at))))))
|
||||
(feed-test
|
||||
"composite single part"
|
||||
(cmp1 (feed/activity "x" "post" "o" 5 (list)))
|
||||
10)
|
||||
(define
|
||||
cmp2
|
||||
(feed/composite
|
||||
(list
|
||||
(list 2 (fn (a) (get a :at)))
|
||||
(list 3 (fn (a) 1)))))
|
||||
(feed-test
|
||||
"composite two parts"
|
||||
(cmp2 (feed/activity "x" "post" "o" 5 (list)))
|
||||
13)
|
||||
|
||||
; ---------- ranking ----------
|
||||
|
||||
(define
|
||||
R
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "oC" 80 (list))
|
||||
(feed/activity "u" "post" "oA" 100 (list))
|
||||
(feed/activity "u" "post" "oB" 90 (list)))))
|
||||
|
||||
(feed-test
|
||||
"rank by recency objects"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/rank R rec)))
|
||||
(list "oA" "oB" "oC"))
|
||||
(feed-test
|
||||
"top-2 by recency"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/top R rec 2)))
|
||||
(list "oA" "oB"))
|
||||
(feed-test "top-2 count" (feed/count (feed/top R rec 2)) 2)
|
||||
|
||||
; constant score -> tiebreak by :at descending
|
||||
(define
|
||||
T
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "f" 10 (list))
|
||||
(feed/activity "u" "post" "g" 30 (list))
|
||||
(feed/activity "u" "post" "h" 20 (list)))))
|
||||
(feed-test
|
||||
"tiebreak at-desc"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/rank T (fn (a) 0))))
|
||||
(list "g" "h" "f"))
|
||||
|
||||
; equal score AND equal :at -> stable input order
|
||||
(define
|
||||
E
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "first" 50 (list))
|
||||
(feed/activity "u" "post" "second" 50 (list)))))
|
||||
(feed-test
|
||||
"stable equal-key input order"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/rank E (fn (a) 0))))
|
||||
(list "first" "second"))
|
||||
|
||||
(feed-test
|
||||
"with-scores attaches score"
|
||||
(get (nth (feed/items (feed/with-scores R rec)) 1) :score)
|
||||
1)
|
||||
|
||||
(feed-test "rank preserves count" (feed/count (feed/rank A rec)) 4)
|
||||
49
lib/feed/tests/thread.sx
Normal file
49
lib/feed/tests/thread.sx
Normal file
@@ -0,0 +1,49 @@
|
||||
; Follow-up — conversation threading via :reply-to closure. (feed-test name got expected)
|
||||
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "a" :object "root" :at 1})
|
||||
(feed/normalize {:actor "b" :object "r1" :at 2 :verb "reply" :reply-to "root"})
|
||||
(feed/normalize {:actor "c" :object "r2" :at 3 :verb "reply" :reply-to "root"})
|
||||
(feed/normalize {:actor "d" :object "r3" :at 4 :verb "reply" :reply-to "r1"})
|
||||
(feed/normalize {:actor "e" :object "x" :at 5}))))
|
||||
|
||||
; ---------- direct replies ----------
|
||||
|
||||
(feed-test "direct replies to root" (feed/reply-count S "root") 2)
|
||||
(feed-test "direct replies to r1" (feed/reply-count S "r1") 1)
|
||||
(feed-test "no replies to r3" (feed/reply-count S "r3") 0)
|
||||
(feed-test
|
||||
"replies objects to root"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/replies S "root")))
|
||||
(list "r1" "r2"))
|
||||
|
||||
; ---------- thread closure ----------
|
||||
|
||||
(feed-test
|
||||
"thread objects root (transitive)"
|
||||
(feed/thread-objects S "root")
|
||||
(list "root" "r1" "r2" "r3"))
|
||||
(feed-test
|
||||
"thread root chronological"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/thread S "root")))
|
||||
(list "root" "r1" "r2" "r3"))
|
||||
(feed-test "thread size root" (feed/thread-size S "root") 4)
|
||||
(feed-test
|
||||
"thread excludes unrelated x"
|
||||
(feed/-elem?
|
||||
"x"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/thread S "root"))))
|
||||
false)
|
||||
|
||||
; ---------- sub-thread ----------
|
||||
|
||||
(feed-test
|
||||
"thread from r1 (sub-tree)"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/thread S "r1")))
|
||||
(list "r1" "r3"))
|
||||
(feed-test "thread size r1" (feed/thread-size S "r1") 2)
|
||||
(feed-test "leaf thread is itself" (feed/thread-size S "r3") 1)
|
||||
(feed-test "unrelated thread is itself" (feed/thread-size S "x") 1)
|
||||
82
lib/feed/tests/trending.sx
Normal file
82
lib/feed/tests/trending.sx
Normal file
@@ -0,0 +1,82 @@
|
||||
; Follow-up — trending objects/actors by recent activity. (feed-test name got expected)
|
||||
|
||||
; window (50,100]: X@60,X@70 (a), Y@80 (b), Z@90 (c); W@40 is too old
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "a" "post" "X" 60 (list))
|
||||
(feed/activity "a" "post" "X" 70 (list))
|
||||
(feed/activity "b" "post" "Y" 80 (list))
|
||||
(feed/activity "c" "post" "Z" 90 (list))
|
||||
(feed/activity "d" "post" "W" 40 (list)))))
|
||||
|
||||
; ---------- trending objects ----------
|
||||
|
||||
(feed-test
|
||||
"trending count (3 in window)"
|
||||
(len (feed/trending S 100 50 10))
|
||||
3)
|
||||
(feed-test
|
||||
"trending top object"
|
||||
(get
|
||||
(nth (feed/trending S 100 50 10) 0)
|
||||
:object)
|
||||
"X")
|
||||
(feed-test
|
||||
"trending top count"
|
||||
(get
|
||||
(nth (feed/trending S 100 50 10) 0)
|
||||
:count)
|
||||
2)
|
||||
(feed-test
|
||||
"trending order (count desc, key asc tiebreak)"
|
||||
(map
|
||||
(fn (e) (get e :object))
|
||||
(feed/trending S 100 50 10))
|
||||
(list "X" "Y" "Z"))
|
||||
(feed-test
|
||||
"trending top-2"
|
||||
(map
|
||||
(fn (e) (get e :object))
|
||||
(feed/trending S 100 50 2))
|
||||
(list "X" "Y"))
|
||||
(feed-test
|
||||
"old object W excluded"
|
||||
(feed/-elem?
|
||||
"W"
|
||||
(map
|
||||
(fn (e) (get e :object))
|
||||
(feed/trending S 100 50 10)))
|
||||
false)
|
||||
(feed-test
|
||||
"narrow window keeps only newest"
|
||||
(map
|
||||
(fn (e) (get e :object))
|
||||
(feed/trending S 100 15 10))
|
||||
(list "Z"))
|
||||
(feed-test
|
||||
"empty window -> nothing"
|
||||
(feed/trending S 100 5 10)
|
||||
(list))
|
||||
|
||||
; ---------- trending actors ----------
|
||||
|
||||
(feed-test
|
||||
"trending actor top"
|
||||
(get
|
||||
(nth (feed/trending-actors S 100 50 10) 0)
|
||||
:actor)
|
||||
"a")
|
||||
(feed-test
|
||||
"trending actor count"
|
||||
(get
|
||||
(nth (feed/trending-actors S 100 50 10) 0)
|
||||
:count)
|
||||
2)
|
||||
(feed-test
|
||||
"trending actors order"
|
||||
(map
|
||||
(fn (e) (get e :actor))
|
||||
(feed/trending-actors S 100 50 10))
|
||||
(list "a" "b" "c"))
|
||||
59
lib/feed/thread.sx
Normal file
59
lib/feed/thread.sx
Normal file
@@ -0,0 +1,59 @@
|
||||
; feed/thread — conversation threading. A reply carries :reply-to <parent-object>
|
||||
; (normalize preserves it). A thread is the transitive closure over :reply-to from
|
||||
; a root object: root + replies + replies-to-replies, gathered chronologically.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-elem?, feed/-distinct).
|
||||
|
||||
; direct replies to an object
|
||||
(define
|
||||
feed/replies
|
||||
(fn
|
||||
(stream object)
|
||||
(feed/filter stream (fn (a) (equal? (get a :reply-to) object)))))
|
||||
|
||||
(define
|
||||
feed/reply-count
|
||||
(fn (stream object) (feed/count (feed/replies stream object))))
|
||||
|
||||
; iterate f from x until the result stops growing (set-closure fixpoint)
|
||||
(define
|
||||
feed/-fixpoint
|
||||
(fn
|
||||
(f x)
|
||||
(let
|
||||
((nx (f x)))
|
||||
(if (= (len nx) (len x)) x (feed/-fixpoint f nx)))))
|
||||
|
||||
; the set of object-ids in the thread rooted at `root`
|
||||
(define
|
||||
feed/thread-objects
|
||||
(fn
|
||||
(stream root)
|
||||
(let
|
||||
((all (feed/items stream)))
|
||||
(feed/-fixpoint
|
||||
(fn
|
||||
(acc)
|
||||
(feed/-distinct
|
||||
(append
|
||||
acc
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(filter (fn (a) (feed/-elem? (get a :reply-to) acc)) all)))))
|
||||
(list root)))))
|
||||
|
||||
; the full thread as a chronological stream (root + all descendants)
|
||||
(define
|
||||
feed/thread
|
||||
(fn
|
||||
(stream root)
|
||||
(let
|
||||
((objs (feed/thread-objects stream root)))
|
||||
(feed/sort-by-at
|
||||
(feed/filter stream (fn (a) (feed/-elem? (get a :object) objs)))))))
|
||||
|
||||
; how many activities are in the thread (root counts as 1)
|
||||
(define
|
||||
feed/thread-size
|
||||
(fn (stream root) (feed/count (feed/thread stream root))))
|
||||
42
lib/feed/trending.sx
Normal file
42
lib/feed/trending.sx
Normal file
@@ -0,0 +1,42 @@
|
||||
; feed/trending — what's hot right now: objects (or actors) ranked by activity
|
||||
; count within a recency window. Deterministic: count descending, ties broken by
|
||||
; key ascending (entries are pre-sorted by key, then stable grade-down by count).
|
||||
;
|
||||
; Requires: lib/feed/stream.sx, lib/feed/aggregate.sx (object/actor-counts),
|
||||
; lib/feed/rank.sx (feed/-desc-by).
|
||||
|
||||
; activities within (now-window, now]
|
||||
(define
|
||||
feed/-recent
|
||||
(fn
|
||||
(stream now window)
|
||||
(feed/filter
|
||||
stream
|
||||
(fn (a) (and (<= (get a :at) now) (> (get a :at) (- now window)))))))
|
||||
|
||||
; counts dict -> top-N entries {label key, :count n}, count desc, key asc
|
||||
(define
|
||||
feed/-top-counts
|
||||
(fn
|
||||
(counts label n)
|
||||
(let
|
||||
((entries (map (fn (k) (assoc {:count (get counts k)} label k)) (sort (keys counts)))))
|
||||
(take (feed/-desc-by entries (fn (e) (get e :count))) n))))
|
||||
|
||||
; top-N trending objects in the window
|
||||
(define
|
||||
feed/trending
|
||||
(fn
|
||||
(stream now window n)
|
||||
(feed/-top-counts
|
||||
(feed/object-counts (feed/-recent stream now window))
|
||||
:object n)))
|
||||
|
||||
; top-N most active actors in the window
|
||||
(define
|
||||
feed/trending-actors
|
||||
(fn
|
||||
(stream now window n)
|
||||
(feed/-top-counts
|
||||
(feed/actor-counts (feed/-recent stream now window))
|
||||
:actor n)))
|
||||
24
lib/search/api.sx
Normal file
24
lib/search/api.sx
Normal file
@@ -0,0 +1,24 @@
|
||||
;; search public API — assembles the canonical Haskell source from all layers.
|
||||
;; Tests and callers concatenate `search/src` with their own top-level bindings
|
||||
;; (e.g. "result = lookupTerm \"cat\" idx\n") and evaluate via the haskell-on-sx
|
||||
;; interpreter. Public Haskell entry points: indexDoc, lookupTerm, deleteDoc,
|
||||
;; docFreq, allTerms, tokens, positioned, evalQuery, parseQuery, searchQuery,
|
||||
;; rankTfIdf, rankBm25, topNTfIdf, topNBm25, fedIndex, aclFilter, searchTfIdfAcl,
|
||||
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf.
|
||||
|
||||
(define
|
||||
search/src
|
||||
(str
|
||||
search/tokenize-src
|
||||
"\n"
|
||||
search/index-src
|
||||
"\n"
|
||||
search/query-src
|
||||
"\n"
|
||||
search/parse-src
|
||||
"\n"
|
||||
search/rank-src
|
||||
"\n"
|
||||
search/fed-src
|
||||
"\n"
|
||||
search/prefix-src))
|
||||
39
lib/search/conformance.conf
Normal file
39
lib/search/conformance.conf
Normal file
@@ -0,0 +1,39 @@
|
||||
# search-on-sx conformance config — sourced by lib/guest/conformance.sh.
|
||||
|
||||
LANG_NAME=search
|
||||
SCOREBOARD_DIR=lib/search
|
||||
MODE=counters
|
||||
COUNTERS_PASS=hk-test-pass
|
||||
COUNTERS_FAIL=hk-test-fail
|
||||
TIMEOUT_PER_SUITE=600
|
||||
|
||||
PRELOADS=(
|
||||
lib/haskell/tokenizer.sx
|
||||
lib/haskell/layout.sx
|
||||
lib/haskell/parser.sx
|
||||
lib/haskell/desugar.sx
|
||||
lib/haskell/runtime.sx
|
||||
lib/haskell/match.sx
|
||||
lib/haskell/eval.sx
|
||||
lib/haskell/map.sx
|
||||
lib/haskell/set.sx
|
||||
lib/haskell/testlib.sx
|
||||
lib/search/tokenize.sx
|
||||
lib/search/index.sx
|
||||
lib/search/query.sx
|
||||
lib/search/parse.sx
|
||||
lib/search/rank.sx
|
||||
lib/search/fed.sx
|
||||
lib/search/prefix.sx
|
||||
lib/search/api.sx
|
||||
lib/search/testlib.sx
|
||||
)
|
||||
|
||||
SUITES=(
|
||||
"index:lib/search/tests/index.sx"
|
||||
"boolean:lib/search/tests/boolean.sx"
|
||||
"parse:lib/search/tests/parse.sx"
|
||||
"rank:lib/search/tests/rank.sx"
|
||||
"integration:lib/search/tests/integration.sx"
|
||||
"prefix:lib/search/tests/prefix.sx"
|
||||
)
|
||||
3
lib/search/conformance.sh
Executable file
3
lib/search/conformance.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
|
||||
# Thin wrapper — see lib/guest/conformance.sh and lib/search/conformance.conf.
|
||||
exec bash "$(dirname "$0")/../guest/conformance.sh" "$(dirname "$0")/conformance.conf" "$@"
|
||||
16
lib/search/fed.sx
Normal file
16
lib/search/fed.sx
Normal file
@@ -0,0 +1,16 @@
|
||||
;; search federation + ACL — Haskell source fragment. Depends on index + rank.
|
||||
;; Federation merges per-peer INDICES (not ranked results): each peer's local
|
||||
;; DocIds are relabelled to global ids `gid peer local = peer*1000 + local`
|
||||
;; (dedupe by (peer,doc-id) is automatic via the bijection), then posting lists
|
||||
;; are unioned per term. Ranking then runs once over the merged index, which is
|
||||
;; rank-correct. ACL is a post-rank filter: an injected `permit :: DocId -> Bool`
|
||||
;; predicate (viewer baked in by the caller) — never baked into the index.
|
||||
;; fedIndex :: [(PeerId, Index)] -> Index
|
||||
;; aclFilter :: (DocId -> Bool) -> [DocId] -> [DocId]
|
||||
;; searchTfIdfAcl :: (DocId -> Bool) -> [Term] -> Index -> [DocId]
|
||||
;; topNTfIdfAcl :: Int -> (DocId -> Bool) -> [Term] -> Index -> [DocId]
|
||||
;; searchBm25Acl :: (DocId -> Bool) -> Float -> Float -> [Term] -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/fed-src
|
||||
"gid peer local = peer * 1000 + local\nfedRelabelPosting peer p = (gid peer (fst p), snd p)\nfedRelabelEntry peer e = (fst e, map (fedRelabelPosting peer) (snd e))\nfedRelabelIndex peer idx = map (fedRelabelEntry peer) idx\nfedInsP p [] = [p]\nfedInsP p (q:qs) = if fst p < fst q then p : q : qs else if fst p == fst q then p : qs else q : fedInsP p qs\nfedMergePL a b = foldr fedInsP b a\nfedInsTerm t pl [] = [(t, pl)]\nfedInsTerm t pl (x:xs) = if t < fst x then (t, pl) : x : xs else if t == fst x then (fst x, fedMergePL pl (snd x)) : xs else x : fedInsTerm t pl xs\nfedMergeEntry idx e = fedInsTerm (fst e) (snd e) idx\nfedMergeTwo a b = foldl fedMergeEntry a b\nfedAddPeer acc pair = fedMergeTwo acc (fedRelabelIndex (fst pair) (snd pair))\nfedIndex pairs = foldl fedAddPeer emptyIndex pairs\naclFilter permit docs = filter permit docs\nsearchTfIdfAcl permit ts idx = aclFilter permit (rankTfIdf ts idx)\ntopNTfIdfAcl n permit ts idx = take n (aclFilter permit (rankTfIdf ts idx))\nsearchBm25Acl permit k1 b ts idx = aclFilter permit (rankBm25 k1 b ts idx)\n")
|
||||
15
lib/search/index.sx
Normal file
15
lib/search/index.sx
Normal file
@@ -0,0 +1,15 @@
|
||||
;; search inverted index — Haskell source fragment (depends on tokenize).
|
||||
;; Index = [(Term, [(DocId, [Pos])])], sorted by Term; postings sorted by DocId.
|
||||
;; Data.Map's public API lacks toList/keys/map/filter, so a sorted assoc-list
|
||||
;; index is used — it is the conceptual `Map Term [(DocId,[Pos])]` and exposes
|
||||
;; term iteration (allTerms) and df naturally for ranking.
|
||||
;; emptyIndex :: Index
|
||||
;; indexDoc :: DocId -> String -> Index -> Index (re-index replaces)
|
||||
;; lookupTerm :: Term -> Index -> [(DocId, [Pos])]
|
||||
;; deleteDoc :: DocId -> Index -> Index
|
||||
;; docFreq :: Term -> Index -> Int
|
||||
;; allTerms :: Index -> [Term]
|
||||
|
||||
(define
|
||||
search/index-src
|
||||
"emptyIndex = []\ngroupBump [] t p = [(t, [p])]\ngroupBump (g:gs) t p = if fst g == t then (t, snd g ++ [p]) : gs else g : groupBump gs t p\ngroupStep acc tp = groupBump acc (fst tp) (snd tp)\ngroupTok pairs = foldl groupStep [] pairs\ninsPosting d ps [] = [(d, ps)]\ninsPosting d ps (q:qs) = if d < fst q then (d, ps) : q : qs else if d == fst q then (d, ps) : qs else q : insPosting d ps qs\ninsTerm t d ps [] = [(t, [(d, ps)])]\ninsTerm t d ps (e:es) = if t < fst e then (t, [(d, ps)]) : e : es else if t == fst e then (fst e, insPosting d ps (snd e)) : es else e : insTerm t d ps es\nindexStep d ix tp = insTerm (fst tp) d (snd tp) ix\nindexDoc d text idx = foldl (indexStep d) idx (groupTok (positioned text))\nlookupTerm t idx = case lookup t idx of { Nothing -> []; Just pl -> pl }\ndocFreq t idx = length (lookupTerm t idx)\nallTerms idx = map fst idx\npostingKeep d q = fst q /= d\ndropTermDoc d e = (fst e, filter (postingKeep d) (snd e))\nplKeep e = not (null (snd e))\ndeleteDoc d idx = filter plKeep (map (dropTermDoc d) idx)\n")
|
||||
18
lib/search/parse.sx
Normal file
18
lib/search/parse.sx
Normal file
@@ -0,0 +1,18 @@
|
||||
;; search query parser — Haskell source fragment. Depends on tokenize + query.
|
||||
;; Grammar (precedence OR < AND < NOT):
|
||||
;; expr = orExpr
|
||||
;; orExpr = andExpr (OR andExpr)*
|
||||
;; andExpr= notExpr ((AND | <implicit>) notExpr)* -- adjacency means AND
|
||||
;; notExpr= NOT notExpr | atom
|
||||
;; atom = '(' expr ')' | '"' word+ '"' | word
|
||||
;; Keywords AND/OR/NOT are case-insensitive; bare words are normalized via tokens.
|
||||
;; Gotchas: delimiters matched by ord (escaped char literals like '\"' break the
|
||||
;; haskell-on-sx tokenizer); an [] *pattern* inside a `case` alt also breaks the
|
||||
;; parser, so qNormTerm/qDropRP/showQ are written as multi-clause functions.
|
||||
;; parseQuery :: String -> Query
|
||||
;; searchQuery :: String -> Index -> [DocId]
|
||||
;; showQ :: Query -> String -- canonical render for tests/debug
|
||||
|
||||
(define
|
||||
search/parse-src
|
||||
"data QTok = TAnd | TOr | TNot | TLP | TRP | TWord String | TPhrase [String]\nqIsSpace c = ord c == 32\nqIsLP c = ord c == 40\nqIsRP c = ord c == 41\nqIsQuote c = ord c == 34\nqDelim c = qIsSpace c || qIsLP c || qIsRP c || qIsQuote c\nqReadWord [] = ([], [])\nqReadWord (c:cs) = if qDelim c then ([], c:cs) else let (w, rest) = qReadWord cs in (c:w, rest)\nqReadPhrase [] = ([], [])\nqReadPhrase (c:cs) = if qIsQuote c then ([], cs) else let (w, rest) = qReadPhrase cs in (c:w, rest)\ntoUpperCh c = chr (toUpper (ord c))\nqUpper w = joinChars (map toUpperCh w)\nqFirstTok [] = \"\"\nqFirstTok (x:xs) = x\nqNormTerm w = qFirstTok (tokens w)\nqClassify w = if qUpper w == \"AND\" then TAnd else if qUpper w == \"OR\" then TOr else if qUpper w == \"NOT\" then TNot else TWord (qNormTerm w)\nqPhraseTok cs = let (p, rest) = qReadPhrase cs in TPhrase (tokens p) : qtokens rest\nqWordTok cs = let (w, rest) = qReadWord cs in qClassify w : qtokens rest\nqtokens [] = []\nqtokens (c:cs) = if qIsSpace c then qtokens cs else if qIsLP c then TLP : qtokens cs else if qIsRP c then TRP : qtokens cs else if qIsQuote c then qPhraseTok cs else qWordTok (c:cs)\nqDropRP (q, (TRP:rest)) = (q, rest)\nqDropRP (q, ts) = (q, ts)\nparseAtom [] = (Term \"\", [])\nparseAtom (TLP:ts) = qDropRP (parseExpr ts)\nparseAtom (TPhrase ps : ts) = (Phrase ps, ts)\nparseAtom (TWord w : ts) = (Term w, ts)\nparseAtom ts = (Term \"\", ts)\nqWrapNot (q, ts) = (Not q, ts)\nparseNot (TNot:ts) = qWrapNot (parseNot ts)\nparseNot ts = parseAtom ts\nqStartsAtom (TWord w : ts) = True\nqStartsAtom (TPhrase p : ts) = True\nqStartsAtom (TLP : ts) = True\nqStartsAtom (TNot : ts) = True\nqStartsAtom ts = False\nqAndStep left ts = let (r, rest) = parseNot ts in parseAndR (And left r) rest\nparseAndR left (TAnd:ts) = qAndStep left ts\nparseAndR left ts = if qStartsAtom ts then qAndStep left ts else (left, ts)\nparseAnd ts = let (l, rest) = parseNot ts in parseAndR l rest\nparseOrR left (TOr:ts) = let (r, rest) = parseAnd ts in parseOrR (Or left r) rest\nparseOrR left ts = (left, ts)\nparseExpr ts = let (l, rest) = parseAnd ts in parseOrR l rest\nparseQuery s = fst (parseExpr (qtokens s))\nsearchQuery s idx = evalQuery idx (parseQuery s)\njoinSp [] = \"\"\njoinSp [x] = x\njoinSp (x:xs) = x ++ \"-\" ++ joinSp xs\nshowQ (Term t) = \"T:\" ++ t\nshowQ (And a b) = \"(\" ++ showQ a ++ \" & \" ++ showQ b ++ \")\"\nshowQ (Or a b) = \"(\" ++ showQ a ++ \" | \" ++ showQ b ++ \")\"\nshowQ (Not a) = \"!\" ++ showQ a\nshowQ (Phrase ts) = \"P:\" ++ joinSp ts\n")
|
||||
10
lib/search/prefix.sx
Normal file
10
lib/search/prefix.sx
Normal file
@@ -0,0 +1,10 @@
|
||||
;; search prefix / wildcard queries — Haskell source fragment. Depends on index +
|
||||
;; rank (reuses candStep / rankTfIdf). A prefix matches every indexed term that
|
||||
;; starts with it; the matching terms are unioned (OR) into a docid set.
|
||||
;; prefixTerms :: String -> Index -> [Term] (sorted, from allTerms)
|
||||
;; prefixDocs :: String -> Index -> [DocId] (sorted union)
|
||||
;; prefixRankTfIdf :: String -> Index -> [DocId] (ranked by the matched terms)
|
||||
|
||||
(define
|
||||
search/prefix-src
|
||||
"prefixTerms pre idx = filter (isPrefixOf pre) (allTerms idx)\nprefixDocs pre idx = foldl (candStep idx) [] (prefixTerms pre idx)\nprefixRankTfIdf pre idx = rankTfIdf (prefixTerms pre idx) idx\n")
|
||||
11
lib/search/query.sx
Normal file
11
lib/search/query.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
;; search query AST + boolean/phrase evaluation — Haskell source fragment.
|
||||
;; Depends on tokenize + index.
|
||||
;; data Query = Term String | And Query Query | Or Query Query
|
||||
;; | Not Query | Phrase [String]
|
||||
;; evalQuery :: Index -> Query -> [DocId] (sorted, unique)
|
||||
;; Boolean ops are linear merges over docid-sorted posting lists; Not uses
|
||||
;; allDocs as the universe; Phrase checks positional adjacency.
|
||||
|
||||
(define
|
||||
search/query-src
|
||||
"data Query = Term String | And Query Query | Or Query Query | Not Query | Phrase [String]\ndocsWith t idx = map fst (lookupTerm t idx)\nsortedUnion [] ys = ys\nsortedUnion xs [] = xs\nsortedUnion (x:xs) (y:ys) = if x < y then x : sortedUnion xs (y:ys) else if x > y then y : sortedUnion (x:xs) ys else x : sortedUnion xs ys\nsortedInter [] ys = []\nsortedInter xs [] = []\nsortedInter (x:xs) (y:ys) = if x < y then sortedInter xs (y:ys) else if x > y then sortedInter (x:xs) ys else x : sortedInter xs ys\nsortedDiff [] ys = []\nsortedDiff xs [] = xs\nsortedDiff (x:xs) (y:ys) = if x < y then x : sortedDiff xs (y:ys) else if x > y then sortedDiff (x:xs) ys else sortedDiff xs ys\nmergeDocs acc e = sortedUnion acc (map fst (snd e))\nallDocs idx = foldl mergeDocs [] idx\nposIn t d idx = case lookup d (lookupTerm t idx) of { Nothing -> []; Just ps -> ps }\nelemSorted x [] = False\nelemSorted x (y:ys) = if x == y then True else if x < y then False else elemSorted x ys\nphraseAtAll [] d idx p i = True\nphraseAtAll (t:ts) d idx p i = if elemSorted (p + i) (posIn t d idx) then phraseAtAll ts d idx p (i + 1) else False\nphraseStartsAt ts d idx p = phraseAtAll ts d idx p 0\nphraseInDoc [] d idx = True\nphraseInDoc (t0:rest) d idx = any (phraseStartsAt (t0:rest) d idx) (posIn t0 d idx)\nphraseHere ts idx d = phraseInDoc ts d idx\ninterStep idx acc tt = sortedInter acc (docsWith tt idx)\nphraseCands [] idx = allDocs idx\nphraseCands (t:ts) idx = foldl (interStep idx) (docsWith t idx) ts\nphraseDocs ts idx = filter (phraseHere ts idx) (phraseCands ts idx)\nevalQuery idx q = case q of { Term t -> docsWith t idx ; And a b -> sortedInter (evalQuery idx a) (evalQuery idx b) ; Or a b -> sortedUnion (evalQuery idx a) (evalQuery idx b) ; Not a -> sortedDiff (allDocs idx) (evalQuery idx a) ; Phrase ts -> phraseDocs ts idx }\n")
|
||||
14
lib/search/rank.sx
Normal file
14
lib/search/rank.sx
Normal file
@@ -0,0 +1,14 @@
|
||||
;; search ranking — Haskell source fragment. Depends on tokenize + index + query.
|
||||
;; Ranked retrieval over the candidate set (docs containing any query term).
|
||||
;; Scores are floats; ties broken by DocId ascending (deterministic).
|
||||
;; numDocs :: Index -> Int
|
||||
;; docFreq :: Term -> Index -> Int (from index)
|
||||
;; docLen :: DocId -> Index -> Int
|
||||
;; rankTfIdf :: [Term] -> Index -> [DocId]
|
||||
;; topNTfIdf :: Int -> [Term] -> Index -> [DocId]
|
||||
;; rankBm25 :: Float -> Float -> [Term] -> Index -> [DocId] (k1, b)
|
||||
;; topNBm25 :: Int -> Float -> Float -> [Term] -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/rank-src
|
||||
"numDocs idx = length (allDocs idx)\ntfIn t d idx = length (posIn t d idx)\nqIdf n df = if df == 0 then 0 else log (n / df)\nidf t idx = qIdf (numDocs idx) (docFreq t idx)\ntermScoreTf idx d t = tfIn t d idx * idf t idx\ntfidfDoc ts idx d = sum (map (termScoreTf idx d) ts)\ncandStep idx acc t = sortedUnion acc (docsWith t idx)\ncandDocs ts idx = foldl (candStep idx) [] ts\ncmpScore p1 p2 = if fst p1 > fst p2 then LT else if fst p1 < fst p2 then GT else compare (snd p1) (snd p2)\nmkPair f ts idx d = (f ts idx d, d)\nrankWith f ts idx = map snd (sortBy cmpScore (map (mkPair f ts idx) (candDocs ts idx)))\nrankTfIdf ts idx = rankWith tfidfDoc ts idx\ntopNTfIdf n ts idx = take n (rankTfIdf ts idx)\ntfAt d idx t = tfIn t d idx\ndocLen d idx = sum (map (tfAt d idx) (allTerms idx))\nlenAt idx d = docLen d idx\navgDocLen idx = sum (map (lenAt idx) (allDocs idx)) / numDocs idx\nbm25idf t idx = log ((numDocs idx - docFreq t idx + 0.5) / (docFreq t idx + 0.5) + 1)\nbm25Term k1 b avgdl idx d t = bm25idf t idx * (tfIn t d idx * (k1 + 1)) / (tfIn t d idx + k1 * (1 - b + b * docLen d idx / avgdl))\nbm25Doc k1 b ts idx d = sum (map (bm25Term k1 b (avgDocLen idx) idx d) ts)\nrankBm25 k1 b ts idx = rankWith (bm25Doc k1 b) ts idx\ntopNBm25 n k1 b ts idx = take n (rankBm25 k1 b ts idx)\n")
|
||||
15
lib/search/scoreboard.json
Normal file
15
lib/search/scoreboard.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"lang": "search",
|
||||
"total_passed": 136,
|
||||
"total_failed": 0,
|
||||
"total": 136,
|
||||
"suites": [
|
||||
{"name":"index","passed":18,"failed":0,"total":18},
|
||||
{"name":"boolean","passed":28,"failed":0,"total":28},
|
||||
{"name":"parse","passed":32,"failed":0,"total":32},
|
||||
{"name":"rank","passed":23,"failed":0,"total":23},
|
||||
{"name":"integration","passed":21,"failed":0,"total":21},
|
||||
{"name":"prefix","passed":14,"failed":0,"total":14}
|
||||
],
|
||||
"generated": "2026-06-06T20:21:41+00:00"
|
||||
}
|
||||
12
lib/search/scoreboard.md
Normal file
12
lib/search/scoreboard.md
Normal file
@@ -0,0 +1,12 @@
|
||||
# search scoreboard
|
||||
|
||||
**136 / 136 passing** (0 failure(s)).
|
||||
|
||||
| Suite | Passed | Total | Status |
|
||||
|-------|--------|-------|--------|
|
||||
| index | 18 | 18 | ok |
|
||||
| boolean | 28 | 28 | ok |
|
||||
| parse | 32 | 32 | ok |
|
||||
| rank | 23 | 23 | ok |
|
||||
| integration | 21 | 21 | ok |
|
||||
| prefix | 14 | 14 | ok |
|
||||
50
lib/search/testlib.sx
Normal file
50
lib/search/testlib.sx
Normal file
@@ -0,0 +1,50 @@
|
||||
;; search test helpers — convert forced haskell values to plain SX and run
|
||||
;; programs built on top of search/src. Reuses hk-test / counters from
|
||||
;; lib/haskell/testlib.sx (preloaded by the conformance config).
|
||||
|
||||
;; Recursively turn a forced HK value into plain SX:
|
||||
;; cons-list -> SX list, Tuple -> SX list, leaves unchanged.
|
||||
(define
|
||||
search-hk->sx
|
||||
(fn
|
||||
(v)
|
||||
(cond
|
||||
((and (list? v) (not (empty? v)) (= (first v) "[]")) (list))
|
||||
((and (list? v) (not (empty? v)) (= (first v) ":"))
|
||||
(cons
|
||||
(search-hk->sx (nth v 1))
|
||||
(search-hk->sx (nth v 2))))
|
||||
((and (list? v) (not (empty? v)) (= (first v) "Tuple"))
|
||||
(map search-hk->sx (rest v)))
|
||||
(:else v))))
|
||||
|
||||
;; Evaluate `extra` (extra top-level Haskell bindings) on top of search/src
|
||||
;; and return binding `name` as plain SX.
|
||||
(define
|
||||
search-eval
|
||||
(fn
|
||||
(extra name)
|
||||
(search-hk->sx
|
||||
(hk-deep-force
|
||||
(get (hk-eval-program (hk-core (str search/src extra))) name)))))
|
||||
|
||||
(define
|
||||
search-join
|
||||
(fn
|
||||
(sep xs)
|
||||
(cond
|
||||
((empty? xs) "")
|
||||
((empty? (rest xs)) (first xs))
|
||||
(:else (str (first xs) sep (search-join sep (rest xs)))))))
|
||||
|
||||
;; Batch many haskell expressions into ONE program evaluation (amortizes the
|
||||
;; cost of parsing/binding search/src — important under heavy CPU load).
|
||||
;; `setup` is extra top-level Haskell; `exprs` is a list of expression strings
|
||||
;; whose results form a single haskell list. Returns the SX list of results.
|
||||
(define
|
||||
search-batch
|
||||
(fn
|
||||
(setup exprs)
|
||||
(search-eval
|
||||
(str setup "\nresult = [" (search-join ", " exprs) "]\n")
|
||||
"result")))
|
||||
123
lib/search/tests/boolean.sx
Normal file
123
lib/search/tests/boolean.sx
Normal file
@@ -0,0 +1,123 @@
|
||||
;; Phase 2 — query AST + boolean/phrase evaluation (hand-built Query values).
|
||||
;; Corpus:
|
||||
;; doc 1 "the quick brown dog" -> the quick brown dog
|
||||
;; doc 2 "a quick brown fox" -> a quick brown fox
|
||||
;; doc 3 "the dog barks loudly" -> the dog barks loudly
|
||||
;; All queries run in ONE program evaluation (search-batch) to stay fast.
|
||||
|
||||
(define
|
||||
search-corpus
|
||||
"idx = indexDoc 3 \"the dog barks loudly\" (indexDoc 2 \"a quick brown fox\" (indexDoc 1 \"the quick brown dog\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
bool-cases
|
||||
(list
|
||||
(list
|
||||
"term in two docs"
|
||||
"evalQuery idx (Term \"quick\")"
|
||||
(list 1 2))
|
||||
(list
|
||||
"term in two docs (the)"
|
||||
"evalQuery idx (Term \"the\")"
|
||||
(list 1 3))
|
||||
(list "term in one doc" "evalQuery idx (Term \"fox\")" (list 2))
|
||||
(list "term absent" "evalQuery idx (Term \"zzz\")" (list))
|
||||
(list
|
||||
"term case-sensitive at AST level"
|
||||
"evalQuery idx (Term \"QUICK\")"
|
||||
(list))
|
||||
(list "term on empty index" "evalQuery emptyIndex (Term \"cat\")" (list))
|
||||
(list
|
||||
"and both terms"
|
||||
"evalQuery idx (And (Term \"quick\") (Term \"brown\"))"
|
||||
(list 1 2))
|
||||
(list
|
||||
"and overlap subset"
|
||||
"evalQuery idx (And (Term \"the\") (Term \"dog\"))"
|
||||
(list 1 3))
|
||||
(list
|
||||
"and disjoint is empty"
|
||||
"evalQuery idx (And (Term \"the\") (Term \"fox\"))"
|
||||
(list))
|
||||
(list
|
||||
"and right-nested"
|
||||
"evalQuery idx (And (Term \"the\") (And (Term \"dog\") (Term \"barks\")))"
|
||||
(list 3))
|
||||
(list
|
||||
"or two singletons"
|
||||
"evalQuery idx (Or (Term \"fox\") (Term \"barks\"))"
|
||||
(list 2 3))
|
||||
(list
|
||||
"or all docs"
|
||||
"evalQuery idx (Or (Term \"quick\") (Term \"the\"))"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"or with absent term"
|
||||
"evalQuery idx (Or (Term \"fox\") (Term \"zzz\"))"
|
||||
(list 2))
|
||||
(list "not term" "evalQuery idx (Not (Term \"the\"))" (list 2))
|
||||
(list "not term 2" "evalQuery idx (Not (Term \"quick\"))" (list 3))
|
||||
(list
|
||||
"and with not"
|
||||
"evalQuery idx (And (Term \"quick\") (Not (Term \"the\")))"
|
||||
(list 2))
|
||||
(list
|
||||
"double negation"
|
||||
"evalQuery idx (Not (Not (Term \"fox\")))"
|
||||
(list 2))
|
||||
(list
|
||||
"or of and with term"
|
||||
"evalQuery idx (Or (And (Term \"the\") (Term \"dog\")) (Term \"fox\"))"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"phrase adjacent both docs"
|
||||
"evalQuery idx (Phrase [\"quick\", \"brown\"])"
|
||||
(list 1 2))
|
||||
(list
|
||||
"phrase adjacent one doc"
|
||||
"evalQuery idx (Phrase [\"brown\", \"dog\"])"
|
||||
(list 1))
|
||||
(list
|
||||
"phrase the quick"
|
||||
"evalQuery idx (Phrase [\"the\", \"quick\"])"
|
||||
(list 1))
|
||||
(list
|
||||
"phrase dog barks"
|
||||
"evalQuery idx (Phrase [\"dog\", \"barks\"])"
|
||||
(list 3))
|
||||
(list
|
||||
"phrase non-adjacent empty"
|
||||
"evalQuery idx (Phrase [\"quick\", \"dog\"])"
|
||||
(list))
|
||||
(list
|
||||
"phrase order matters"
|
||||
"evalQuery idx (Phrase [\"brown\", \"quick\"])"
|
||||
(list))
|
||||
(list
|
||||
"phrase single term"
|
||||
"evalQuery idx (Phrase [\"dog\"])"
|
||||
(list 1 3))
|
||||
(list
|
||||
"phrase three terms"
|
||||
"evalQuery idx (Phrase [\"the\", \"dog\", \"barks\"])"
|
||||
(list 3))
|
||||
(list
|
||||
"and of phrase and term"
|
||||
"evalQuery idx (And (Phrase [\"quick\", \"brown\"]) (Term \"dog\"))"
|
||||
(list 1))
|
||||
(list
|
||||
"not of phrase"
|
||||
"evalQuery idx (Not (Phrase [\"quick\", \"brown\"]))"
|
||||
(list 3))))
|
||||
|
||||
(define
|
||||
bool-results
|
||||
(search-batch search-corpus (map (fn (c) (nth c 1)) bool-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth bool-results i) (nth c 2)))
|
||||
bool-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
88
lib/search/tests/index.sx
Normal file
88
lib/search/tests/index.sx
Normal file
@@ -0,0 +1,88 @@
|
||||
;; Phase 1 — tokenize + inverted index.
|
||||
;; All cases run in ONE program evaluation (search-batch) to stay fast under load.
|
||||
;; Scalar results (docFreq) are wrapped as singleton lists so the batch is a list
|
||||
;; of lists.
|
||||
|
||||
(define
|
||||
index-cases
|
||||
(list
|
||||
(list
|
||||
"tokens basic lowercases"
|
||||
"tokens \"The Cat sat\""
|
||||
(list "the" "cat" "sat"))
|
||||
(list
|
||||
"tokens strips punctuation"
|
||||
"tokens \"Hello, World!\""
|
||||
(list "hello" "world"))
|
||||
(list "tokens collapses whitespace" "tokens \" a b \"" (list "a" "b"))
|
||||
(list "tokens empty is empty" "tokens \"\"" (list))
|
||||
(list "tokens keeps digits" "tokens \"abc123 x9\"" (list "abc123" "x9"))
|
||||
(list
|
||||
"positioned attaches ordinals"
|
||||
"positioned \"a b a\""
|
||||
(list
|
||||
(list "a" 0)
|
||||
(list "b" 1)
|
||||
(list "a" 2)))
|
||||
(list
|
||||
"index + lookup single doc"
|
||||
"lookupTerm \"cat\" (indexDoc 1 \"the cat sat\" emptyIndex)"
|
||||
(list (list 1 (list 1))))
|
||||
(list
|
||||
"lookup missing term is empty"
|
||||
"lookupTerm \"dog\" (indexDoc 1 \"the cat sat\" emptyIndex)"
|
||||
(list))
|
||||
(list
|
||||
"lookup records all positions"
|
||||
"lookupTerm \"the\" (indexDoc 1 \"the cat the dog the\" emptyIndex)"
|
||||
(list (list 1 (list 0 2 4))))
|
||||
(list
|
||||
"multi-doc posting list sorted by docid"
|
||||
"lookupTerm \"x\" (indexDoc 1 \"x y\" (indexDoc 2 \"x z\" emptyIndex))"
|
||||
(list
|
||||
(list 1 (list 0))
|
||||
(list 2 (list 0))))
|
||||
(list
|
||||
"index/query case symmetry"
|
||||
"lookupTerm \"cat\" (indexDoc 1 \"CAT Cat cat\" emptyIndex)"
|
||||
(list (list 1 (list 0 1 2))))
|
||||
(list
|
||||
"re-index replaces a doc"
|
||||
"lookupTerm \"a\" (indexDoc 1 \"a a a\" (indexDoc 1 \"a\" emptyIndex))"
|
||||
(list (list 1 (list 0 1 2))))
|
||||
(list
|
||||
"delete removes a doc"
|
||||
"lookupTerm \"cat\" (deleteDoc 1 (indexDoc 1 \"the cat\" emptyIndex))"
|
||||
(list))
|
||||
(list
|
||||
"delete leaves other docs"
|
||||
"lookupTerm \"cat\" (deleteDoc 2 (indexDoc 2 \"big cat\" (indexDoc 1 \"the cat\" emptyIndex)))"
|
||||
(list (list 1 (list 1))))
|
||||
(list
|
||||
"docFreq counts docs"
|
||||
"[docFreq \"cat\" (indexDoc 2 \"a cat\" (indexDoc 1 \"the cat\" emptyIndex))]"
|
||||
(list 2))
|
||||
(list
|
||||
"docFreq zero for missing"
|
||||
"[docFreq \"zzz\" (indexDoc 1 \"a b\" emptyIndex)]"
|
||||
(list 0))
|
||||
(list
|
||||
"allTerms sorted and unique"
|
||||
"allTerms (indexDoc 1 \"banana apple cherry apple\" emptyIndex)"
|
||||
(list "apple" "banana" "cherry"))
|
||||
(list
|
||||
"allTerms merged across docs"
|
||||
"allTerms (indexDoc 2 \"d a\" (indexDoc 1 \"c b\" emptyIndex))"
|
||||
(list "a" "b" "c" "d"))))
|
||||
|
||||
(define
|
||||
index-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) index-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth index-results i) (nth c 2)))
|
||||
index-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
102
lib/search/tests/integration.sx
Normal file
102
lib/search/tests/integration.sx
Normal file
@@ -0,0 +1,102 @@
|
||||
;; Phase 4 — federation (merge per-peer indices) + ACL post-filter.
|
||||
;; Peers (global id = peer*1000 + local):
|
||||
;; peer 1: 1 "alpha beta" 2 "alpha gamma" -> 1001 1002
|
||||
;; peer 2: 1 "alpha delta" 2 "beta gamma" -> 2001 2002
|
||||
;; ACL predicates are injected (viewer baked in by the caller), applied post-rank.
|
||||
|
||||
(define
|
||||
fed-setup
|
||||
"p1 = indexDoc 2 \"alpha gamma\" (indexDoc 1 \"alpha beta\" emptyIndex)\np2 = indexDoc 2 \"beta gamma\" (indexDoc 1 \"alpha delta\" emptyIndex)\nfed = fedIndex [(1, p1), (2, p2)]\npermitP1 g = g < 2000\npermitNone g = False\npermitList g = elem g [1002, 2001]\n")
|
||||
|
||||
(define
|
||||
fed-cases
|
||||
(list
|
||||
(list
|
||||
"fed merges all docs"
|
||||
"sort (allDocs fed)"
|
||||
(list 1001 1002 2001 2002))
|
||||
(list
|
||||
"fed docFreq across peers"
|
||||
"[docFreq \"alpha\" fed]"
|
||||
(list 3))
|
||||
(list "fed docFreq beta" "[docFreq \"beta\" fed]" (list 2))
|
||||
(list "fed numDocs" "[numDocs fed]" (list 4))
|
||||
(list
|
||||
"fed term lookup spans peers"
|
||||
"map fst (lookupTerm \"gamma\" fed)"
|
||||
(list 1002 2002))
|
||||
(list
|
||||
"fed preserves positions"
|
||||
"lookupTerm \"beta\" fed"
|
||||
(list
|
||||
(list 1001 (list 1))
|
||||
(list 2002 (list 0))))
|
||||
(list
|
||||
"fed rank alpha tie by gid"
|
||||
"rankTfIdf [\"alpha\"] fed"
|
||||
(list 1001 1002 2001))
|
||||
(list
|
||||
"fed rank beta"
|
||||
"rankTfIdf [\"beta\"] fed"
|
||||
(list 1001 2002))
|
||||
(list
|
||||
"fed boolean and"
|
||||
"searchQuery \"alpha AND beta\" fed"
|
||||
(list 1001))
|
||||
(list
|
||||
"fed boolean or"
|
||||
"searchQuery \"delta OR barks\" fed"
|
||||
(list 2001))
|
||||
(list
|
||||
"fed phrase within peer1"
|
||||
"searchQuery \"\\\"alpha beta\\\"\" fed"
|
||||
(list 1001))
|
||||
(list
|
||||
"fed phrase within peer2"
|
||||
"searchQuery \"\\\"beta gamma\\\"\" fed"
|
||||
(list 2002))
|
||||
(list
|
||||
"fed phrase peer2 alpha delta"
|
||||
"searchQuery \"\\\"alpha delta\\\"\" fed"
|
||||
(list 2001))
|
||||
(list "fed empty peer list" "allDocs (fedIndex [])" (list))
|
||||
(list
|
||||
"fed single relabelled peer"
|
||||
"rankTfIdf [\"alpha\"] (fedIndex [(5, p1)])"
|
||||
(list 5001 5002))
|
||||
(list
|
||||
"acl peer1 only"
|
||||
"aclFilter permitP1 (rankTfIdf [\"alpha\"] fed)"
|
||||
(list 1001 1002))
|
||||
(list
|
||||
"acl allowlist preserves rank order"
|
||||
"aclFilter permitList (rankTfIdf [\"alpha\"] fed)"
|
||||
(list 1002 2001))
|
||||
(list
|
||||
"acl topN after filter"
|
||||
"topNTfIdfAcl 1 permitP1 [\"alpha\"] fed"
|
||||
(list 1001))
|
||||
(list
|
||||
"acl denies all"
|
||||
"aclFilter permitNone (rankTfIdf [\"alpha\"] fed)"
|
||||
(list))
|
||||
(list
|
||||
"acl on bm25"
|
||||
"searchBm25Acl permitP1 1.5 0.75 [\"alpha\"] fed"
|
||||
(list 1001 1002))
|
||||
(list
|
||||
"acl end-to-end tfidf"
|
||||
"searchTfIdfAcl permitP1 [\"alpha\"] fed"
|
||||
(list 1001 1002))))
|
||||
|
||||
(define
|
||||
fed-results
|
||||
(search-batch fed-setup (map (fn (c) (nth c 1)) fed-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth fed-results i) (nth c 2)))
|
||||
fed-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
139
lib/search/tests/parse.sx
Normal file
139
lib/search/tests/parse.sx
Normal file
@@ -0,0 +1,139 @@
|
||||
;; Phase 2 — query parser (parseQuery / searchQuery).
|
||||
;; AST cases assert showQ (parseQuery s); search cases assert searchQuery s idx
|
||||
;; against the standard corpus. Each group runs in one batched program eval.
|
||||
;; doc 1 "the quick brown dog" doc 2 "a quick brown fox" doc 3 "the dog barks loudly"
|
||||
|
||||
(define
|
||||
parse-corpus
|
||||
"idx = indexDoc 3 \"the dog barks loudly\" (indexDoc 2 \"a quick brown fox\" (indexDoc 1 \"the quick brown dog\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
ast-cases
|
||||
(list
|
||||
(list "single term" "showQ (parseQuery \"cat\")" "T:cat")
|
||||
(list "term normalized" "showQ (parseQuery \"CAT\")" "T:cat")
|
||||
(list "explicit and" "showQ (parseQuery \"cat AND dog\")" "(T:cat & T:dog)")
|
||||
(list
|
||||
"lowercase and keyword"
|
||||
"showQ (parseQuery \"cat and dog\")"
|
||||
"(T:cat & T:dog)")
|
||||
(list "implicit and" "showQ (parseQuery \"cat dog\")" "(T:cat & T:dog)")
|
||||
(list "or" "showQ (parseQuery \"cat OR dog\")" "(T:cat | T:dog)")
|
||||
(list "not" "showQ (parseQuery \"NOT cat\")" "!T:cat")
|
||||
(list
|
||||
"and binds tighter than or"
|
||||
"showQ (parseQuery \"cat AND dog OR bird\")"
|
||||
"((T:cat & T:dog) | T:bird)")
|
||||
(list
|
||||
"or then and"
|
||||
"showQ (parseQuery \"cat OR dog AND bird\")"
|
||||
"(T:cat | (T:dog & T:bird))")
|
||||
(list
|
||||
"parens override precedence"
|
||||
"showQ (parseQuery \"(cat OR dog) AND bird\")"
|
||||
"((T:cat | T:dog) & T:bird)")
|
||||
(list
|
||||
"and with not"
|
||||
"showQ (parseQuery \"cat AND NOT dog\")"
|
||||
"(T:cat & !T:dog)")
|
||||
(list
|
||||
"two-word phrase"
|
||||
"showQ (parseQuery \"\\\"quick brown\\\"\")"
|
||||
"P:quick-brown")
|
||||
(list
|
||||
"three-word phrase"
|
||||
"showQ (parseQuery \"\\\"quick brown fox\\\"\")"
|
||||
"P:quick-brown-fox")
|
||||
(list
|
||||
"and left-assoc"
|
||||
"showQ (parseQuery \"a AND b AND c\")"
|
||||
"((T:a & T:b) & T:c)")
|
||||
(list
|
||||
"or left-assoc"
|
||||
"showQ (parseQuery \"a OR b OR c\")"
|
||||
"((T:a | T:b) | T:c)")
|
||||
(list
|
||||
"punctuation stripped"
|
||||
"showQ (parseQuery \"cat, dog!\")"
|
||||
"(T:cat & T:dog)")))
|
||||
|
||||
(define
|
||||
search-cases
|
||||
(list
|
||||
(list "term" "searchQuery \"quick\" idx" (list 1 2))
|
||||
(list
|
||||
"term normalized"
|
||||
"searchQuery \"QUICK\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"explicit and"
|
||||
"searchQuery \"quick AND brown\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"implicit and"
|
||||
"searchQuery \"quick brown\" idx"
|
||||
(list 1 2))
|
||||
(list "and disjoint" "searchQuery \"the AND fox\" idx" (list))
|
||||
(list "or" "searchQuery \"fox OR barks\" idx" (list 2 3))
|
||||
(list "not" "searchQuery \"NOT the\" idx" (list 2))
|
||||
(list "and not" "searchQuery \"quick AND NOT the\" idx" (list 2))
|
||||
(list
|
||||
"precedence and-or"
|
||||
"searchQuery \"the AND dog OR fox\" idx"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"precedence or-and"
|
||||
"searchQuery \"fox OR the AND dog\" idx"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"parens"
|
||||
"searchQuery \"the AND (dog OR fox)\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"phrase"
|
||||
"searchQuery \"\\\"quick brown\\\"\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"phrase one doc"
|
||||
"searchQuery \"\\\"brown dog\\\"\" idx"
|
||||
(list 1))
|
||||
(list
|
||||
"phrase and term"
|
||||
"searchQuery \"\\\"quick brown\\\" AND dog\" idx"
|
||||
(list 1))
|
||||
(list
|
||||
"not phrase"
|
||||
"searchQuery \"NOT \\\"quick brown\\\"\" idx"
|
||||
(list 3))
|
||||
(list
|
||||
"implicit and terms"
|
||||
"searchQuery \"dog barks\" idx"
|
||||
(list 3))))
|
||||
|
||||
(define
|
||||
ast-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) ast-cases)))
|
||||
(define
|
||||
search-results
|
||||
(search-batch
|
||||
parse-corpus
|
||||
(map (fn (c) (nth c 1)) search-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test
|
||||
(str "ast: " (nth c 0))
|
||||
(nth ast-results i)
|
||||
(nth c 2)))
|
||||
ast-cases)
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test
|
||||
(str "search: " (nth c 0))
|
||||
(nth search-results i)
|
||||
(nth c 2)))
|
||||
search-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
63
lib/search/tests/prefix.sx
Normal file
63
lib/search/tests/prefix.sx
Normal file
@@ -0,0 +1,63 @@
|
||||
;; Extension — prefix / wildcard queries.
|
||||
;; Corpus: 1 "alpha alpine" 2 "beta apple" 3 "banana alpha"
|
||||
;; allTerms sorted: alpha alpine apple banana beta
|
||||
|
||||
(define
|
||||
prefix-setup
|
||||
"idx = indexDoc 3 \"banana alpha\" (indexDoc 2 \"beta apple\" (indexDoc 1 \"alpha alpine\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
prefix-cases
|
||||
(list
|
||||
(list
|
||||
"prefix terms two matches"
|
||||
"prefixTerms \"al\" idx"
|
||||
(list "alpha" "alpine"))
|
||||
(list
|
||||
"prefix terms narrower"
|
||||
"prefixTerms \"alp\" idx"
|
||||
(list "alpha" "alpine"))
|
||||
(list
|
||||
"prefix terms wide"
|
||||
"prefixTerms \"a\" idx"
|
||||
(list "alpha" "alpine" "apple"))
|
||||
(list "prefix terms single" "prefixTerms \"ban\" idx" (list "banana"))
|
||||
(list "prefix terms exact term" "prefixTerms \"beta\" idx" (list "beta"))
|
||||
(list "prefix terms none" "prefixTerms \"z\" idx" (list))
|
||||
(list
|
||||
"prefix docs union"
|
||||
"prefixDocs \"al\" idx"
|
||||
(list 1 3))
|
||||
(list "prefix docs single term" "prefixDocs \"ban\" idx" (list 3))
|
||||
(list
|
||||
"prefix docs wide"
|
||||
"prefixDocs \"a\" idx"
|
||||
(list 1 2 3))
|
||||
(list "prefix docs none" "prefixDocs \"z\" idx" (list))
|
||||
(list
|
||||
"prefix docs exact"
|
||||
"prefixDocs \"alpha\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"prefix rank ranks by matched terms"
|
||||
"prefixRankTfIdf \"al\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"prefix rank single doc"
|
||||
"prefixRankTfIdf \"ban\" idx"
|
||||
(list 3))
|
||||
(list "prefix rank empty" "prefixRankTfIdf \"z\" idx" (list))))
|
||||
|
||||
(define
|
||||
prefix-results
|
||||
(search-batch
|
||||
prefix-setup
|
||||
(map (fn (c) (nth c 1)) prefix-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth prefix-results i) (nth c 2)))
|
||||
prefix-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
90
lib/search/tests/rank.sx
Normal file
90
lib/search/tests/rank.sx
Normal file
@@ -0,0 +1,90 @@
|
||||
;; Phase 3 — ranking (TF-IDF, BM25, top-N). Deterministic: ties broken by DocId.
|
||||
;; Corpora:
|
||||
;; idx1: 1 "alpha alpha alpha gamma" 2 "alpha" 3 "beta"
|
||||
;; idx2: 1 "cat" 2 "cat cat dog elephant frog grape" 3 "zzz"
|
||||
;; idx3: 1 "kite" 2 "kite" (identical docs -> tiebreak)
|
||||
|
||||
(define
|
||||
rank-setup
|
||||
"idx1 = indexDoc 3 \"beta\" (indexDoc 2 \"alpha\" (indexDoc 1 \"alpha alpha alpha gamma\" emptyIndex))\nidx2 = indexDoc 3 \"zzz\" (indexDoc 2 \"cat cat dog elephant frog grape\" (indexDoc 1 \"cat\" emptyIndex))\nidx3 = indexDoc 2 \"kite\" (indexDoc 1 \"kite\" emptyIndex)\n")
|
||||
|
||||
(define
|
||||
rank-cases
|
||||
(list
|
||||
(list
|
||||
"tfidf tf ordering"
|
||||
"rankTfIdf [\"alpha\"] idx1"
|
||||
(list 1 2))
|
||||
(list
|
||||
"tfidf rare term boosts"
|
||||
"rankTfIdf [\"alpha\", \"beta\"] idx1"
|
||||
(list 1 3 2))
|
||||
(list
|
||||
"tfidf single-doc term"
|
||||
"rankTfIdf [\"gamma\"] idx1"
|
||||
(list 1))
|
||||
(list "tfidf absent term empty" "rankTfIdf [\"nope\"] idx1" (list))
|
||||
(list "tfidf empty query empty" "rankTfIdf [] idx1" (list))
|
||||
(list
|
||||
"tfidf candidate union tie by docid"
|
||||
"rankTfIdf [\"beta\", \"gamma\"] idx1"
|
||||
(list 1 3))
|
||||
(list
|
||||
"tfidf tf ordering idx2"
|
||||
"rankTfIdf [\"cat\"] idx2"
|
||||
(list 2 1))
|
||||
(list "topN tfidf 1" "topNTfIdf 1 [\"alpha\"] idx1" (list 1))
|
||||
(list
|
||||
"topN tfidf 2"
|
||||
"topNTfIdf 2 [\"alpha\", \"beta\"] idx1"
|
||||
(list 1 3))
|
||||
(list
|
||||
"topN exceeds results"
|
||||
"topNTfIdf 10 [\"gamma\"] idx1"
|
||||
(list 1))
|
||||
(list "topN zero" "topNTfIdf 0 [\"alpha\"] idx1" (list))
|
||||
(list
|
||||
"bm25 tf+length flips tfidf"
|
||||
"rankBm25 1.5 0.75 [\"cat\"] idx2"
|
||||
(list 1 2))
|
||||
(list
|
||||
"bm25 b=0 ignores length"
|
||||
"rankBm25 1.5 0.0 [\"cat\"] idx2"
|
||||
(list 2 1))
|
||||
(list
|
||||
"bm25 alpha idx1"
|
||||
"rankBm25 1.5 0.75 [\"alpha\"] idx1"
|
||||
(list 1 2))
|
||||
(list "bm25 absent empty" "rankBm25 1.5 0.75 [\"nope\"] idx1" (list))
|
||||
(list
|
||||
"bm25 single-doc term"
|
||||
"rankBm25 1.5 0.75 [\"gamma\"] idx1"
|
||||
(list 1))
|
||||
(list "bm25 topN 1" "topNBm25 1 1.5 0.75 [\"cat\"] idx2" (list 1))
|
||||
(list
|
||||
"bm25 same candidate set"
|
||||
"sort (rankBm25 1.5 0.75 [\"alpha\", \"beta\"] idx1)"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"tfidf stable tiebreak"
|
||||
"rankTfIdf [\"kite\"] idx3"
|
||||
(list 1 2))
|
||||
(list
|
||||
"bm25 stable tiebreak"
|
||||
"rankBm25 1.5 0.75 [\"kite\"] idx3"
|
||||
(list 1 2))
|
||||
(list "numDocs" "[numDocs idx1]" (list 3))
|
||||
(list "docLen counts tokens" "[docLen 1 idx1]" (list 4))
|
||||
(list "docFreq via index" "[docFreq \"alpha\" idx1]" (list 2))))
|
||||
|
||||
(define
|
||||
rank-results
|
||||
(search-batch rank-setup (map (fn (c) (nth c 1)) rank-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth rank-results i) (nth c 2)))
|
||||
rank-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
8
lib/search/tokenize.sx
Normal file
8
lib/search/tokenize.sx
Normal file
@@ -0,0 +1,8 @@
|
||||
;; search tokenizer — Haskell source fragment.
|
||||
;; normalize (lowercase + strip punctuation), split on whitespace, attach positions.
|
||||
;; tokens :: String -> [String]
|
||||
;; positioned :: String -> [(String, Int)] -- 0-based ordinal positions
|
||||
|
||||
(define
|
||||
search/tokenize-src
|
||||
"lowerChar c = chr (toLower (ord c))\nnormChar c = if isAlphaNum c then lowerChar c else ' '\nisBlankCh c = c == ' '\ndropBlanks [] = []\ndropBlanks (c:cs) = if isBlankCh c then dropBlanks cs else c:cs\ntakeWord [] = []\ntakeWord (c:cs) = if isBlankCh c then [] else c : takeWord cs\nafterWord [] = []\nafterWord (c:cs) = if isBlankCh c then c:cs else afterWord cs\nsplitWords s = let s2 = dropBlanks s in if null s2 then [] else takeWord s2 : splitWords (afterWord s2)\nappendStr a b = a ++ b\njoinChars cs = foldr appendStr \"\" cs\ntokens s = map joinChars (splitWords (map normChar s))\nposFrom i [] = []\nposFrom i (x:xs) = (x, i) : posFrom (i + 1) xs\npositioned s = posFrom 0 (tokens s)\n")
|
||||
110
plans/agent-briefings/search-loop.md
Normal file
110
plans/agent-briefings/search-loop.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# search-on-sx loop agent (single agent, queue-driven)
|
||||
|
||||
Role: iterates `plans/search-on-sx.md` forever. **Full-text + structured search on
|
||||
Haskell** — tokenize, inverted index, query AST, boolean + phrase + ranked
|
||||
queries (TF-IDF / BM25), ACL-aware post-filter, federated index merge. Typed ADTs
|
||||
make query parsing clean; lazy lists make posting-list iteration efficient. Sits on
|
||||
`lib/haskell/` (1514/1514 already green); adds a search-shaped vocabulary on top.
|
||||
|
||||
```
|
||||
description: search-on-sx queue loop
|
||||
subagent_type: general-purpose
|
||||
run_in_background: true
|
||||
isolation: worktree
|
||||
```
|
||||
|
||||
## Prompt
|
||||
|
||||
You are the sole background agent working `plans/search-on-sx.md`. Isolated
|
||||
worktree `/root/rose-ash-loops/search` on branch `loops/search`, forever, one
|
||||
commit per feature. Push to `origin/loops/search` after every commit. Never touch
|
||||
`main` or `architecture`.
|
||||
|
||||
## Restart baseline — check before iterating
|
||||
|
||||
1. Read `plans/search-on-sx.md` — roadmap + Progress log.
|
||||
2. `ls lib/search/` — pick up from the most advanced file.
|
||||
3. If `lib/search/tests/*.sx` exist, run them via `bash lib/search/conformance.sh`.
|
||||
Green before new work.
|
||||
4. If `lib/search/scoreboard.md` exists, that's your baseline.
|
||||
5. Read the `lib/haskell/` public API once — that's your substrate. `lib/haskell/
|
||||
haskell.sx` exists; also study `runtime.sx`, `eval.sx`, `parser.sx`, `infer.sx`,
|
||||
`match.sx`, `map.sx`, `set.sx`, `testlib.sx`. Learn how to declare ADTs, pattern
|
||||
match, and use the `Map`/`Set` helpers before writing index code. Verify the real
|
||||
exported names with sx_find_all / grep — don't assume from the plan's sketch.
|
||||
|
||||
## The queue
|
||||
|
||||
Phase order per `plans/search-on-sx.md`:
|
||||
|
||||
- **Phase 1** — tokenize + inverted index + simple term lookup
|
||||
(`Map Term [(DocId,[Pos])]`, insert/lookup, `(search/index doc)`,
|
||||
`(search/query term)`).
|
||||
- **Phase 2** — query AST + boolean/phrase eval (Term | And | Or | Not | Phrase;
|
||||
posting-list set ops; positional phrase match).
|
||||
- **Phase 3** — ranking (TF-IDF, BM25), top-N.
|
||||
- **Phase 4** — ACL-aware post-filter + federation (merge per-peer indices).
|
||||
|
||||
Within a phase, pick the checkbox that unlocks the most tests per effort.
|
||||
|
||||
Every iteration: implement → test → commit → tick `[ ]` → Progress log → next.
|
||||
|
||||
## Ground rules (hard)
|
||||
|
||||
- **Scope:** only `lib/search/**` and `plans/search-on-sx.md`. Do **not** edit
|
||||
`spec/`, `hosts/`, `shared/`, other `lib/<lang>/` dirs, `lib/stdlib.sx`, or
|
||||
`lib/` root. May **import** from `lib/haskell/` only (its public API). Do **not**
|
||||
modify Haskell.
|
||||
- **NEVER call `sx_build`.** 600s watchdog. If the sx_server binary is broken →
|
||||
Blockers entry, stop. Run tests by invoking the sx_server binary directly from a
|
||||
conformance.sh (model it on `lib/haskell/conformance.sh`), pointing `SX_SERVER`
|
||||
at `/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe` — fresh
|
||||
worktrees have no `_build/`, so the relative path won't resolve.
|
||||
- **Shared-file issues** → plan's Blockers with minimal repro; don't fix here.
|
||||
- **SX files:** `sx-tree` MCP tools ONLY. **They take `file:` not `path:`** — a
|
||||
wrong key yields `Yojson Type_error("Expected string, got null")`, which looks
|
||||
like a broken binary but is just a param mismatch. `sx_validate` after edits.
|
||||
Path-based edits (`sx_replace_node`) count comment headers in their indices and
|
||||
can clobber the wrong node — re-read after, or prefer `sx_write_file` for small
|
||||
files.
|
||||
- **Unicode in `.sx`:** raw UTF-8 only, never `\uXXXX` escapes.
|
||||
- **Commit granularity:** one feature per commit. Short factual messages
|
||||
(`search: phrase query positional match + 7 tests`). Push to `origin/loops/search`.
|
||||
- **Plan file:** update Progress log (newest first) + tick boxes every commit.
|
||||
|
||||
## search-specific gotchas
|
||||
|
||||
- **Posting lists are the hot path.** Keep them sorted by DocId so boolean AND/OR
|
||||
are linear merges, not nested scans. Phrase match needs positions, so store
|
||||
`(DocId, [Pos])` — don't drop positions early to save space; you can't recover them.
|
||||
- **Tokenization decides recall.** Normalize consistently (lowercase, strip
|
||||
punctuation) on BOTH index and query side, or queries silently miss. Test the
|
||||
index/query symmetry explicitly.
|
||||
- **Ranking must be deterministic on ties.** TF-IDF/BM25 scores collide; always
|
||||
add a stable tiebreak (DocId ascending) or tests flake.
|
||||
- **ACL filter is per-viewer and post-ranking.** Filter the result list against the
|
||||
viewer, after scoring — never bake visibility into the index (the same index
|
||||
serves all viewers). Inject the permit predicate; don't hardwire an ACL module
|
||||
that doesn't exist yet.
|
||||
- **Federation merges indices, not results.** Merging per-peer inverted indices
|
||||
(union posting lists per term) is cleaner and rank-correct vs merging ranked
|
||||
result lists. Mock peer indices in tests.
|
||||
|
||||
## General gotchas (all loops)
|
||||
|
||||
- SX `do` = R7RS iteration. Use `begin` for multi-expr sequences.
|
||||
- `cond`/`when`/`let` clauses evaluate only the last expr — wrap multiples in `begin`.
|
||||
- `let` is parallel, not sequential — nest `let`s when a binding references an earlier one.
|
||||
- `env-bind!` creates a binding; `env-set!` mutates an existing one (walks scope chain).
|
||||
- `sx_validate` after every structural edit.
|
||||
- Namespace-prefix all guest helpers (`search/...`) — short/host-colliding names
|
||||
get silently shadowed or hang the runtime.
|
||||
|
||||
## Style
|
||||
|
||||
- No comments in `.sx` unless non-obvious.
|
||||
- No new planning docs — update `plans/search-on-sx.md` inline.
|
||||
- Short, factual commit messages.
|
||||
- One feature per iteration. Commit. Log. Push. Next.
|
||||
|
||||
Go. Start by reading the plan; find the first unchecked `[ ]`; implement it.
|
||||
@@ -14,7 +14,7 @@ APL, ACL visibility filtering via `lib/acl/`, federation via fed-sx.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/feed/conformance.sh` → **0/0** (not yet started)
|
||||
`bash lib/feed/conformance.sh` → **189/189** (Phases 1–4 + TF-IDF, notifications, home, smart-dedupe, trending, mute, pagination, threading)
|
||||
|
||||
## Ground rules
|
||||
|
||||
@@ -59,47 +59,118 @@ lib/feed/api.sx lib/feed/fed.sx
|
||||
|
||||
## Phase 1 — Stream model + basic ops
|
||||
|
||||
- [ ] `lib/feed/normalize.sx` — activity record schema; coerce arbitrary inputs
|
||||
- [ ] `lib/feed/stream.sx` — APL vector representation; filter by predicate; sort by
|
||||
- [x] `lib/feed/normalize.sx` — activity record schema; coerce arbitrary inputs
|
||||
- [x] `lib/feed/stream.sx` — APL vector representation; filter by predicate; sort by
|
||||
`:at`; take N (`↑`); reverse (`⌽`)
|
||||
- [ ] `lib/feed/api.sx` — `(feed/post activity)`, `(feed/all)`
|
||||
- [ ] `lib/feed/tests/basic.sx` — 15+ cases: post, query, filter, sort
|
||||
- [ ] `lib/feed/scoreboard.{json,md}`
|
||||
- [ ] `lib/feed/conformance.sh`
|
||||
- [x] `lib/feed/api.sx` — `(feed/post activity)`, `(feed/all)`
|
||||
- [x] `lib/feed/tests/basic.sx` — 30 cases: normalize defaults, filter, sort, take, api
|
||||
- [x] `lib/feed/scoreboard.{json,md}`
|
||||
- [x] `lib/feed/conformance.sh`
|
||||
|
||||
## Phase 2 — Fanout via outer product
|
||||
|
||||
- [ ] follower graph: `followers user → vector of user ids`
|
||||
- [ ] fanout: activities `∘.×` followers → matrix `(activity, follower)` pairs
|
||||
- [ ] flatten to inbox events vector
|
||||
- [ ] dedupe — group by `(actor, verb, object)` collapse to one inbox event per
|
||||
receiver
|
||||
- [ ] `lib/feed/tests/fanout.sx` — 20+ cases: small graph, mutual follow, popular
|
||||
actor (high-fanout), cross-post dedupe
|
||||
- [x] follower graph: `followers user → vector of user ids` (`feed/follow-graph`,
|
||||
`feed/followers`; graph = `{followee -> (followers)}` dict)
|
||||
- [x] fanout: activities `∘.×` audience → matrix via `apl-outer feed/-mk-event`
|
||||
- [x] flatten to inbox events vector (`feed/-flatten` rank-2 → rank-1)
|
||||
- [x] dedupe — `feed/dedupe-inbox` by `(to, actor, verb, object)`; also
|
||||
`feed/dedupe-activities` `(actor verb object)` and `feed/dedupe-collapse`
|
||||
`(verb object)` for cross-actor likes
|
||||
- [x] `lib/feed/tests/fanout.sx` — 29 cases: small graph, mutual follow, star
|
||||
(high-fanout), empty graph, unfollowed actor, cross-post dedupe
|
||||
|
||||
## Phase 3 — Aggregation + ranking
|
||||
|
||||
- [ ] group-by — `(actor, day) → count` via key-reduce
|
||||
- [ ] velocity score — recent activity count over window
|
||||
- [ ] recency score — decay by age
|
||||
- [ ] composite rank — weighted sum of components
|
||||
- [ ] top-N per timeline
|
||||
- [ ] `lib/feed/tests/rank.sx` — 20+ cases: ranking stable on tie, decay shape,
|
||||
per-user weighting
|
||||
- [x] group-by — `feed/group-by`/`feed/group-count` key-reduce; `feed/by-actor-day`
|
||||
buckets `(actor, day)` via `feed/day` (string-joined keys)
|
||||
- [x] velocity score — `feed/velocity` counts actor's activities in `(at-window, at]`
|
||||
- [x] recency score — `feed/recency` half-life decay `0.5^(age/hl)`
|
||||
- [x] composite rank — `feed/composite` weighted sum of `(weight scorer)` parts
|
||||
- [x] top-N per timeline — `feed/top` = rank then take
|
||||
- [x] `lib/feed/tests/rank.sx` — 24 cases: decay shape, velocity burst, stable
|
||||
tie-break, top-N, composite
|
||||
|
||||
## Phase 4 — Visibility filter + federation
|
||||
|
||||
- [ ] ACL filter — each candidate activity passed through `(acl/permit? viewer :read
|
||||
activity)`
|
||||
- [ ] fed-sx outbound — local `feed/post` fans out to remote followers' inboxes
|
||||
- [ ] fed-sx inbound — peer activities arrive at local inbox
|
||||
- [ ] backfill on subscribe — request peer history, merge into local stream
|
||||
- [ ] `lib/feed/tests/integration.sx` — federated timeline with ACL applied
|
||||
`lib/acl/` and fed-sx don't exist yet and are out of scope (import `lib/apl/`
|
||||
only), so ACL/transport are injected: `permit?`, `remote?`, `send-fn`, `fetch-fn`
|
||||
are function parameters. Real acl-sx / fed-sx wire in at the call site unchanged.
|
||||
|
||||
- [x] ACL filter — `feed/visible stream viewer permit?`; default `feed/permit-acl?`
|
||||
reads `:visible-to` allowlist (+ author-sees-own); per-viewer, never cached
|
||||
- [x] fed-sx outbound — `feed/federate`/`feed/deliver` fan out then partition
|
||||
local vs remote inboxes; remote events handed to injected `send-fn`
|
||||
- [x] fed-sx inbound — `feed/inbound` normalizes + `feed/ingest` dedupes peer
|
||||
activities into the local stream
|
||||
- [x] backfill on subscribe — `feed/backfill local fetch-fn peer-id`
|
||||
- [x] `lib/feed/tests/integration.sx` — 22 cases incl. end-to-end
|
||||
`feed/timeline` (federated → ACL for viewer → recency rank → top-N)
|
||||
|
||||
## Progress log
|
||||
|
||||
(loop fills this in)
|
||||
- **Phase 1 done (30/30).** Stream = APL rank-1 array whose ravel holds activity
|
||||
dicts. `normalize.sx` (record schema + accessors), `stream.sx` (filter via `/`
|
||||
compress, sort via `⍋` grade-up [stable], take via `↑`, reverse via `⌽`,
|
||||
by-actor/verb/object/since predicates), `api.sx` (mutable log: post/all/reset!/size).
|
||||
Substrate: `apl-compress`, `apl-grade-up`, `apl-take`, `apl-reverse`, `make-array`.
|
||||
Grade-up returns 1-based indices (⎕IO=1), is stable on ties → deterministic sort.
|
||||
- **Phase 2 done (59/59 total).** `fanout.sx` (graph + `apl-outer` showcase),
|
||||
`dedupe.sx` (per-key dedupe, first-wins stable). Key APL gotcha: `scalar?` is
|
||||
true for ANY dict and `disclose` nils a non-array dict, so an apl-outer combiner
|
||||
MUST `enclose` its event dict — apl-outer discloses it back intact. `apl-unique`
|
||||
preserves first-occurrence order; dict `keys` order is NOT stable, so
|
||||
`feed/audience` sorts (else recipient ordering flakes). `apl-compress` needs a
|
||||
rank-1 array, so the (activity×follower) matrix is flattened to its ravel before
|
||||
the edge-guard filter.
|
||||
- **Phase 3 done (83/83 total).** `aggregate.sx` (group-by/count, day buckets) +
|
||||
`rank.sx` (recency/velocity/engagement scorers, composite, top-N). `sort` is
|
||||
single-arg ascending only — no comparator — so ranking uses a stable two-pass
|
||||
`apl-grade-down` (by :at desc, then by score desc) for deterministic tie-breaks.
|
||||
Dict keys must be strings, so composite group keys are string-joined ("actor#day").
|
||||
- **Phase 4 done (105/105 total).** `acl.sx` (per-viewer `feed/visible`,
|
||||
`feed/timeline` capstone) + `fed.sx` (merge/ingest/inbound/backfill/federate/
|
||||
deliver). ACL/transport are dependency-injected (permit?/remote?/send-fn/fetch-fn)
|
||||
since lib/acl + fed-sx don't exist. `feed/normalize` now MERGEs defaults over the
|
||||
raw dict (was projecting to 5 keys) so extra metadata (:visible-to, peer fields)
|
||||
survives — matches the "flexible bag" principle.
|
||||
|
||||
## Blockers
|
||||
## Roadmap is complete (all 4 phases). Possible follow-ups:
|
||||
|
||||
(loop fills this in)
|
||||
- Wire real acl-sx once `lib/acl/` exists (swap injected `permit?`).
|
||||
- Wire real fed-sx transport (swap `send-fn`/`fetch-fn`).
|
||||
- [x] TF-IDF over `:tags` for content ranking — `content.sx`: `feed/tag-df`,
|
||||
`feed/tag-idf` (log N/df), `feed/tfidf-score`, `feed/by-relevance`; 15 tests.
|
||||
Composes as a scorer with rank.sx. (120/120 total.)
|
||||
- [x] Notification feed (verb-filtered, per-recipient) — `notify.sx`:
|
||||
`feed/notifications`, `feed/notify-verbs`, `feed/notify-digest` (collapses
|
||||
"X, Y liked Z" by (verb,object), sorted-deterministic); 8 tests. (128/128 total.)
|
||||
- [x] **Capstone** `feed/home` — the whole pipeline as one line: fanout ∘ inbox ∘
|
||||
dedupe ∘ ACL ∘ rank ∘ take (`home.sx`); 6 tests incl. per-viewer ACL + cross-post
|
||||
dedupe. (134/134 total.)
|
||||
- [x] Per-verb dedupe rules (briefing gotcha #3) — `feed/dedupe-smart` /
|
||||
`feed/smart-key`: reactions (like/follow/boost/...) collapse cross-actor on
|
||||
(verb,object); posts stay distinct per actor. `feed/collapse-verbs` is
|
||||
rebindable policy; 9 tests. (143/143 total.)
|
||||
- [x] Trending — `feed/trending` / `feed/trending-actors`: objects/actors ranked
|
||||
by activity count in a recency window, count-desc with key-asc tiebreak
|
||||
(`trending.sx`); 11 tests. (154/154 total.)
|
||||
- [x] Mute/block — `feed/mute-actors` / `feed/mute-tags` / `feed/mute-objects` /
|
||||
`feed/apply-prefs`: viewer-controlled per-request filtering (complements ACL's
|
||||
author-controlled visibility) (`mute.sx`); 9 tests. (163/163 total.)
|
||||
- [x] Pagination — `feed/page`/`feed/page-count` (offset) + `feed/before`/
|
||||
`feed/after`/`feed/page-before`/`feed/next-cursor` (cursor by :at, stable under
|
||||
inserts) (`page.sx`); 14 tests. (177/177 total.)
|
||||
- [x] Threading — `feed/replies`/`feed/reply-count`/`feed/thread`/
|
||||
`feed/thread-objects`/`feed/thread-size`: conversation closure over `:reply-to`
|
||||
(transitive fixpoint), chronological (`thread.sx`); 12 tests. (189/189 total.)
|
||||
|
||||
(none)
|
||||
|
||||
## Notes for next iteration
|
||||
|
||||
- sx-tree MCP tools take `file:` NOT `path:` (CLAUDE.md is stale). Wrong key →
|
||||
`Yojson Type_error("Expected string, got null")`. Looks like a broken binary, isn't.
|
||||
- sx_server binary lives in main repo: `/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe`
|
||||
(worktree has no `_build`). conformance.sh already points there with relative fallback.
|
||||
- Phase 2 substrate verified available: `apl-outer` (∘.×), `apl-member` (∊),
|
||||
`apl-unique`, `apl-iota` (1-based).
|
||||
|
||||
@@ -10,7 +10,7 @@ extension that merges per-peer indices.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/search/conformance.sh` → **0/0** (not yet started)
|
||||
`bash lib/search/conformance.sh` → **122/122** (Phases 1–4 complete)
|
||||
|
||||
## Ground rules
|
||||
|
||||
@@ -61,46 +61,110 @@ lib/search/index.sx lib/search/eval.sx
|
||||
|
||||
## Phase 1 — Tokenize + index
|
||||
|
||||
- [ ] `lib/search/tokenize.sx` — normalize (lowercase, strip punctuation), split on
|
||||
- [x] `lib/search/tokenize.sx` — normalize (lowercase, strip punctuation), split on
|
||||
whitespace, return positions
|
||||
- [ ] `lib/search/index.sx` — inverted index data structure (typed `Map` from
|
||||
haskell lib); `insert`, `delete`, `lookup`
|
||||
- [ ] `lib/search/api.sx` — `(search/index doc)`, `(search/lookup term)`
|
||||
- [ ] `lib/search/tests/index.sx` — 15+ cases: tokenize, insert + lookup, update,
|
||||
delete, multi-doc
|
||||
- [ ] `lib/search/scoreboard.{json,md}`
|
||||
- [ ] `lib/search/conformance.sh`
|
||||
- [x] `lib/search/index.sx` — inverted index data structure; `indexDoc`, `deleteDoc`,
|
||||
`lookupTerm`, `docFreq`, `allTerms`. (Data.Map's public API lacks
|
||||
toList/keys/map/filter, so a sorted assoc-list `[(Term,[(DocId,[Pos])])]` is used —
|
||||
the conceptual `Map Term [(DocId,[Pos])]` with free term iteration.)
|
||||
- [x] `lib/search/api.sx` — assembles `search/src` (tokenize + index); Haskell entry
|
||||
points `indexDoc` / `lookupTerm`
|
||||
- [x] `lib/search/tests/index.sx` — 18 cases: tokenize, insert + lookup, update,
|
||||
delete, multi-doc, positions, docFreq, allTerms
|
||||
- [x] `lib/search/scoreboard.{json,md}`
|
||||
- [x] `lib/search/conformance.sh`
|
||||
|
||||
## Phase 2 — Query AST + boolean evaluation
|
||||
|
||||
- [ ] Query ADT: `Term Text | And Query Query | Or Query Query | Not Query |
|
||||
Phrase [Text]`
|
||||
- [ ] `lib/search/parse.sx` — query syntax parser (boolean operators, quoted phrases)
|
||||
- [ ] `lib/search/eval.sx` — boolean eval via set ops on posting lists
|
||||
- [ ] phrase eval — adjacency check using positions
|
||||
- [ ] `lib/search/tests/boolean.sx` — 25+ cases: term, and, or, not, phrase,
|
||||
composition, parser edge cases
|
||||
- [x] Query ADT: `Term String | And Query Query | Or Query Query | Not Query |
|
||||
Phrase [String]` (in `lib/search/query.sx`)
|
||||
- [x] `lib/search/parse.sx` — query syntax parser: tokenizer + recursive-descent
|
||||
(OR < AND < NOT precedence, implicit AND on adjacency, quoted phrases, parens,
|
||||
case-insensitive keywords); `parseQuery`, `searchQuery`, `showQ`
|
||||
- [x] `lib/search/query.sx` — boolean eval via set ops on docid-sorted posting lists
|
||||
(sortedUnion/Inter/Diff, Not over allDocs universe)
|
||||
- [x] phrase eval — positional adjacency check (phraseInDoc / phraseStartsAt)
|
||||
- [x] `lib/search/tests/boolean.sx` — 28 cases: term, and, or, not, phrase,
|
||||
composition (parser edge cases move to the parse.sx suite)
|
||||
|
||||
## Phase 3 — Ranking
|
||||
|
||||
- [ ] document frequency tracking — extend index with `df` per term
|
||||
- [ ] TF-IDF scoring
|
||||
- [ ] BM25 scoring (configurable k1, b)
|
||||
- [ ] top-N retrieval (heap-based)
|
||||
- [ ] `lib/search/tests/rank.sx` — 20+ cases: TF-IDF behavior, BM25 vs TF-IDF,
|
||||
ranking stability, top-N correctness
|
||||
- [x] document frequency — `docFreq`/`idf`/`bm25idf` derived from the index
|
||||
(posting-list length); no separate df store needed
|
||||
- [x] TF-IDF scoring (`rankTfIdf`)
|
||||
- [x] BM25 scoring, configurable k1/b (`rankBm25 k1 b`)
|
||||
- [x] top-N retrieval (`topNTfIdf`/`topNBm25` — sortBy + take; stable DocId tiebreak)
|
||||
- [x] `lib/search/tests/rank.sx` — 23 cases: TF-IDF tf/idf behavior, BM25 length-norm
|
||||
+ tf-saturation flips vs TF-IDF, b-parameter effect, tiebreak stability, top-N
|
||||
|
||||
## Phase 4 — ACL filter + federation
|
||||
|
||||
- [ ] post-filter — each candidate result tested via `(acl/permit? viewer :read doc)`
|
||||
- [ ] federated query — fan out to peer instances via fed-sx, merge results
|
||||
- [ ] merge policy — interleave by rank, dedupe by `(peer, doc-id)`
|
||||
- [ ] `lib/search/tests/integration.sx` — federated search with ACL filter
|
||||
- [x] post-filter — `aclFilter`/`searchTfIdfAcl`/`topNTfIdfAcl`/`searchBm25Acl` take an
|
||||
injected `permit :: DocId -> Bool` predicate, applied post-rank (never in the index)
|
||||
- [x] federated query — `fedIndex :: [(PeerId, Index)] -> Index` merges per-peer
|
||||
inverted indices (union posting lists per term); rank/search run once over the merge
|
||||
- [x] merge policy — relabel local DocIds to global `gid = peer*1000 + local`
|
||||
(bijection ⇒ dedupe by (peer,doc-id) is automatic); ranking interleaves peers by score
|
||||
- [x] `lib/search/tests/integration.sx` — 21 cases: index merge, cross-peer df/lookup,
|
||||
position preservation, boolean/phrase over the merge, ACL filter + top-N + bm25
|
||||
|
||||
## Extensions (post-roadmap, search-shaped vocabulary)
|
||||
|
||||
- [x] prefix / wildcard queries (`prefixTerms`, `prefixDocs`, `prefixRankTfIdf`) — 14 tests
|
||||
- [ ] fuzzy matching — edit distance term expansion
|
||||
- [ ] result pagination (offset / limit)
|
||||
- [ ] snippet / highlight generation
|
||||
- [ ] stemming (suffix stripping) — recall-improving normalizer
|
||||
|
||||
## Progress log
|
||||
|
||||
(loop fills this in)
|
||||
- **Extension: prefix/wildcard queries (136/136 total).** `prefixTerms` matches every
|
||||
indexed term starting with a prefix (via allTerms + isPrefixOf); `prefixDocs` unions
|
||||
their docs; `prefixRankTfIdf` ranks treating the matched terms as the query. 14 tests.
|
||||
- **Phase 4 complete — federation + ACL (122/122 total). Roadmap done.** `fedIndex`
|
||||
merges per-peer inverted indices (union posting lists per term) after relabelling
|
||||
local DocIds to global `gid = peer*1000 + local` — the bijection makes (peer,doc-id)
|
||||
dedupe automatic and keeps positions, so ranking runs once over the merge and
|
||||
interleaves peers by score (rank-correct). ACL is a post-rank `filter` over an
|
||||
injected `permit :: DocId -> Bool` (viewer baked in by the caller) — never in the
|
||||
index; `searchTfIdfAcl`/`topNTfIdfAcl`/`searchBm25Acl`. 21 integration tests.
|
||||
- **Phase 3 complete — ranking (101/101 total).** TF-IDF (`rankTfIdf`) and BM25
|
||||
(`rankBm25 k1 b`) over the candidate set (docs containing any query term), scores
|
||||
as floats with deterministic DocId-ascending tiebreak; `topNTfIdf`/`topNBm25` via
|
||||
sortBy+take. df/idf derived from posting-list length (no separate df store). 23
|
||||
tests incl. a BM25-vs-TF-IDF flip (length-norm + tf-saturation) and the b-parameter
|
||||
effect. Float division/`log`/float literals all work in haskell-on-sx.
|
||||
- **Phase 2 complete — parser (78/78 total).** Query tokenizer (ord-based
|
||||
delimiters, quoted phrases) + recursive-descent parser with OR<AND<NOT precedence,
|
||||
implicit AND on adjacency, parens, case-insensitive keywords. `parseQuery`,
|
||||
`searchQuery`, `showQ` (canonical render for AST tests). 32 tests in parse.sx.
|
||||
**haskell-on-sx parser gotchas hit while writing this (see parse.sx header):**
|
||||
(1) escaped char literals like `'\"'` break the tokenizer — match delimiters by
|
||||
`ord c == 34`; (2) an `[]` *pattern* inside a `case` alt breaks the parser — use
|
||||
multi-clause functions instead; (3) `case`/constructor patterns and `let (a,b)=..`
|
||||
are fine. Embedded Haskell string literals in a `.sx` source string need single
|
||||
`\"`, not `\\\"`.
|
||||
- **Phase 2 boolean/phrase eval (46/46 total).** Query ADT
|
||||
`Term|And|Or|Not|Phrase` + `evalQuery :: Index -> Query -> [DocId]` in query.sx.
|
||||
Boolean ops are linear merges over docid-sorted posting lists; Not subtracts from
|
||||
the allDocs universe; Phrase checks positional adjacency. 28 tests in boolean.sx.
|
||||
Refactored both suites to **batch all cases into one program eval** (search-batch
|
||||
in testlib) — under the heavy CPU load on this box (~11 on 2 cores), 18–28 separate
|
||||
hk-eval-program calls timed out; one combined eval per suite is ~20× faster.
|
||||
Parser (parse.sx) is the remaining Phase 2 box.
|
||||
- **Phase 1 complete (18/18).** Tokenizer (lowercase + strip punctuation + positions),
|
||||
inverted index as sorted assoc-list `[(Term,[(DocId,[Pos])])]`, indexDoc/deleteDoc/
|
||||
lookupTerm/docFreq/allTerms. Search lib is Haskell source assembled into `search/src`
|
||||
and evaluated via the haskell-on-sx interpreter; tests reuse `hk-test` counters and a
|
||||
`search-eval` helper that forces HK values to plain SX. conformance.sh models
|
||||
lib/haskell (MODE=counters, COUNTERS_PASS/FAIL=hk-test-pass/fail).
|
||||
|
||||
## Blockers
|
||||
|
||||
(loop fills this in)
|
||||
- **None.** Note: the box is heavily CPU-oversubscribed by sibling loop agents
|
||||
(load ~11 on 2 cores); each program eval is ~10× slower than nominal, so suite
|
||||
timeout is set to 600s. Runs are correct, just slow.
|
||||
- **Data.Map public API gap (informational, not fixing):** the haskell-on-sx
|
||||
`import Data.Map` binds only empty/singleton/insert/lookup/member/size/null/delete/
|
||||
insertWith/adjust/findWithDefault — no toList/keys/elems/map/filter/unionWith. Index
|
||||
uses a pure assoc-list instead so term iteration and federation merge stay simple.
|
||||
|
||||
Reference in New Issue
Block a user