Compare commits
36 Commits
loops/go
...
loops/sear
| Author | SHA1 | Date | |
|---|---|---|---|
| 5d62d08e1c | |||
| db2a5dc6ab | |||
| cfa68c3db3 | |||
| cf4e613e43 | |||
| 911a2f57c0 | |||
| 7231cb651f | |||
| 5945b51cfd | |||
| 3ab8270a58 | |||
| 9d3b775b25 | |||
| 77ab827b91 | |||
| a3f9d4f6c9 | |||
| 4c84decc01 | |||
| 0f0da0319c | |||
| b8cf3eb1b8 | |||
| e2de5a4675 | |||
| 1e4cf25015 | |||
| 9c4a5d1913 | |||
| f91ac82434 | |||
| 5136249ae5 | |||
| 6fc61147a8 | |||
| 0122c41ecb | |||
| 58656b03e4 | |||
| b0feb7b01b | |||
| a979297959 | |||
| 37226cf6eb | |||
| 50a7f31a39 | |||
| 915f51b2b6 | |||
| e7501bdf8f | |||
| c3a0727645 | |||
| 1b94082a71 | |||
| 57184daaee | |||
| d9e2627b89 | |||
| bcabed6bce | |||
| 5098a8f015 | |||
| 9fe5c9044d | |||
| c6f397c3d9 |
@@ -1561,7 +1561,66 @@
|
||||
(er-register-pure-bif! "crypto" "hash" 2 er-bif-crypto-hash)
|
||||
(er-register-pure-bif! "cid" "from_bytes" 1 er-bif-cid-from-bytes)
|
||||
(er-register-pure-bif! "cid" "to_string" 1 er-bif-cid-to-string)
|
||||
|
||||
;; ── binary_to_list / list_to_binary (Step 3b — term codec) ──────
|
||||
;; Standard Erlang semantics:
|
||||
;; binary_to_list(<<B1,B2,...>>) -> [B1, B2, ...] (Erlang cons of ints)
|
||||
;; list_to_binary(IoList) -> <<...>> (flattens nested
|
||||
;; iolists; elements are byte ints 0-255 or binaries)
|
||||
;; Bad arg / out-of-range byte / non-iolist element -> error:badarg.
|
||||
|
||||
(define er-bif-binary-to-list
|
||||
(fn (vs)
|
||||
(let ((v (nth vs 0)))
|
||||
(cond
|
||||
(not (er-binary? v))
|
||||
(raise (er-mk-error-marker (er-mk-atom "badarg")))
|
||||
:else
|
||||
(let ((bs (get v :bytes)) (out (er-mk-nil)))
|
||||
(for-each
|
||||
(fn (i)
|
||||
(set! out (er-mk-cons (nth bs (- (- (len bs) 1) i)) out)))
|
||||
(range 0 (len bs)))
|
||||
out)))))
|
||||
|
||||
;; Walk an Erlang iolist, appending bytes to `acc` (a mutable SX list).
|
||||
;; Accepts: nil, cons-of-X, binary, integer in 0..255. Anything else
|
||||
;; signals failure by setting (nth fail 0) to true.
|
||||
(define er-iolist-walk!
|
||||
(fn (v acc fail)
|
||||
(cond
|
||||
(nth fail 0) nil
|
||||
(er-nil? v) nil
|
||||
(er-cons? v)
|
||||
(do (er-iolist-walk! (get v :head) acc fail)
|
||||
(er-iolist-walk! (get v :tail) acc fail))
|
||||
(er-binary? v)
|
||||
(for-each
|
||||
(fn (i) (append! acc (nth (get v :bytes) i)))
|
||||
(range 0 (len (get v :bytes))))
|
||||
(= (type-of v) "number")
|
||||
(cond
|
||||
(and (>= v 0) (<= v 255)) (append! acc v)
|
||||
:else (set-nth! fail 0 true))
|
||||
:else (set-nth! fail 0 true))))
|
||||
|
||||
(define er-bif-list-to-binary
|
||||
(fn (vs)
|
||||
(let ((v (nth vs 0)) (acc (list)) (fail (list false)))
|
||||
(cond
|
||||
(not (or (er-nil? v) (er-cons? v) (er-binary? v)))
|
||||
(raise (er-mk-error-marker (er-mk-atom "badarg")))
|
||||
:else
|
||||
(do
|
||||
(er-iolist-walk! v acc fail)
|
||||
(cond
|
||||
(nth fail 0)
|
||||
(raise (er-mk-error-marker (er-mk-atom "badarg")))
|
||||
:else (er-mk-binary acc)))))))
|
||||
|
||||
(er-register-bif! "file" "list_dir" 1 er-bif-file-list-dir)
|
||||
(er-register-pure-bif! "erlang" "binary_to_list" 1 er-bif-binary-to-list)
|
||||
(er-register-pure-bif! "erlang" "list_to_binary" 1 er-bif-list-to-binary)
|
||||
(er-mk-atom "ok")))
|
||||
|
||||
;; Register everything at load time.
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
{
|
||||
"language": "erlang",
|
||||
"total_pass": 729,
|
||||
"total": 729,
|
||||
"total_pass": 761,
|
||||
"total": 761,
|
||||
"suites": [
|
||||
{"name":"tokenize","pass":62,"total":62,"status":"ok"},
|
||||
{"name":"parse","pass":52,"total":52,"status":"ok"},
|
||||
{"name":"eval","pass":385,"total":385,"status":"ok"},
|
||||
{"name":"eval","pass":408,"total":408,"status":"ok"},
|
||||
{"name":"runtime","pass":93,"total":93,"status":"ok"},
|
||||
{"name":"ring","pass":4,"total":4,"status":"ok"},
|
||||
{"name":"ping-pong","pass":4,"total":4,"status":"ok"},
|
||||
{"name":"bank","pass":8,"total":8,"status":"ok"},
|
||||
{"name":"echo","pass":7,"total":7,"status":"ok"},
|
||||
{"name":"fib","pass":8,"total":8,"status":"ok"},
|
||||
{"name":"ffi","pass":28,"total":28,"status":"ok"},
|
||||
{"name":"ffi","pass":37,"total":37,"status":"ok"},
|
||||
{"name":"vm","pass":78,"total":78,"status":"ok"}
|
||||
]
|
||||
}
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
# Erlang-on-SX Scoreboard
|
||||
|
||||
**Total: 729 / 729 tests passing**
|
||||
**Total: 761 / 761 tests passing**
|
||||
|
||||
| | Suite | Pass | Total |
|
||||
|---|---|---|---|
|
||||
| ✅ | tokenize | 62 | 62 |
|
||||
| ✅ | parse | 52 | 52 |
|
||||
| ✅ | eval | 385 | 385 |
|
||||
| ✅ | eval | 408 | 408 |
|
||||
| ✅ | runtime | 93 | 93 |
|
||||
| ✅ | ring | 4 | 4 |
|
||||
| ✅ | ping-pong | 4 | 4 |
|
||||
| ✅ | bank | 8 | 8 |
|
||||
| ✅ | echo | 7 | 7 |
|
||||
| ✅ | fib | 8 | 8 |
|
||||
| ✅ | ffi | 28 | 28 |
|
||||
| ✅ | ffi | 37 | 37 |
|
||||
| ✅ | vm | 78 | 78 |
|
||||
|
||||
|
||||
|
||||
@@ -228,9 +228,10 @@
|
||||
(er-eval-test "tuple_size 0" (ev "tuple_size({})") 0)
|
||||
|
||||
;; ── BIFs: atom / list conversions ───────────────────────────────
|
||||
(er-eval-test "atom_to_list" (ev "atom_to_list(hello)") "hello")
|
||||
(er-eval-test "atom_to_list -> charlist length" (ev "length(atom_to_list(hello))") 5)
|
||||
(er-eval-test "atom_to_list -> head $h" (ev "hd(atom_to_list(hello))") 104)
|
||||
(er-eval-test "list_to_atom roundtrip"
|
||||
(nm (ev "list_to_atom(atom_to_list(foo))")) "foo")
|
||||
(nm (ev "list_to_atom(atom_to_list(foo))")) "foo") ;; round-trip via charlist
|
||||
(er-eval-test "list_to_atom fresh"
|
||||
(nm (ev "list_to_atom(\"bar\")")) "bar")
|
||||
|
||||
@@ -1060,11 +1061,13 @@
|
||||
(er-eval-test "list_to_tuple roundtrip"
|
||||
(ev "tuple_size(list_to_tuple([10, 20, 30]))") 3)
|
||||
|
||||
(er-eval-test "integer_to_list" (ev "integer_to_list(42)") "42")
|
||||
(er-eval-test "integer_to_list neg" (ev "integer_to_list(-99)") "-99")
|
||||
(er-eval-test "integer_to_list -> charlist length" (ev "length(integer_to_list(42))") 2)
|
||||
(er-eval-test "integer_to_list 42 head $4" (ev "hd(integer_to_list(42))") 52)
|
||||
(er-eval-test "integer_to_list neg -> charlist length" (ev "length(integer_to_list(-99))") 3)
|
||||
(er-eval-test "integer_to_list -99 head $-" (ev "hd(integer_to_list(-99))") 45)
|
||||
(er-eval-test "list_to_integer" (ev "list_to_integer(\"123\")") 123)
|
||||
(er-eval-test "list_to_integer roundtrip"
|
||||
(ev "list_to_integer(integer_to_list(7))") 7)
|
||||
(ev "list_to_integer(integer_to_list(7))") 7) ;; round-trip via charlist
|
||||
|
||||
(er-eval-test "is_function fun"
|
||||
(nm (ev "F = fun (X) -> X end, is_function(F)")) "true")
|
||||
@@ -1341,6 +1344,42 @@
|
||||
(get (nth (get er-rt-cap-result :elements) 4) :name) "true")
|
||||
|
||||
|
||||
|
||||
;; ── $X char literals (Step 3b substrate fix 2026-06-04) ──────────
|
||||
(er-eval-test "char $A" (ev "$A") 65)
|
||||
(er-eval-test "char $a" (ev "$a") 97)
|
||||
(er-eval-test "char $0 is digit, not escape-NUL" (ev "$0") 48)
|
||||
(er-eval-test "char $\\n is newline (10)" (ev "$\\n") 10)
|
||||
(er-eval-test "char $\\t is tab (9)" (ev "$\\t") 9)
|
||||
(er-eval-test "char $\\r is CR (13)" (ev "$\\r") 13)
|
||||
(er-eval-test "char $\\s is space (32)" (ev "$\\s") 32)
|
||||
(er-eval-test "char $\\0 is NUL (0)" (ev "$\\0") 0)
|
||||
(er-eval-test "char $\\\\ is backslash (92)" (ev "$\\\\") 92)
|
||||
(er-eval-test "[$h,$i] head is 104" (ev "hd([$h, $i])") 104)
|
||||
(er-eval-test "list_to_binary char-list -> bytes"
|
||||
(ev "byte_size(list_to_binary([$f, $e, $d]))") 3)
|
||||
(er-eval-test "list_to_binary char-list round-trip"
|
||||
(nm (ev "list_to_binary([$h, $i]) =:= <<104, 105>>")) "true")
|
||||
|
||||
|
||||
;; ── atom_to_list / integer_to_list charlist semantics (Step 3b substrate fix #3) ──
|
||||
(er-eval-test "atom_to_list hd is char code"
|
||||
(ev "hd(atom_to_list(hi))") 104)
|
||||
(er-eval-test "atom_to_list maps to bytes via list_to_binary"
|
||||
(ev "byte_size(list_to_binary(atom_to_list(hello)))") 5)
|
||||
(er-eval-test "atom_to_list -> list_to_binary -> bytes content"
|
||||
(nm (ev "list_to_binary(atom_to_list(ok)) =:= <<111, 107>>")) "true")
|
||||
(er-eval-test "integer_to_list 12345 -> 5 chars"
|
||||
(ev "length(integer_to_list(12345))") 5)
|
||||
(er-eval-test "integer_to_list -> bytes -> back"
|
||||
(ev "list_to_integer(integer_to_list(99999))") 99999)
|
||||
(er-eval-test "list_to_atom from charlist"
|
||||
(nm (ev "list_to_atom([$f, $o, $o])")) "foo")
|
||||
(er-eval-test "list_to_atom from SX-string back-compat"
|
||||
(nm (ev "list_to_atom(\"bar\")")) "bar")
|
||||
(er-eval-test "list_to_integer from charlist"
|
||||
(ev "list_to_integer([$1, $0, $0])") 100)
|
||||
|
||||
(define
|
||||
er-eval-test-summary
|
||||
(str "eval " er-eval-test-pass "/" er-eval-test-count))
|
||||
|
||||
@@ -160,6 +160,51 @@
|
||||
(ffi-nm (ffi-ev "element(2, file:list_dir(\"/no/such/dir/xyz\"))"))
|
||||
"enoent")
|
||||
|
||||
(er-ffi-test
|
||||
"binary_to_list <<1,2,3>> length"
|
||||
(ffi-ev "length(binary_to_list(<<1,2,3,4,5>>))")
|
||||
5)
|
||||
|
||||
(er-ffi-test
|
||||
"binary_to_list hd byte"
|
||||
(ffi-ev "hd(binary_to_list(<<7,8,9>>))")
|
||||
7)
|
||||
|
||||
(er-ffi-test
|
||||
"binary_to_list empty -> []"
|
||||
(ffi-nm (ffi-ev "case binary_to_list(<<>>) of [] -> empty end"))
|
||||
"empty")
|
||||
|
||||
(er-ffi-test
|
||||
"list_to_binary flat list bytes"
|
||||
(ffi-ev "byte_size(list_to_binary([1,2,3]))")
|
||||
3)
|
||||
|
||||
(er-ffi-test
|
||||
"list_to_binary nested iolist"
|
||||
(ffi-ev "byte_size(list_to_binary([1, <<2,3>>, [4, [5]]]))")
|
||||
5)
|
||||
|
||||
(er-ffi-test
|
||||
"list_to_binary round-trip via binary_to_list"
|
||||
(ffi-nm (ffi-ev "list_to_binary(binary_to_list(<<10,20,30>>)) =:= <<10,20,30>>"))
|
||||
"true")
|
||||
|
||||
(er-ffi-test
|
||||
"binary_to_list non-binary -> error:badarg"
|
||||
(ffi-nm (ffi-ev "try binary_to_list(42) catch error:badarg -> ok end"))
|
||||
"ok")
|
||||
|
||||
(er-ffi-test
|
||||
"list_to_binary out-of-range byte -> error:badarg"
|
||||
(ffi-nm (ffi-ev "try list_to_binary([300]) catch error:badarg -> ok end"))
|
||||
"ok")
|
||||
|
||||
(er-ffi-test
|
||||
"list_to_binary non-iolist -> error:badarg"
|
||||
(ffi-nm (ffi-ev "try list_to_binary(42) catch error:badarg -> ok end"))
|
||||
"ok")
|
||||
|
||||
;; ── Still deferred (no host primitive): httpc (HTTP client, v2),
|
||||
;; sqlite-* (v2 indexes). Assert NOT registered so a future iteration
|
||||
;; that wires them without updating this suite fails fast.
|
||||
|
||||
@@ -229,13 +229,37 @@
|
||||
(= ch "$")
|
||||
(do
|
||||
(er-advance! 1)
|
||||
(if
|
||||
(and (< pos src-len) (= (er-cur) "\\"))
|
||||
(do
|
||||
(er-advance! 1)
|
||||
(when (< pos src-len) (er-advance! 1)))
|
||||
(when (< pos src-len) (er-advance! 1)))
|
||||
(er-emit! "integer" (slice src start pos) start)
|
||||
;; Emit the char's decimal code as the integer token value
|
||||
;; (was: raw "$X" text — parse-number then returned nil).
|
||||
(let
|
||||
((code (cond
|
||||
(>= pos src-len) 0
|
||||
(= (er-cur) "\\")
|
||||
(do
|
||||
(er-advance! 1)
|
||||
(let ((esc (if (< pos src-len) (er-cur) "")))
|
||||
(when (< pos src-len) (er-advance! 1))
|
||||
(cond
|
||||
(= esc "n") 10
|
||||
(= esc "t") 9
|
||||
(= esc "r") 13
|
||||
(= esc "s") 32
|
||||
(= esc "b") 8
|
||||
(= esc "e") 27
|
||||
(= esc "f") 12
|
||||
(= esc "v") 11
|
||||
(= esc "d") 127
|
||||
(= esc "0") 0
|
||||
(= esc "\\") 92
|
||||
(= esc "\"") 34
|
||||
(= esc "'") 39
|
||||
(= esc "") 0
|
||||
:else (char->integer (nth (string->list esc) 0)))))
|
||||
:else
|
||||
(let ((c (er-cur)))
|
||||
(er-advance! 1)
|
||||
(char->integer (nth (string->list c) 0))))))
|
||||
(er-emit! "integer" (str code) start))
|
||||
(scan!))
|
||||
(er-lower? ch)
|
||||
(do
|
||||
|
||||
@@ -107,7 +107,12 @@
|
||||
(let
|
||||
((ty (get node :type)))
|
||||
(cond
|
||||
(= ty "integer") (parse-number (get node :value))
|
||||
(= ty "integer")
|
||||
(let ((n (parse-number (get node :value))))
|
||||
(cond
|
||||
(= n nil) (error (str "Erlang: invalid integer literal: "
|
||||
(get node :value)))
|
||||
:else (truncate n)))
|
||||
(= ty "float") (parse-number (get node :value))
|
||||
(= ty "atom") (er-mk-atom (get node :value))
|
||||
(= ty "string") (get node :value)
|
||||
@@ -821,16 +826,30 @@
|
||||
(len (get v :elements))
|
||||
(error "Erlang: tuple_size: not a tuple")))))
|
||||
|
||||
(define er-string->charlist
|
||||
(fn (s)
|
||||
(let ((cs (string->list s)) (out (er-mk-nil)))
|
||||
(for-each
|
||||
(fn (i)
|
||||
(set! out (er-mk-cons
|
||||
(char->integer (nth cs (- (- (len cs) 1) i)))
|
||||
out)))
|
||||
(range 0 (len cs)))
|
||||
out)))
|
||||
|
||||
(define
|
||||
er-bif-atom-to-list
|
||||
(fn
|
||||
(vs)
|
||||
(let
|
||||
((v (er-bif-arg1 vs "atom_to_list")))
|
||||
;; Standard Erlang: atom_to_list/1 returns an Erlang charlist
|
||||
;; (list of integer char codes). Was: SX string of :name —
|
||||
;; unusable from Erlang-land for [Char|T] / ++ / binary segments.
|
||||
(if
|
||||
(er-atom? v)
|
||||
(get v :name)
|
||||
(error "Erlang: atom_to_list: not an atom")))))
|
||||
(er-string->charlist (get v :name))
|
||||
(raise (er-mk-error-marker (er-mk-atom "badarg")))))))
|
||||
|
||||
(define
|
||||
er-bif-list-to-atom
|
||||
@@ -838,10 +857,11 @@
|
||||
(vs)
|
||||
(let
|
||||
((v (er-bif-arg1 vs "list_to_atom")))
|
||||
(if
|
||||
(= (type-of v) "string")
|
||||
(er-mk-atom v)
|
||||
(error "Erlang: list_to_atom: not a string")))))
|
||||
;; Accept Erlang charlist (cons of ints) or SX string.
|
||||
(let ((s (er-source-to-string v)))
|
||||
(cond
|
||||
(= s nil) (raise (er-mk-error-marker (er-mk-atom "badarg")))
|
||||
:else (er-mk-atom s))))))
|
||||
|
||||
;; ── lists module ─────────────────────────────────────────────────
|
||||
(define
|
||||
@@ -1597,10 +1617,12 @@
|
||||
(vs)
|
||||
(let
|
||||
((v (er-bif-arg1 vs "integer_to_list")))
|
||||
;; Standard Erlang: integer_to_list/1 returns an Erlang charlist
|
||||
;; (e.g. integer_to_list(42) -> [$4, $2] -> [52, 50]).
|
||||
(cond
|
||||
(not (= (type-of v) "number"))
|
||||
(raise (er-mk-error-marker (er-mk-atom "badarg")))
|
||||
:else (str v)))))
|
||||
:else (er-string->charlist (str v))))))
|
||||
|
||||
(define
|
||||
er-bif-list-to-integer
|
||||
@@ -1608,15 +1630,14 @@
|
||||
(vs)
|
||||
(let
|
||||
((v (er-bif-arg1 vs "list_to_integer")))
|
||||
(cond
|
||||
(not (= (type-of v) "string"))
|
||||
(raise (er-mk-error-marker (er-mk-atom "badarg")))
|
||||
:else (let
|
||||
((n (parse-number v)))
|
||||
(cond
|
||||
(= n nil)
|
||||
(raise (er-mk-error-marker (er-mk-atom "badarg")))
|
||||
:else n))))))
|
||||
;; Accept Erlang charlist (cons of ints) or SX string.
|
||||
(let ((s (er-source-to-string v)))
|
||||
(cond
|
||||
(= s nil) (raise (er-mk-error-marker (er-mk-atom "badarg")))
|
||||
:else (let ((n (parse-number s)))
|
||||
(cond
|
||||
(= n nil) (raise (er-mk-error-marker (er-mk-atom "badarg")))
|
||||
:else n)))))))
|
||||
|
||||
(define
|
||||
er-bif-is-function
|
||||
|
||||
38
lib/feed/acl.sx
Normal file
38
lib/feed/acl.sx
Normal file
@@ -0,0 +1,38 @@
|
||||
; feed/acl — per-viewer visibility filtering. The same candidate stream yields
|
||||
; different timelines for different viewers, so ACL is applied per request and
|
||||
; pre-ACL timelines are never cached.
|
||||
;
|
||||
; permit? is injected: (permit? viewer activity) -> bool. Wire a real acl-sx
|
||||
; predicate here; feed/permit-acl? is a self-contained default that reads an
|
||||
; optional :visible-to allowlist on the activity.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-elem?), lib/feed/rank.sx (feed/top).
|
||||
|
||||
; default permit: actor always sees own activity; absent/nil :visible-to is
|
||||
; public; otherwise viewer must be in the allowlist.
|
||||
(define
|
||||
feed/permit-acl?
|
||||
(fn
|
||||
(viewer a)
|
||||
(or
|
||||
(equal? viewer (get a :actor))
|
||||
(let
|
||||
((allowed (get a :visible-to nil)))
|
||||
(if (= allowed nil) true (feed/-elem? viewer allowed))))))
|
||||
|
||||
(define feed/permit-public? (fn (viewer a) true))
|
||||
|
||||
; filter a stream to what viewer may read
|
||||
(define
|
||||
feed/visible
|
||||
(fn
|
||||
(stream viewer permit?)
|
||||
(feed/filter stream (fn (a) (permit? viewer a)))))
|
||||
|
||||
; the capstone: candidate stream -> ACL for viewer -> rank -> top-N
|
||||
(define
|
||||
feed/timeline
|
||||
(fn
|
||||
(stream viewer permit? score-fn n)
|
||||
(feed/top (feed/visible stream viewer permit?) score-fn n)))
|
||||
62
lib/feed/aggregate.sx
Normal file
62
lib/feed/aggregate.sx
Normal file
@@ -0,0 +1,62 @@
|
||||
; feed/aggregate — group-by / counting via key-reduce. Keys must be strings
|
||||
; (dict keys), so composite keys (actor, day) are joined into one string.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx.
|
||||
|
||||
; group activities into a dict: key-string -> (list of activities), order-preserving
|
||||
(define
|
||||
feed/group-by
|
||||
(fn
|
||||
(stream key-fn)
|
||||
(reduce
|
||||
(fn
|
||||
(g a)
|
||||
(let
|
||||
((k (key-fn a)))
|
||||
(assoc g k (append (get g k (list)) (list a)))))
|
||||
{}
|
||||
(feed/items stream))))
|
||||
|
||||
; key-string -> count
|
||||
(define
|
||||
feed/group-count
|
||||
(fn
|
||||
(stream key-fn)
|
||||
(reduce
|
||||
(fn
|
||||
(g a)
|
||||
(let
|
||||
((k (key-fn a)))
|
||||
(assoc g k (+ (get g k 0) 1))))
|
||||
{}
|
||||
(feed/items stream))))
|
||||
|
||||
; --- composite keys ---------------------------------------------------------
|
||||
|
||||
(define feed/day (fn (at window) (floor (/ at window))))
|
||||
|
||||
; (actor, day-bucket) -> "actor#day"
|
||||
(define
|
||||
feed/actor-day-key
|
||||
(fn
|
||||
(window)
|
||||
(fn
|
||||
(a)
|
||||
(string-append
|
||||
(get a :actor)
|
||||
"#"
|
||||
(number->string (feed/day (get a :at) window))))))
|
||||
|
||||
(define
|
||||
feed/by-actor-day
|
||||
(fn (stream window) (feed/group-count stream (feed/actor-day-key window))))
|
||||
|
||||
; per-actor activity counts
|
||||
(define
|
||||
feed/actor-counts
|
||||
(fn (stream) (feed/group-count stream feed/actor)))
|
||||
|
||||
; per-object activity counts (engagement)
|
||||
(define
|
||||
feed/object-counts
|
||||
(fn (stream) (feed/group-count stream feed/object)))
|
||||
24
lib/feed/api.sx
Normal file
24
lib/feed/api.sx
Normal file
@@ -0,0 +1,24 @@
|
||||
; feed/api — ergonomic API over the stream layer for non-APL callers.
|
||||
; A single mutable activity log; post appends, all returns it as a stream.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx (loaded by harness).
|
||||
|
||||
(define feed/-log (list))
|
||||
|
||||
; post — normalize then append. Returns the stored activity.
|
||||
(define
|
||||
feed/post
|
||||
(fn
|
||||
(raw)
|
||||
(let
|
||||
((a (feed/normalize raw)))
|
||||
(begin (set! feed/-log (append feed/-log (list a))) a))))
|
||||
|
||||
; all — the whole log as a stream (insertion order)
|
||||
(define feed/all (fn () (feed/stream feed/-log)))
|
||||
|
||||
; reset! — clear the log (test hygiene)
|
||||
(define feed/reset! (fn () (begin (set! feed/-log (list)) nil)))
|
||||
|
||||
; size — number of posted activities
|
||||
(define feed/size (fn () (len feed/-log)))
|
||||
125
lib/feed/conformance.sh
Executable file
125
lib/feed/conformance.sh
Executable file
@@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env bash
|
||||
# lib/feed/conformance.sh — run feed test suites, emit scoreboard.json + scoreboard.md.
|
||||
|
||||
set -uo pipefail
|
||||
cd "$(git rev-parse --show-toplevel)"
|
||||
|
||||
SX_SERVER="${SX_SERVER:-/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe}"
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
SX_SERVER="hosts/ocaml/_build/default/bin/sx_server.exe"
|
||||
fi
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
echo "ERROR: sx_server.exe not found." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
SUITES=(basic fanout rank integration content notify home dedupe trending mute page thread)
|
||||
|
||||
OUT_JSON="lib/feed/scoreboard.json"
|
||||
OUT_MD="lib/feed/scoreboard.md"
|
||||
|
||||
run_suite() {
|
||||
local suite=$1
|
||||
local file="lib/feed/tests/${suite}.sx"
|
||||
local TMP
|
||||
TMP=$(mktemp)
|
||||
cat > "$TMP" << EPOCHS
|
||||
(epoch 1)
|
||||
(load "spec/stdlib.sx")
|
||||
(load "lib/r7rs.sx")
|
||||
(load "lib/apl/runtime.sx")
|
||||
(load "lib/feed/normalize.sx")
|
||||
(load "lib/feed/stream.sx")
|
||||
(load "lib/feed/api.sx")
|
||||
(load "lib/feed/fanout.sx")
|
||||
(load "lib/feed/dedupe.sx")
|
||||
(load "lib/feed/aggregate.sx")
|
||||
(load "lib/feed/rank.sx")
|
||||
(load "lib/feed/acl.sx")
|
||||
(load "lib/feed/fed.sx")
|
||||
(load "lib/feed/content.sx")
|
||||
(load "lib/feed/notify.sx")
|
||||
(load "lib/feed/home.sx")
|
||||
(load "lib/feed/trending.sx")
|
||||
(load "lib/feed/mute.sx")
|
||||
(load "lib/feed/page.sx")
|
||||
(load "lib/feed/thread.sx")
|
||||
(epoch 2)
|
||||
(eval "(define feed-test-pass 0)")
|
||||
(eval "(define feed-test-fail 0)")
|
||||
(eval "(define feed-test (fn (name got expected) (if (= got expected) (set! feed-test-pass (+ feed-test-pass 1)) (set! feed-test-fail (+ feed-test-fail 1)))))")
|
||||
(epoch 3)
|
||||
(load "${file}")
|
||||
(epoch 4)
|
||||
(eval "(list feed-test-pass feed-test-fail)")
|
||||
EPOCHS
|
||||
|
||||
local OUTPUT
|
||||
OUTPUT=$(timeout 300 "$SX_SERVER" < "$TMP" 2>/dev/null)
|
||||
rm -f "$TMP"
|
||||
|
||||
local LINE
|
||||
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 4 / {getline; print; exit}')
|
||||
if [ -z "$LINE" ]; then
|
||||
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 4 \([0-9]+ [0-9]+\)\)' | tail -1 \
|
||||
| sed -E 's/^\(ok 4 //; s/\)$//')
|
||||
fi
|
||||
|
||||
local P F
|
||||
P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/')
|
||||
F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/')
|
||||
P=${P:-0}
|
||||
F=${F:-0}
|
||||
echo "${P} ${F}"
|
||||
}
|
||||
|
||||
declare -A SUITE_PASS
|
||||
declare -A SUITE_FAIL
|
||||
TOTAL_PASS=0
|
||||
TOTAL_FAIL=0
|
||||
|
||||
echo "Running feed conformance suite..." >&2
|
||||
for s in "${SUITES[@]}"; do
|
||||
read -r p f < <(run_suite "$s")
|
||||
SUITE_PASS[$s]=$p
|
||||
SUITE_FAIL[$s]=$f
|
||||
TOTAL_PASS=$((TOTAL_PASS + p))
|
||||
TOTAL_FAIL=$((TOTAL_FAIL + f))
|
||||
printf " %-12s %d/%d\n" "$s" "$p" "$((p+f))" >&2
|
||||
done
|
||||
|
||||
# scoreboard.json
|
||||
{
|
||||
printf '{\n'
|
||||
printf ' "suites": {\n'
|
||||
first=1
|
||||
for s in "${SUITES[@]}"; do
|
||||
if [ $first -eq 0 ]; then printf ',\n'; fi
|
||||
printf ' "%s": {"pass": %d, "fail": %d}' "$s" "${SUITE_PASS[$s]}" "${SUITE_FAIL[$s]}"
|
||||
first=0
|
||||
done
|
||||
printf '\n },\n'
|
||||
printf ' "total_pass": %d,\n' "$TOTAL_PASS"
|
||||
printf ' "total_fail": %d,\n' "$TOTAL_FAIL"
|
||||
printf ' "total": %d\n' "$((TOTAL_PASS + TOTAL_FAIL))"
|
||||
printf '}\n'
|
||||
} > "$OUT_JSON"
|
||||
|
||||
# scoreboard.md
|
||||
{
|
||||
printf '# feed Conformance Scoreboard\n\n'
|
||||
printf '_Generated by `lib/feed/conformance.sh`_\n\n'
|
||||
printf '| Suite | Pass | Fail | Total |\n'
|
||||
printf '|-------|-----:|-----:|------:|\n'
|
||||
for s in "${SUITES[@]}"; do
|
||||
p=${SUITE_PASS[$s]}
|
||||
f=${SUITE_FAIL[$s]}
|
||||
printf '| %s | %d | %d | %d |\n' "$s" "$p" "$f" "$((p+f))"
|
||||
done
|
||||
printf '| **Total** | **%d** | **%d** | **%d** |\n' "$TOTAL_PASS" "$TOTAL_FAIL" "$((TOTAL_PASS + TOTAL_FAIL))"
|
||||
} > "$OUT_MD"
|
||||
|
||||
echo "Wrote $OUT_JSON and $OUT_MD" >&2
|
||||
echo "Total: $TOTAL_PASS pass, $TOTAL_FAIL fail" >&2
|
||||
|
||||
[ "$TOTAL_FAIL" -eq 0 ]
|
||||
68
lib/feed/content.sx
Normal file
68
lib/feed/content.sx
Normal file
@@ -0,0 +1,68 @@
|
||||
; feed/content — TF-IDF relevance over activity :tags. Rare tags carry more
|
||||
; signal, so an activity matching an uncommon tag ranks above one matching a
|
||||
; common tag. Composes with rank.sx: feed/tfidf-score is just another scorer.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-distinct), lib/feed/rank.sx (feed/rank).
|
||||
|
||||
; document frequency: tag -> number of activities whose :tags contain it
|
||||
; (a tag repeated within one activity counts once toward df)
|
||||
(define
|
||||
feed/tag-df
|
||||
(fn
|
||||
(stream)
|
||||
(reduce
|
||||
(fn
|
||||
(df a)
|
||||
(reduce
|
||||
(fn (d t) (assoc d t (+ (get d t 0) 1)))
|
||||
df
|
||||
(feed/-distinct (get a :tags))))
|
||||
{}
|
||||
(feed/items stream))))
|
||||
|
||||
; inverse document frequency: tag -> log(N / df)
|
||||
(define
|
||||
feed/tag-idf
|
||||
(fn
|
||||
(stream)
|
||||
(let
|
||||
((n (feed/count stream)) (df (feed/tag-df stream)))
|
||||
(reduce
|
||||
(fn (idf t) (assoc idf t (log (/ n (get df t)))))
|
||||
{}
|
||||
(keys df)))))
|
||||
|
||||
; term frequency within one activity: tag -> occurrence count
|
||||
(define
|
||||
feed/-tf
|
||||
(fn
|
||||
(a)
|
||||
(reduce
|
||||
(fn (tf t) (assoc tf t (+ (get tf t 0) 1)))
|
||||
{}
|
||||
(get a :tags))))
|
||||
|
||||
; relevance of an activity to a query (list of tags) given precomputed idf:
|
||||
; sum over query tags of tf(tag in activity) * idf(tag in corpus)
|
||||
(define
|
||||
feed/tfidf-score
|
||||
(fn
|
||||
(idf query)
|
||||
(fn
|
||||
(a)
|
||||
(let
|
||||
((tf (feed/-tf a)))
|
||||
(reduce
|
||||
(fn
|
||||
(acc t)
|
||||
(+ acc (* (get tf t 0) (get idf t 0))))
|
||||
0
|
||||
query)))))
|
||||
|
||||
; rank a stream by relevance to query tags (idf computed over the stream itself)
|
||||
(define
|
||||
feed/by-relevance
|
||||
(fn
|
||||
(stream query)
|
||||
(feed/rank stream (feed/tfidf-score (feed/tag-idf stream) query))))
|
||||
76
lib/feed/dedupe.sx
Normal file
76
lib/feed/dedupe.sx
Normal file
@@ -0,0 +1,76 @@
|
||||
; feed/dedupe — collapse duplicate items, keeping first occurrence per key.
|
||||
; Each verb may want its own key (see briefing): "alice posted X" keys on
|
||||
; (actor verb object) — distinct per actor; "alice liked X / bob liked X"
|
||||
; collapse on (verb object) so the cross-actor likes fold into one.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-elem? lives in fanout.sx).
|
||||
|
||||
; generic: dedupe a stream by key-fn, first occurrence wins (stable)
|
||||
(define
|
||||
feed/-dedup-by
|
||||
(fn
|
||||
(items key-fn)
|
||||
(get
|
||||
(reduce
|
||||
(fn
|
||||
(st x)
|
||||
(let
|
||||
((k (key-fn x)))
|
||||
(if (feed/-elem? k (get st :seen)) st {:seen (append (get st :seen) (list k)) :out (append (get st :out) (list x))})))
|
||||
{:seen (list) :out (list)}
|
||||
items)
|
||||
:out)))
|
||||
|
||||
(define
|
||||
feed/dedupe
|
||||
(fn
|
||||
(stream key-fn)
|
||||
(feed/stream (feed/-dedup-by (feed/items stream) key-fn))))
|
||||
|
||||
; --- keys -------------------------------------------------------------------
|
||||
|
||||
(define
|
||||
feed/activity-key
|
||||
(fn (a) (list (get a :actor) (get a :verb) (get a :object))))
|
||||
|
||||
; collapse cross-actor duplicates of the same verb+object (e.g. likes)
|
||||
(define feed/collapse-key (fn (a) (list (get a :verb) (get a :object))))
|
||||
|
||||
; per-receiver inbox key — one inbox event per (receiver, actor, verb, object)
|
||||
(define
|
||||
feed/event-key
|
||||
(fn
|
||||
(ev)
|
||||
(let
|
||||
((a (get ev :activity)))
|
||||
(list (get ev :to) (get a :actor) (get a :verb) (get a :object)))))
|
||||
|
||||
; verbs whose duplicates collapse across actors (reactions, not authorship).
|
||||
; rebindable: callers can (set! feed/collapse-verbs ...) to tune the policy.
|
||||
(define
|
||||
feed/collapse-verbs
|
||||
(list "like" "favourite" "follow" "boost" "repost"))
|
||||
|
||||
; per-verb key: collapse-verbs fold on (verb object); the rest key on
|
||||
; (actor verb object).
|
||||
(define
|
||||
feed/smart-key
|
||||
(fn
|
||||
(a)
|
||||
(if
|
||||
(feed/-elem? (get a :verb) feed/collapse-verbs)
|
||||
(feed/collapse-key a)
|
||||
(feed/activity-key a))))
|
||||
|
||||
; --- ready-made dedupers ----------------------------------------------------
|
||||
|
||||
(define feed/dedupe-activities (fn (s) (feed/dedupe s feed/activity-key)))
|
||||
|
||||
(define feed/dedupe-collapse (fn (s) (feed/dedupe s feed/collapse-key)))
|
||||
|
||||
; verb-aware: reactions collapse cross-actor, posts stay distinct per actor
|
||||
(define feed/dedupe-smart (fn (s) (feed/dedupe s feed/smart-key)))
|
||||
|
||||
; dedupe an inbox: at most one event per receiver per (actor verb object)
|
||||
(define feed/dedupe-inbox (fn (inbox) (feed/dedupe inbox feed/event-key)))
|
||||
114
lib/feed/fanout.sx
Normal file
114
lib/feed/fanout.sx
Normal file
@@ -0,0 +1,114 @@
|
||||
; feed/fanout — THE SHOWCASE. Fan activities out to followers via the APL outer
|
||||
; product (∘.×). activities ∘.× audience → an (activity × follower) matrix of
|
||||
; inbox events; flatten to a vector; guard-keep only real follow edges.
|
||||
;
|
||||
; Requires: lib/apl/runtime.sx, lib/feed/normalize.sx, lib/feed/stream.sx.
|
||||
;
|
||||
; NOTE: apl-outer's combiner result is run through (if (scalar? r) (disclose r) r).
|
||||
; A bare dict counts as a scalar (shape ()) and disclose nils it — so the combiner
|
||||
; must (enclose ...) its event dict; apl-outer then discloses it back intact.
|
||||
|
||||
; --- graph: {followee -> (list of followers)} -------------------------------
|
||||
|
||||
(define feed/followers (fn (graph user) (get graph user (list))))
|
||||
|
||||
; build a graph from (follower followee) edges: "follower follows followee"
|
||||
(define
|
||||
feed/follow-graph
|
||||
(fn
|
||||
(edges)
|
||||
(reduce
|
||||
(fn
|
||||
(g e)
|
||||
(let
|
||||
((follower (first e)) (followee (nth e 1)))
|
||||
(assoc
|
||||
g
|
||||
followee
|
||||
(append (feed/followers g followee) (list follower)))))
|
||||
{}
|
||||
edges)))
|
||||
|
||||
; --- helpers ----------------------------------------------------------------
|
||||
|
||||
; unwrap an apl-scalar (has :ravel) back to its value; pass activities through
|
||||
(define
|
||||
feed/-val
|
||||
(fn
|
||||
(x)
|
||||
(if (and (= (type-of x) "dict") (has-key? x :ravel)) (disclose x) x)))
|
||||
|
||||
(define feed/-elem? (fn (x lst) (some (fn (y) (equal? x y)) lst)))
|
||||
|
||||
(define
|
||||
feed/-distinct
|
||||
(fn
|
||||
(lst)
|
||||
(if
|
||||
(= (len lst) 0)
|
||||
(list)
|
||||
(get (apl-unique (make-array (list (len lst)) lst)) :ravel))))
|
||||
|
||||
; rank-2 matrix -> rank-1 stream of its ravel
|
||||
(define feed/-flatten (fn (arr) (feed/stream (get arr :ravel))))
|
||||
|
||||
; distinct receivers across the whole graph, sorted for determinism
|
||||
; (dict key order is unspecified, so sort to pin audience/recipient ordering)
|
||||
(define
|
||||
feed/audience
|
||||
(fn
|
||||
(graph)
|
||||
(sort
|
||||
(feed/-distinct
|
||||
(reduce
|
||||
(fn (acc k) (append acc (feed/followers graph k)))
|
||||
(list)
|
||||
(keys graph))))))
|
||||
|
||||
; --- the outer product ------------------------------------------------------
|
||||
|
||||
; one (activity, follower) inbox event, enclosed so apl-outer keeps the dict
|
||||
(define feed/-mk-event (fn (a f) (enclose {:activity (feed/-val a) :to (feed/-val f)})))
|
||||
|
||||
; keep events where :to actually follows the activity's actor
|
||||
(define
|
||||
feed/-edge?
|
||||
(fn
|
||||
(graph)
|
||||
(fn
|
||||
(ev)
|
||||
(feed/-elem?
|
||||
(get ev :to)
|
||||
(feed/followers graph (get (get ev :activity) :actor))))))
|
||||
|
||||
; fanout — activities ∘.× audience, flatten, guard-keep real edges
|
||||
(define
|
||||
feed/fanout
|
||||
(fn
|
||||
(stream graph)
|
||||
(let
|
||||
((matrix (apl-outer feed/-mk-event stream (feed/stream (feed/audience graph)))))
|
||||
(feed/filter (feed/-flatten matrix) (feed/-edge? graph)))))
|
||||
|
||||
; --- inbox queries ----------------------------------------------------------
|
||||
|
||||
(define
|
||||
feed/inbox-for
|
||||
(fn
|
||||
(inbox user)
|
||||
(feed/filter inbox (fn (ev) (equal? (get ev :to) user)))))
|
||||
|
||||
(define
|
||||
feed/recipients
|
||||
(fn
|
||||
(inbox)
|
||||
(feed/-distinct (map (fn (ev) (get ev :to)) (feed/items inbox)))))
|
||||
|
||||
; the activities (unwrapped) destined for a user
|
||||
(define
|
||||
feed/inbox-activities
|
||||
(fn
|
||||
(inbox user)
|
||||
(map
|
||||
(fn (ev) (get ev :activity))
|
||||
(feed/items (feed/inbox-for inbox user)))))
|
||||
60
lib/feed/fed.sx
Normal file
60
lib/feed/fed.sx
Normal file
@@ -0,0 +1,60 @@
|
||||
; feed/fed — federation. Outbound: a local post fans out, then splits into local
|
||||
; vs remote inboxes; remote events are handed to an injected send-fn. Inbound:
|
||||
; peer activities merge into the local stream, deduped. Backfill: pull peer
|
||||
; history via an injected fetch-fn and merge.
|
||||
;
|
||||
; remote? / send-fn / fetch-fn are injected so real fed-sx transport wires in here
|
||||
; without feed depending on it.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx,
|
||||
; lib/feed/dedupe.sx.
|
||||
|
||||
; --- merge / ingest ---------------------------------------------------------
|
||||
|
||||
(define
|
||||
feed/merge
|
||||
(fn (s1 s2) (feed/stream (append (feed/items s1) (feed/items s2)))))
|
||||
|
||||
; merge a peer stream into local, dropping (actor verb object) duplicates
|
||||
(define
|
||||
feed/ingest
|
||||
(fn (local peer) (feed/dedupe-activities (feed/merge local peer))))
|
||||
|
||||
; --- inbound ----------------------------------------------------------------
|
||||
|
||||
; peer pushes raw activities to the local inbox; normalize + ingest
|
||||
(define
|
||||
feed/inbound
|
||||
(fn
|
||||
(local raw-activities)
|
||||
(feed/ingest local (feed/stream (map feed/normalize raw-activities)))))
|
||||
|
||||
; backfill on subscribe: pull peer history via fetch-fn, normalize, ingest
|
||||
(define
|
||||
feed/backfill
|
||||
(fn (local fetch-fn peer-id) (feed/inbound local (fetch-fn peer-id))))
|
||||
|
||||
; --- outbound ---------------------------------------------------------------
|
||||
|
||||
; split an inbox into local vs remote deliveries by viewer-id predicate
|
||||
(define feed/partition-inbox (fn (inbox remote?) {:local (feed/filter inbox (fn (ev) (not (remote? (get ev :to))))) :remote (feed/filter inbox (fn (ev) (remote? (get ev :to))))}))
|
||||
|
||||
; fan a stream out over the graph, then partition by locality
|
||||
(define
|
||||
feed/federate
|
||||
(fn
|
||||
(stream graph remote?)
|
||||
(feed/partition-inbox (feed/fanout stream graph) remote?)))
|
||||
|
||||
; deliver: hand each remote event to send-fn, return the local inbox to enqueue
|
||||
(define
|
||||
feed/deliver
|
||||
(fn
|
||||
(stream graph remote? send-fn)
|
||||
(let
|
||||
((parts (feed/federate stream graph remote?)))
|
||||
(begin
|
||||
(for-each
|
||||
(fn (ev) (send-fn (get ev :to) (get ev :activity)))
|
||||
(feed/items (get parts :remote)))
|
||||
(get parts :local)))))
|
||||
23
lib/feed/home.sx
Normal file
23
lib/feed/home.sx
Normal file
@@ -0,0 +1,23 @@
|
||||
; feed/home — the capstone. A user's home timeline is the whole pipeline as one
|
||||
; line: fan all activities out over the follow graph, take the events landing in
|
||||
; the viewer's inbox, dedupe cross-posts, apply the viewer's ACL, rank, take N.
|
||||
;
|
||||
; Requires: fanout.sx, dedupe.sx, acl.sx (feed/timeline), rank.sx, stream.sx.
|
||||
|
||||
; the activities in a user's inbox, as a stream
|
||||
(define
|
||||
feed/inbox-stream
|
||||
(fn (inbox user) (feed/stream (feed/inbox-activities inbox user))))
|
||||
|
||||
; fanout ∘ inbox ∘ dedupe ∘ ACL ∘ rank ∘ take
|
||||
(define
|
||||
feed/home
|
||||
(fn
|
||||
(stream graph viewer permit? score-fn n)
|
||||
(feed/timeline
|
||||
(feed/dedupe-activities
|
||||
(feed/inbox-stream (feed/fanout stream graph) viewer))
|
||||
viewer
|
||||
permit?
|
||||
score-fn
|
||||
n)))
|
||||
44
lib/feed/mute.sx
Normal file
44
lib/feed/mute.sx
Normal file
@@ -0,0 +1,44 @@
|
||||
; feed/mute — viewer-controlled filtering. ACL (acl.sx) is author-controlled
|
||||
; visibility; mute is the reader's own preference: hide muted actors or tags.
|
||||
; Like ACL it is per-viewer and applied per request, never cached.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-elem?).
|
||||
|
||||
; drop activities authored by a muted actor
|
||||
(define
|
||||
feed/mute-actors
|
||||
(fn
|
||||
(stream actors)
|
||||
(feed/filter
|
||||
stream
|
||||
(fn (a) (not (feed/-elem? (get a :actor) actors))))))
|
||||
|
||||
; drop activities carrying any muted tag
|
||||
(define
|
||||
feed/mute-tags
|
||||
(fn
|
||||
(stream tags)
|
||||
(feed/filter
|
||||
stream
|
||||
(fn (a) (not (some (fn (t) (feed/-elem? t tags)) (get a :tags)))))))
|
||||
|
||||
; drop activities about a muted object (thread mute)
|
||||
(define
|
||||
feed/mute-objects
|
||||
(fn
|
||||
(stream objects)
|
||||
(feed/filter
|
||||
stream
|
||||
(fn (a) (not (feed/-elem? (get a :object) objects))))))
|
||||
|
||||
; apply a viewer preference bag: {:mute-actors (...) :mute-tags (...) :mute-objects (...)}
|
||||
(define
|
||||
feed/apply-prefs
|
||||
(fn
|
||||
(stream prefs)
|
||||
(feed/mute-objects
|
||||
(feed/mute-tags
|
||||
(feed/mute-actors stream (get prefs :mute-actors (list)))
|
||||
(get prefs :mute-tags (list)))
|
||||
(get prefs :mute-objects (list)))))
|
||||
31
lib/feed/normalize.sx
Normal file
31
lib/feed/normalize.sx
Normal file
@@ -0,0 +1,31 @@
|
||||
; feed/normalize — coerce arbitrary input into the canonical activity record.
|
||||
; An activity is a small dict {:actor :verb :object :at :tags}; a stream is an
|
||||
; APL vector of such dicts (see stream.sx). Extra keys on the raw input survive
|
||||
; (e.g. :visible-to for ACL, peer metadata for federation) — :tags is the
|
||||
; flexible bag but the record is not closed.
|
||||
|
||||
(define feed/activity-keys (list :actor :verb :object :at :tags))
|
||||
|
||||
(define
|
||||
feed/normalize
|
||||
(fn
|
||||
(raw)
|
||||
(let
|
||||
((d (if (= (type-of raw) "dict") raw {})))
|
||||
(merge d {:actor (get d :actor "") :object (get d :object nil) :at (get d :at 0) :tags (let ((t (get d :tags (list)))) (if (list? t) t (list t))) :verb (get d :verb "post")}))))
|
||||
|
||||
(define
|
||||
feed/activity
|
||||
(fn (actor verb object at tags) (feed/normalize {:actor actor :object object :at at :tags tags :verb verb})))
|
||||
|
||||
(define feed/actor (fn (a) (get a :actor)))
|
||||
(define feed/verb (fn (a) (get a :verb)))
|
||||
(define feed/object (fn (a) (get a :object)))
|
||||
(define feed/at (fn (a) (get a :at)))
|
||||
(define feed/tags (fn (a) (get a :tags)))
|
||||
|
||||
(define
|
||||
feed/activity?
|
||||
(fn
|
||||
(a)
|
||||
(and (= (type-of a) "dict") (has-key? a :actor) (has-key? a :verb))))
|
||||
45
lib/feed/notify.sx
Normal file
45
lib/feed/notify.sx
Normal file
@@ -0,0 +1,45 @@
|
||||
; feed/notify — a notification feed is a thin layer over a recipient's inbox:
|
||||
; the events directed at a user, optionally verb-filtered, and a digest that
|
||||
; collapses "alice, bob and 1 other liked X" by (verb, object).
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/inbox-for, feed/-elem?).
|
||||
|
||||
; all inbox events for a user (their raw notifications)
|
||||
(define feed/notifications (fn (inbox user) (feed/inbox-for inbox user)))
|
||||
|
||||
; restrict to notification-worthy verbs (e.g. (list "like" "reply" "follow"))
|
||||
(define
|
||||
feed/notify-verbs
|
||||
(fn
|
||||
(inbox user verbs)
|
||||
(feed/filter
|
||||
(feed/inbox-for inbox user)
|
||||
(fn (ev) (feed/-elem? (get (get ev :activity) :verb) verbs)))))
|
||||
|
||||
; group key "verb|object" — deterministic, sortable
|
||||
(define
|
||||
feed/-notify-key
|
||||
(fn
|
||||
(ev)
|
||||
(let
|
||||
((a (get ev :activity)))
|
||||
(string-append (get a :verb) "|" (get a :object)))))
|
||||
|
||||
; digest: one entry per (verb, object) with the distinct actors and a count,
|
||||
; ordered by key for determinism.
|
||||
(define
|
||||
feed/notify-digest
|
||||
(fn
|
||||
(inbox user)
|
||||
(let
|
||||
((events (feed/items (feed/inbox-for inbox user))))
|
||||
(let
|
||||
((groups (reduce (fn (g ev) (let ((a (get ev :activity)) (k (feed/-notify-key ev))) (let ((cur (get g k {:object (get a :object) :actors (list) :verb (get a :verb)}))) (assoc g k (assoc cur :actors (append (get cur :actors) (list (get a :actor)))))))) {} events)))
|
||||
(map
|
||||
(fn
|
||||
(k)
|
||||
(let
|
||||
((grp (get groups k)))
|
||||
(assoc grp :count (len (get grp :actors)))))
|
||||
(sort (keys groups)))))))
|
||||
50
lib/feed/page.sx
Normal file
50
lib/feed/page.sx
Normal file
@@ -0,0 +1,50 @@
|
||||
; feed/page — pagination. Offset/limit for indexed access, and cursor-based
|
||||
; (by :at) for recency feeds, which is stable under inserts: a cursor is the
|
||||
; :at of the last item seen, and the next page is the newest items older than it.
|
||||
;
|
||||
; Requires: lib/feed/stream.sx (feed/recent, feed/take, feed/filter).
|
||||
|
||||
; --- offset / limit ---------------------------------------------------------
|
||||
|
||||
(define
|
||||
feed/page
|
||||
(fn
|
||||
(stream offset limit)
|
||||
(feed/stream (take (drop (feed/items stream) offset) limit))))
|
||||
|
||||
(define
|
||||
feed/page-count
|
||||
(fn (stream limit) (ceil (/ (feed/count stream) limit))))
|
||||
|
||||
; --- cursor (recency feeds) -------------------------------------------------
|
||||
|
||||
; activities strictly older than cursor (scroll down / load older)
|
||||
(define
|
||||
feed/before
|
||||
(fn
|
||||
(stream cursor)
|
||||
(feed/filter stream (fn (a) (< (get a :at) cursor)))))
|
||||
|
||||
; activities strictly newer than cursor (load newer / "N new posts")
|
||||
(define
|
||||
feed/after
|
||||
(fn
|
||||
(stream cursor)
|
||||
(feed/filter stream (fn (a) (> (get a :at) cursor)))))
|
||||
|
||||
; one page: the `limit` newest activities older than cursor, newest first
|
||||
(define
|
||||
feed/page-before
|
||||
(fn
|
||||
(stream cursor limit)
|
||||
(feed/take (feed/recent (feed/before stream cursor)) limit)))
|
||||
|
||||
; cursor to fetch the next (older) page: :at of the last item of a page,
|
||||
; or nil when the page is empty (end of feed)
|
||||
(define
|
||||
feed/next-cursor
|
||||
(fn
|
||||
(page)
|
||||
(let
|
||||
((items (feed/items page)))
|
||||
(if (= (len items) 0) nil (get (last items) :at)))))
|
||||
92
lib/feed/rank.sx
Normal file
92
lib/feed/rank.sx
Normal file
@@ -0,0 +1,92 @@
|
||||
; feed/rank — scoring + ranking. Scorers are (activity -> number). Ranking is a
|
||||
; stable two-pass grade-down: first by :at descending (the tiebreak), then by
|
||||
; score descending — so ties resolve by recency, then by input order. Fully
|
||||
; deterministic on ties.
|
||||
;
|
||||
; Requires: lib/apl/runtime.sx, lib/feed/normalize.sx, lib/feed/stream.sx.
|
||||
|
||||
; --- scorers ----------------------------------------------------------------
|
||||
|
||||
; recency: half-life decay. score = 0.5 ^ (age / half-life). at==now -> 1.0.
|
||||
(define
|
||||
feed/recency
|
||||
(fn
|
||||
(now half-life)
|
||||
(fn (a) (expt 0.5 (/ (- now (get a :at)) half-life)))))
|
||||
|
||||
; velocity: how many of this actor's activities fall in (at-window, at] —
|
||||
; a burst of recent activity scores higher.
|
||||
(define
|
||||
feed/velocity
|
||||
(fn
|
||||
(stream window)
|
||||
(fn
|
||||
(a)
|
||||
(len
|
||||
(filter
|
||||
(fn
|
||||
(b)
|
||||
(and
|
||||
(equal? (get b :actor) (get a :actor))
|
||||
(<= (get b :at) (get a :at))
|
||||
(> (get b :at) (- (get a :at) window))))
|
||||
(feed/items stream))))))
|
||||
|
||||
; engagement: how many activities in the stream touch this activity's :object
|
||||
(define
|
||||
feed/engagement
|
||||
(fn
|
||||
(stream)
|
||||
(fn
|
||||
(a)
|
||||
(len
|
||||
(filter
|
||||
(fn (b) (equal? (get b :object) (get a :object)))
|
||||
(feed/items stream))))))
|
||||
|
||||
; composite: weighted sum. parts = (list (list weight scorer) ...)
|
||||
(define
|
||||
feed/composite
|
||||
(fn
|
||||
(parts)
|
||||
(fn
|
||||
(a)
|
||||
(reduce
|
||||
(fn (acc p) (+ acc (* (first p) ((nth p 1) a))))
|
||||
0
|
||||
parts))))
|
||||
|
||||
; --- ranking ----------------------------------------------------------------
|
||||
|
||||
; stable reorder of items by key-fn, descending (grade-down is stable)
|
||||
(define
|
||||
feed/-desc-by
|
||||
(fn
|
||||
(items key-fn)
|
||||
(let
|
||||
((keys (make-array (list (len items)) (map key-fn items))))
|
||||
(let
|
||||
((order (get (apl-grade-down keys) :ravel)))
|
||||
(map (fn (i) (nth items (- i 1))) order)))))
|
||||
|
||||
; rank by score descending; ties -> :at descending -> input order
|
||||
(define
|
||||
feed/rank
|
||||
(fn
|
||||
(stream score-fn)
|
||||
(let
|
||||
((by-at (feed/-desc-by (feed/items stream) feed/at)))
|
||||
(feed/stream (feed/-desc-by by-at score-fn)))))
|
||||
|
||||
; attach a :score to each activity (for inspection / debugging)
|
||||
(define
|
||||
feed/with-scores
|
||||
(fn
|
||||
(stream score-fn)
|
||||
(feed/stream
|
||||
(map (fn (a) (assoc a :score (score-fn a))) (feed/items stream)))))
|
||||
|
||||
; top-N ranked timeline
|
||||
(define
|
||||
feed/top
|
||||
(fn (stream score-fn n) (feed/take (feed/rank stream score-fn) n)))
|
||||
19
lib/feed/scoreboard.json
Normal file
19
lib/feed/scoreboard.json
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"suites": {
|
||||
"basic": {"pass": 30, "fail": 0},
|
||||
"fanout": {"pass": 29, "fail": 0},
|
||||
"rank": {"pass": 24, "fail": 0},
|
||||
"integration": {"pass": 22, "fail": 0},
|
||||
"content": {"pass": 15, "fail": 0},
|
||||
"notify": {"pass": 8, "fail": 0},
|
||||
"home": {"pass": 6, "fail": 0},
|
||||
"dedupe": {"pass": 9, "fail": 0},
|
||||
"trending": {"pass": 11, "fail": 0},
|
||||
"mute": {"pass": 9, "fail": 0},
|
||||
"page": {"pass": 14, "fail": 0},
|
||||
"thread": {"pass": 12, "fail": 0}
|
||||
},
|
||||
"total_pass": 189,
|
||||
"total_fail": 0,
|
||||
"total": 189
|
||||
}
|
||||
19
lib/feed/scoreboard.md
Normal file
19
lib/feed/scoreboard.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# feed Conformance Scoreboard
|
||||
|
||||
_Generated by `lib/feed/conformance.sh`_
|
||||
|
||||
| Suite | Pass | Fail | Total |
|
||||
|-------|-----:|-----:|------:|
|
||||
| basic | 30 | 0 | 30 |
|
||||
| fanout | 29 | 0 | 29 |
|
||||
| rank | 24 | 0 | 24 |
|
||||
| integration | 22 | 0 | 22 |
|
||||
| content | 15 | 0 | 15 |
|
||||
| notify | 8 | 0 | 8 |
|
||||
| home | 6 | 0 | 6 |
|
||||
| dedupe | 9 | 0 | 9 |
|
||||
| trending | 11 | 0 | 11 |
|
||||
| mute | 9 | 0 | 9 |
|
||||
| page | 14 | 0 | 14 |
|
||||
| thread | 12 | 0 | 12 |
|
||||
| **Total** | **189** | **0** | **189** |
|
||||
75
lib/feed/stream.sx
Normal file
75
lib/feed/stream.sx
Normal file
@@ -0,0 +1,75 @@
|
||||
; feed/stream — a stream is an APL vector (rank-1 array) whose ravel holds
|
||||
; activity dicts. Operations lift APL primitives onto this shape: filter via
|
||||
; compress (/), sort via grade (⍋), take via ↑, reverse via ⌽.
|
||||
;
|
||||
; Requires: lib/apl/runtime.sx, lib/feed/normalize.sx (loaded by harness).
|
||||
|
||||
(define feed/stream (fn (acts) (make-array (list (len acts)) acts)))
|
||||
|
||||
(define feed/items (fn (s) (get s :ravel)))
|
||||
|
||||
(define feed/count (fn (s) (len (get s :ravel))))
|
||||
|
||||
(define feed/empty (feed/stream (list)))
|
||||
|
||||
(define feed/empty? (fn (s) (= (feed/count s) 0)))
|
||||
|
||||
; filter — bool mask ∘ compress. pred : activity -> truthy
|
||||
(define
|
||||
feed/filter
|
||||
(fn
|
||||
(s pred)
|
||||
(let
|
||||
((items (get s :ravel)))
|
||||
(let
|
||||
((mask (make-array (list (len items)) (map (fn (a) (if (pred a) 1 0)) items))))
|
||||
(apl-compress mask s)))))
|
||||
|
||||
; sort-by — ascending, stable on ties (grade-up is stable). key-fn : activity -> number
|
||||
(define
|
||||
feed/sort-by
|
||||
(fn
|
||||
(s key-fn)
|
||||
(let
|
||||
((items (get s :ravel)))
|
||||
(let
|
||||
((keys (make-array (list (len items)) (map key-fn items))))
|
||||
(let
|
||||
((order (get (apl-grade-up keys) :ravel)))
|
||||
(feed/stream (map (fn (i) (nth items (- i 1))) order)))))))
|
||||
|
||||
(define feed/sort-by-at (fn (s) (feed/sort-by s feed/at)))
|
||||
|
||||
; newest-first: ascending sort then reverse (⌽)
|
||||
(define feed/recent (fn (s) (apl-reverse (feed/sort-by-at s))))
|
||||
|
||||
; take N (↑), clamped to stream length so it never over-takes/pads
|
||||
(define
|
||||
feed/take
|
||||
(fn
|
||||
(s n)
|
||||
(let
|
||||
((c (feed/count s)))
|
||||
(if (>= n c) s (apl-take (apl-scalar n) s)))))
|
||||
|
||||
(define feed/reverse (fn (s) (apl-reverse s)))
|
||||
|
||||
; common predicates
|
||||
(define
|
||||
feed/by-actor
|
||||
(fn (s actor) (feed/filter s (fn (a) (equal? (get a :actor) actor)))))
|
||||
|
||||
(define
|
||||
feed/by-verb
|
||||
(fn (s verb) (feed/filter s (fn (a) (equal? (get a :verb) verb)))))
|
||||
|
||||
(define
|
||||
feed/by-object
|
||||
(fn
|
||||
(s object)
|
||||
(feed/filter s (fn (a) (equal? (get a :object) object)))))
|
||||
|
||||
; activities at or after timestamp t
|
||||
(define
|
||||
feed/since
|
||||
(fn (s t) (feed/filter s (fn (a) (>= (get a :at) t)))))
|
||||
118
lib/feed/tests/basic.sx
Normal file
118
lib/feed/tests/basic.sx
Normal file
@@ -0,0 +1,118 @@
|
||||
; Phase 1 — normalize, stream ops, api. Uses the feed-test harness
|
||||
; (feed-test name got expected) provided by conformance.sh.
|
||||
|
||||
; ---------- normalize ----------
|
||||
|
||||
(feed-test
|
||||
"normalize default actor"
|
||||
(feed/actor (feed/normalize {}))
|
||||
"")
|
||||
(feed-test
|
||||
"normalize default verb"
|
||||
(feed/verb (feed/normalize {}))
|
||||
"post")
|
||||
(feed-test
|
||||
"normalize default at"
|
||||
(feed/at (feed/normalize {}))
|
||||
0)
|
||||
(feed-test
|
||||
"normalize default object"
|
||||
(feed/object (feed/normalize {}))
|
||||
nil)
|
||||
(feed-test
|
||||
"normalize default tags"
|
||||
(feed/tags (feed/normalize {}))
|
||||
(list))
|
||||
(feed-test
|
||||
"normalize keeps actor"
|
||||
(feed/actor (feed/normalize {:actor "alice"}))
|
||||
"alice")
|
||||
(feed-test
|
||||
"normalize keeps verb"
|
||||
(feed/verb (feed/normalize {:verb "like"}))
|
||||
"like")
|
||||
(feed-test
|
||||
"normalize scalar tag -> list"
|
||||
(feed/tags (feed/normalize {:tags "x"}))
|
||||
(list "x"))
|
||||
(feed-test
|
||||
"normalize list tags kept"
|
||||
(feed/tags (feed/normalize {:tags (list "a" "b")}))
|
||||
(list "a" "b"))
|
||||
(feed-test
|
||||
"activity constructor at"
|
||||
(feed/at (feed/activity "a" "post" "o" 5 (list)))
|
||||
5)
|
||||
(feed-test
|
||||
"activity? on activity"
|
||||
(feed/activity? (feed/normalize {:actor "a"}))
|
||||
true)
|
||||
(feed-test "activity? on number" (feed/activity? 5) false)
|
||||
(feed-test "activity? on bare dict" (feed/activity? {:foo 1}) false)
|
||||
|
||||
; ---------- stream ----------
|
||||
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 30 (list))
|
||||
(feed/activity "bob" "like" "p1" 10 (list))
|
||||
(feed/activity "alice" "post" "p2" 20 (list)))))
|
||||
|
||||
(feed-test "stream count" (feed/count S) 3)
|
||||
(feed-test "stream items len" (len (feed/items S)) 3)
|
||||
(feed-test
|
||||
"sort-by-at actors asc"
|
||||
(map feed/actor (feed/items (feed/sort-by-at S)))
|
||||
(list "bob" "alice" "alice"))
|
||||
(feed-test
|
||||
"recent newest first"
|
||||
(map feed/at (feed/items (feed/recent S)))
|
||||
(list 30 20 10))
|
||||
(feed-test
|
||||
"take 2 of recent"
|
||||
(feed/count (feed/take (feed/recent S) 2))
|
||||
2)
|
||||
(feed-test
|
||||
"take clamps past end"
|
||||
(feed/count (feed/take S 10))
|
||||
3)
|
||||
(feed-test
|
||||
"by-actor alice count"
|
||||
(feed/count (feed/by-actor S "alice"))
|
||||
2)
|
||||
(feed-test
|
||||
"by-verb like actor"
|
||||
(map feed/actor (feed/items (feed/by-verb S "like")))
|
||||
(list "bob"))
|
||||
(feed-test
|
||||
"by-object p1 count"
|
||||
(feed/count (feed/by-object S "p1"))
|
||||
2)
|
||||
(feed-test
|
||||
"since 20 count"
|
||||
(feed/count (feed/since S 20))
|
||||
2)
|
||||
(feed-test
|
||||
"reverse ats"
|
||||
(map feed/at (feed/items (feed/reverse S)))
|
||||
(list 20 10 30))
|
||||
(feed-test "empty? on empty" (feed/empty? feed/empty) true)
|
||||
(feed-test
|
||||
"empty? on filtered-out"
|
||||
(feed/empty? (feed/by-actor S "zzz"))
|
||||
true)
|
||||
|
||||
; ---------- api ----------
|
||||
|
||||
(feed/reset!)
|
||||
(feed/post {:actor "x" :at 1 :verb "post"})
|
||||
(feed/post {:actor "y" :at 2 :verb "like"})
|
||||
(feed-test "api size after posts" (feed/size) 2)
|
||||
(feed-test "api all count" (feed/count (feed/all)) 2)
|
||||
(feed-test
|
||||
"post returns normalized verb"
|
||||
(feed/verb (feed/post {:actor "z"}))
|
||||
"post")
|
||||
(feed-test "api size after third post" (feed/size) 3)
|
||||
85
lib/feed/tests/content.sx
Normal file
85
lib/feed/tests/content.sx
Normal file
@@ -0,0 +1,85 @@
|
||||
; Follow-up — TF-IDF content ranking over :tags. (feed-test name got expected)
|
||||
|
||||
(define
|
||||
corpus
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "u" :object "o1" :at 10 :tags (list "cats" "funny")})
|
||||
(feed/normalize {:actor "u" :object "o2" :at 20 :tags (list "cats" "news")})
|
||||
(feed/normalize {:actor "u" :object "o3" :at 30 :tags (list "politics" "news")})
|
||||
(feed/normalize {:actor "u" :object "o4" :at 40 :tags (list "cats")}))))
|
||||
|
||||
; ---------- document frequency ----------
|
||||
|
||||
(feed-test "df cats" (get (feed/tag-df corpus) "cats") 3)
|
||||
(feed-test "df news" (get (feed/tag-df corpus) "news") 2)
|
||||
(feed-test "df funny" (get (feed/tag-df corpus) "funny") 1)
|
||||
(feed-test "df politics" (get (feed/tag-df corpus) "politics") 1)
|
||||
(feed-test "df full" (feed/tag-df corpus) {:news 2 :funny 1 :politics 1 :cats 3})
|
||||
|
||||
; ---------- inverse document frequency ----------
|
||||
|
||||
(feed-test
|
||||
"idf news = log(4/2)"
|
||||
(get (feed/tag-idf corpus) "news")
|
||||
(log 2))
|
||||
(feed-test
|
||||
"idf funny = log(4/1)"
|
||||
(get (feed/tag-idf corpus) "funny")
|
||||
(log 4))
|
||||
(feed-test
|
||||
"rarer tag has higher idf"
|
||||
(>
|
||||
(get (feed/tag-idf corpus) "funny")
|
||||
(get (feed/tag-idf corpus) "cats"))
|
||||
true)
|
||||
|
||||
; ---------- tf-idf scoring ----------
|
||||
|
||||
(define idf (feed/tag-idf corpus))
|
||||
|
||||
(feed-test
|
||||
"score query funny on o1"
|
||||
((feed/tfidf-score idf (list "funny")) (feed/normalize {:actor "u" :object "x" :tags (list "cats" "funny")}))
|
||||
(log 4))
|
||||
(feed-test
|
||||
"score query funny on non-match"
|
||||
((feed/tfidf-score idf (list "funny")) (feed/normalize {:actor "u" :object "x" :tags (list "cats")}))
|
||||
0)
|
||||
(feed-test
|
||||
"unknown query tag scores 0"
|
||||
((feed/tfidf-score idf (list "zzz")) (feed/normalize {:actor "u" :object "x" :tags (list "cats")}))
|
||||
0)
|
||||
|
||||
; ---------- ranking by relevance ----------
|
||||
|
||||
; query news: o2,o3 match (score log2), o1,o4 don't (0); ties break by :at desc
|
||||
(feed-test
|
||||
"by-relevance news order"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/by-relevance corpus (list "news"))))
|
||||
(list "o3" "o2" "o4" "o1"))
|
||||
|
||||
; query funny: only o1 matches -> ranks first
|
||||
(feed-test
|
||||
"by-relevance funny first"
|
||||
(get
|
||||
(nth (feed/items (feed/by-relevance corpus (list "funny"))) 0)
|
||||
:object)
|
||||
"o1")
|
||||
|
||||
; query (cats news): o2 carries both tags -> highest combined tf-idf
|
||||
(feed-test
|
||||
"by-relevance cats+news top"
|
||||
(get
|
||||
(nth
|
||||
(feed/items (feed/by-relevance corpus (list "cats" "news")))
|
||||
0)
|
||||
:object)
|
||||
"o2")
|
||||
|
||||
(feed-test
|
||||
"by-relevance preserves count"
|
||||
(feed/count (feed/by-relevance corpus (list "cats")))
|
||||
4)
|
||||
56
lib/feed/tests/dedupe.sx
Normal file
56
lib/feed/tests/dedupe.sx
Normal file
@@ -0,0 +1,56 @@
|
||||
; Follow-up — verb-aware (smart) dedupe. (feed-test name got expected)
|
||||
|
||||
; reactions (like/follow) collapse cross-actor; posts stay distinct per actor
|
||||
(define
|
||||
M
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "like" "X" 1 (list))
|
||||
(feed/activity "bob" "like" "X" 2 (list))
|
||||
(feed/activity "alice" "post" "P" 3 (list))
|
||||
(feed/activity "bob" "post" "P" 4 (list))
|
||||
(feed/activity "alice" "follow" "C" 5 (list))
|
||||
(feed/activity "bob" "follow" "C" 6 (list))))) ; collapses
|
||||
|
||||
(feed-test
|
||||
"smart dedupe total"
|
||||
(feed/count (feed/dedupe-smart M))
|
||||
4)
|
||||
(feed-test
|
||||
"smart keeps both posts"
|
||||
(feed/count (feed/by-verb (feed/dedupe-smart M) "post"))
|
||||
2)
|
||||
(feed-test
|
||||
"smart collapses likes to one"
|
||||
(feed/count (feed/by-verb (feed/dedupe-smart M) "like"))
|
||||
1)
|
||||
(feed-test
|
||||
"smart collapses follows to one"
|
||||
(feed/count (feed/by-verb (feed/dedupe-smart M) "follow"))
|
||||
1)
|
||||
(feed-test
|
||||
"collapsed like keeps first actor"
|
||||
(map feed/actor (feed/items (feed/by-verb (feed/dedupe-smart M) "like")))
|
||||
(list "alice"))
|
||||
|
||||
; contrast: plain activity dedupe keeps cross-actor likes distinct
|
||||
(feed-test
|
||||
"activity dedupe keeps both likes"
|
||||
(feed/count (feed/by-verb (feed/dedupe-activities M) "like"))
|
||||
2)
|
||||
|
||||
; contrast: blanket collapse folds the two posts (same verb+object) too
|
||||
(feed-test
|
||||
"collapse dedupe folds posts"
|
||||
(feed/count (feed/by-verb (feed/dedupe-collapse M) "post"))
|
||||
1)
|
||||
|
||||
; smart-key dispatch
|
||||
(feed-test
|
||||
"smart-key reaction -> (verb object)"
|
||||
(feed/smart-key (feed/activity "alice" "like" "X" 0 (list)))
|
||||
(list "like" "X"))
|
||||
(feed-test
|
||||
"smart-key post -> (actor verb object)"
|
||||
(feed/smart-key (feed/activity "alice" "post" "P" 0 (list)))
|
||||
(list "alice" "post" "P"))
|
||||
187
lib/feed/tests/fanout.sx
Normal file
187
lib/feed/tests/fanout.sx
Normal file
@@ -0,0 +1,187 @@
|
||||
; Phase 2 — fanout via outer product + dedupe. (feed-test name got expected)
|
||||
|
||||
; ---------- graph ----------
|
||||
|
||||
; edges: (follower followee). bob,carol follow alice; carol,dave follow bob.
|
||||
(define
|
||||
G
|
||||
(feed/follow-graph
|
||||
(list
|
||||
(list "bob" "alice")
|
||||
(list "carol" "alice")
|
||||
(list "carol" "bob")
|
||||
(list "dave" "bob"))))
|
||||
|
||||
(feed-test "followers alice" (feed/followers G "alice") (list "bob" "carol"))
|
||||
(feed-test "followers bob" (feed/followers G "bob") (list "carol" "dave"))
|
||||
(feed-test "followers unknown" (feed/followers G "zzz") (list))
|
||||
(feed-test "audience distinct" (feed/audience G) (list "bob" "carol" "dave"))
|
||||
|
||||
; ---------- fanout ----------
|
||||
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 10 (list))
|
||||
(feed/activity "alice" "post" "p2" 20 (list))
|
||||
(feed/activity "bob" "like" "p1" 30 (list)))))
|
||||
|
||||
(define IB (feed/fanout S G))
|
||||
|
||||
(feed-test "fanout total edges" (feed/count IB) 6)
|
||||
(feed-test
|
||||
"inbox bob count"
|
||||
(feed/count (feed/inbox-for IB "bob"))
|
||||
2)
|
||||
(feed-test
|
||||
"inbox carol count"
|
||||
(feed/count (feed/inbox-for IB "carol"))
|
||||
3)
|
||||
(feed-test
|
||||
"inbox dave count"
|
||||
(feed/count (feed/inbox-for IB "dave"))
|
||||
1)
|
||||
(feed-test
|
||||
"inbox alice (follows none)"
|
||||
(feed/count (feed/inbox-for IB "alice"))
|
||||
0)
|
||||
(feed-test
|
||||
"recipients order"
|
||||
(feed/recipients IB)
|
||||
(list "bob" "carol" "dave"))
|
||||
(feed-test
|
||||
"bob inbox objects"
|
||||
(map (fn (a) (get a :object)) (feed/inbox-activities IB "bob"))
|
||||
(list "p1" "p2"))
|
||||
(feed-test
|
||||
"dave inbox objects"
|
||||
(map (fn (a) (get a :object)) (feed/inbox-activities IB "dave"))
|
||||
(list "p1"))
|
||||
(feed-test
|
||||
"dave inbox verb"
|
||||
(map (fn (a) (get a :verb)) (feed/inbox-activities IB "dave"))
|
||||
(list "like"))
|
||||
|
||||
; empty graph → no audience → no edges
|
||||
(feed-test
|
||||
"empty graph fanout"
|
||||
(feed/count (feed/fanout S {}))
|
||||
0)
|
||||
|
||||
; actor nobody follows produces no edges
|
||||
(define
|
||||
Sghost
|
||||
(feed/stream (list (feed/activity "ghost" "post" "g1" 5 (list)))))
|
||||
(feed-test
|
||||
"unfollowed actor fanout"
|
||||
(feed/count (feed/fanout Sghost G))
|
||||
0)
|
||||
|
||||
; ---------- high fanout (popular actor) ----------
|
||||
|
||||
(define
|
||||
Gstar
|
||||
(feed/follow-graph
|
||||
(list
|
||||
(list "u1" "star")
|
||||
(list "u2" "star")
|
||||
(list "u3" "star")
|
||||
(list "u4" "star")
|
||||
(list "u5" "star"))))
|
||||
(define
|
||||
Sstar
|
||||
(feed/stream (list (feed/activity "star" "post" "s1" 1 (list)))))
|
||||
(feed-test
|
||||
"star fanout count"
|
||||
(feed/count (feed/fanout Sstar Gstar))
|
||||
5)
|
||||
(feed-test "star audience size" (len (feed/audience Gstar)) 5)
|
||||
|
||||
; ---------- mutual follow ----------
|
||||
|
||||
(define Gmut (feed/follow-graph (list (list "a" "b") (list "b" "a"))))
|
||||
(define
|
||||
Smut
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "a" "post" "pa" 1 (list))
|
||||
(feed/activity "b" "post" "pb" 2 (list)))))
|
||||
(define IBmut (feed/fanout Smut Gmut))
|
||||
(feed-test "mutual total" (feed/count IBmut) 2)
|
||||
(feed-test
|
||||
"mutual a gets pb"
|
||||
(map (fn (x) (get x :object)) (feed/inbox-activities IBmut "a"))
|
||||
(list "pb"))
|
||||
(feed-test
|
||||
"mutual b gets pa"
|
||||
(map (fn (x) (get x :object)) (feed/inbox-activities IBmut "b"))
|
||||
(list "pa"))
|
||||
|
||||
; ---------- dedupe ----------
|
||||
|
||||
(define
|
||||
Sdup2
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 1 (list))
|
||||
(feed/activity "alice" "post" "p1" 9 (list))
|
||||
(feed/activity "alice" "post" "p2" 2 (list)))))
|
||||
(feed-test
|
||||
"dedupe-activities collapses dup"
|
||||
(feed/count (feed/dedupe-activities Sdup2))
|
||||
2)
|
||||
(feed-test
|
||||
"dedupe-activities keeps distinct"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/dedupe-activities Sdup2)))
|
||||
(list "p1" "p2"))
|
||||
|
||||
(define
|
||||
Slikes
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "like" "X" 1 (list))
|
||||
(feed/activity "bob" "like" "X" 2 (list))
|
||||
(feed/activity "carol" "like" "Y" 3 (list)))))
|
||||
(feed-test
|
||||
"collapse cross-actor likes"
|
||||
(feed/count (feed/dedupe-collapse Slikes))
|
||||
2)
|
||||
(feed-test
|
||||
"collapse keeps distinct objects"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/dedupe-collapse Slikes)))
|
||||
(list "X" "Y"))
|
||||
|
||||
(feed-test
|
||||
"activity-key shape"
|
||||
(feed/activity-key (feed/activity "a" "post" "o" 0 (list)))
|
||||
(list "a" "post" "o"))
|
||||
(feed-test
|
||||
"collapse-key shape"
|
||||
(feed/collapse-key (feed/activity "a" "like" "o" 0 (list)))
|
||||
(list "like" "o"))
|
||||
|
||||
; cross-post: alice posts p1 twice → bob's inbox has it twice → dedupe-inbox → once
|
||||
(define
|
||||
Scross
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 1 (list))
|
||||
(feed/activity "alice" "post" "p1" 5 (list)))))
|
||||
(define IBcross (feed/fanout Scross G))
|
||||
(feed-test
|
||||
"cross-post raw bob count"
|
||||
(feed/count (feed/inbox-for IBcross "bob"))
|
||||
2)
|
||||
(feed-test
|
||||
"cross-post deduped bob count"
|
||||
(feed/count (feed/inbox-for (feed/dedupe-inbox IBcross) "bob"))
|
||||
1)
|
||||
(feed-test
|
||||
"dedupe-inbox keeps distinct receivers"
|
||||
(feed/count (feed/dedupe-inbox IBcross))
|
||||
2)
|
||||
73
lib/feed/tests/home.sx
Normal file
73
lib/feed/tests/home.sx
Normal file
@@ -0,0 +1,73 @@
|
||||
; Follow-up — feed/home capstone pipeline. (feed-test name got expected)
|
||||
|
||||
; alice follows star and bob (edges: follower followee)
|
||||
(define
|
||||
G
|
||||
(feed/follow-graph (list (list "alice" "star") (list "alice" "bob"))))
|
||||
|
||||
; star posts s1 then s2; bob posts b1; star re-posts s1 (cross-post dup);
|
||||
; zoe posts z1 (alice does NOT follow zoe)
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "star" "post" "s1" 10 (list))
|
||||
(feed/activity "star" "post" "s2" 20 (list))
|
||||
(feed/activity "bob" "post" "b1" 15 (list))
|
||||
(feed/activity "star" "post" "s1" 5 (list))
|
||||
(feed/activity "zoe" "post" "z1" 30 (list)))))
|
||||
|
||||
(define rec (feed/recency 100 10))
|
||||
|
||||
(feed-test
|
||||
"home count (deduped, followed only)"
|
||||
(feed/count (feed/home S G "alice" feed/permit-public? rec 10))
|
||||
3)
|
||||
|
||||
(feed-test
|
||||
"home order by recency"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/home S G "alice" feed/permit-public? rec 10)))
|
||||
(list "s2" "b1" "s1"))
|
||||
|
||||
(feed-test
|
||||
"home excludes unfollowed zoe"
|
||||
(feed/-elem?
|
||||
"z1"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/home S G "alice" feed/permit-public? rec 10))))
|
||||
false)
|
||||
|
||||
(feed-test
|
||||
"home top-2"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/home S G "alice" feed/permit-public? rec 2)))
|
||||
(list "s2" "b1"))
|
||||
|
||||
(feed-test
|
||||
"home dedupes cross-post (one s1)"
|
||||
(len
|
||||
(filter
|
||||
(fn (o) (equal? o "s1"))
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items
|
||||
(feed/home S G "alice" feed/permit-public? rec 10)))))
|
||||
1)
|
||||
|
||||
; ACL applied per-viewer in the home pipeline
|
||||
(define
|
||||
Sacl
|
||||
(feed/stream
|
||||
(list (feed/normalize {:actor "star" :object "pub" :at 20}) (feed/normalize {:actor "star" :object "sec" :visible-to (list "carol") :at 25}))))
|
||||
(define Gacl (feed/follow-graph (list (list "alice" "star"))))
|
||||
|
||||
(feed-test
|
||||
"home hides activity alice not permitted"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/home Sacl Gacl "alice" feed/permit-acl? rec 10)))
|
||||
(list "pub"))
|
||||
155
lib/feed/tests/integration.sx
Normal file
155
lib/feed/tests/integration.sx
Normal file
@@ -0,0 +1,155 @@
|
||||
; Phase 4 — visibility (ACL) + federation, and the end-to-end timeline.
|
||||
; (feed-test name got expected)
|
||||
|
||||
; ---------- ACL visibility ----------
|
||||
; pub: public. sec: bob, allows carol. dm: frank, allows dave.
|
||||
|
||||
(define
|
||||
C
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "alice" :object "pub" :at 10})
|
||||
(feed/normalize {:actor "bob" :object "sec" :visible-to (list "carol") :at 20})
|
||||
(feed/normalize {:actor "frank" :object "dm" :visible-to (list "dave") :at 30}))))
|
||||
|
||||
(feed-test
|
||||
"public visible to anyone"
|
||||
(feed/count (feed/visible C "zoe" feed/permit-acl?))
|
||||
1)
|
||||
(feed-test
|
||||
"carol sees allowlisted + public"
|
||||
(feed/count (feed/visible C "carol" feed/permit-acl?))
|
||||
2)
|
||||
(feed-test
|
||||
"dave sees dm + public"
|
||||
(feed/count (feed/visible C "dave" feed/permit-acl?))
|
||||
2)
|
||||
(feed-test
|
||||
"author always sees own private"
|
||||
(feed/count (feed/visible C "frank" feed/permit-acl?))
|
||||
2)
|
||||
(feed-test
|
||||
"permit-public? lets all through"
|
||||
(feed/count (feed/visible C "zoe" feed/permit-public?))
|
||||
3)
|
||||
(feed-test
|
||||
"visible objects for dave"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/visible C "dave" feed/permit-acl?)))
|
||||
(list "pub" "dm"))
|
||||
|
||||
; per-viewer: same stream, different timelines
|
||||
(feed-test
|
||||
"zoe timeline differs from carol"
|
||||
(not
|
||||
(=
|
||||
(feed/count (feed/visible C "zoe" feed/permit-acl?))
|
||||
(feed/count (feed/visible C "carol" feed/permit-acl?))))
|
||||
true)
|
||||
|
||||
; ---------- federation: merge / ingest ----------
|
||||
|
||||
(define
|
||||
L
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 10 (list))
|
||||
(feed/activity "alice" "post" "p2" 20 (list)))))
|
||||
(define
|
||||
P
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p2" 20 (list))
|
||||
(feed/activity "peer" "post" "p9" 25 (list)))))
|
||||
|
||||
(feed-test "merge concatenates" (feed/count (feed/merge L P)) 4)
|
||||
(feed-test
|
||||
"ingest dedupes overlap"
|
||||
(feed/count (feed/ingest L P))
|
||||
3)
|
||||
|
||||
(feed-test
|
||||
"inbound normalizes + ingests"
|
||||
(feed/count (feed/inbound L (list {:actor "peer" :object "p9" :at 25} {:actor "alice" :object "p1" :at 10})))
|
||||
3)
|
||||
|
||||
; backfill via injected fetch-fn
|
||||
(define peer-history (fn (peer-id) (list {:actor peer-id :object "h1" :at 1} {:actor peer-id :object "h2" :at 2})))
|
||||
(feed-test
|
||||
"backfill merges peer history"
|
||||
(feed/count (feed/backfill L peer-history "remote"))
|
||||
4)
|
||||
(feed-test
|
||||
"backfill objects present"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items
|
||||
(feed/by-actor (feed/backfill L peer-history "remote") "remote")))
|
||||
(list "h1" "h2"))
|
||||
|
||||
; ---------- federation: outbound partition ----------
|
||||
|
||||
; bob (local), alice@remote + carol@remote (remote) follow star
|
||||
(define
|
||||
Gf
|
||||
(feed/follow-graph
|
||||
(list
|
||||
(list "bob" "star")
|
||||
(list "alice@remote" "star")
|
||||
(list "carol@remote" "star"))))
|
||||
(define
|
||||
Sf
|
||||
(feed/stream (list (feed/activity "star" "post" "s1" 1 (list)))))
|
||||
(define
|
||||
remote?
|
||||
(fn (id) (feed/-elem? id (list "alice@remote" "carol@remote"))))
|
||||
(define parts (feed/federate Sf Gf remote?))
|
||||
|
||||
(feed-test "local deliveries" (feed/count (get parts :local)) 1)
|
||||
(feed-test "remote deliveries" (feed/count (get parts :remote)) 2)
|
||||
(feed-test
|
||||
"local recipient is bob"
|
||||
(feed/recipients (get parts :local))
|
||||
(list "bob"))
|
||||
|
||||
; deliver: send-fn receives each remote event, local inbox returned
|
||||
(define sent (list))
|
||||
(define send-fn (fn (to act) (set! sent (append sent (list to)))))
|
||||
(define local-inbox (feed/deliver Sf Gf remote? send-fn))
|
||||
(feed-test "deliver returns local inbox" (feed/count local-inbox) 1)
|
||||
(feed-test "deliver sent to both remotes" (len sent) 2)
|
||||
(feed-test "deliver remote targets" sent (list "alice@remote" "carol@remote"))
|
||||
|
||||
; ---------- end-to-end: federated, ACL-filtered, ranked timeline ----------
|
||||
|
||||
(define
|
||||
base
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "alice" :object "a1" :at 100})
|
||||
(feed/normalize {:actor "bob" :object "b1" :visible-to (list "carol") :at 90})
|
||||
(feed/normalize {:actor "eve" :object "e1" :visible-to (list "dave") :at 80}))))
|
||||
(define federated (feed/inbound base (list {:actor "peer" :object "x1" :at 110})))
|
||||
(define rec (feed/recency 120 10))
|
||||
(define
|
||||
carol-tl
|
||||
(feed/timeline federated "carol" feed/permit-acl? rec 3))
|
||||
|
||||
; eve's :visible-to excludes carol -> filtered out; peer/alice public, bob allows carol
|
||||
(feed-test "carol federated timeline count" (feed/count carol-tl) 3)
|
||||
(feed-test
|
||||
"carol timeline order (recency)"
|
||||
(map (fn (a) (get a :object)) (feed/items carol-tl))
|
||||
(list "x1" "a1" "b1"))
|
||||
(feed-test
|
||||
"eve dm excluded from carol"
|
||||
(feed/-elem? "e1" (map (fn (a) (get a :object)) (feed/items carol-tl)))
|
||||
false)
|
||||
(feed-test
|
||||
"dave sees eve dm not bob"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items
|
||||
(feed/timeline federated "dave" feed/permit-acl? rec 5)))
|
||||
(list "x1" "a1" "e1"))
|
||||
68
lib/feed/tests/mute.sx
Normal file
68
lib/feed/tests/mute.sx
Normal file
@@ -0,0 +1,68 @@
|
||||
; Follow-up — viewer mute/block filtering. (feed-test name got expected)
|
||||
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "alice" :object "P1" :at 1 :tags (list "news")})
|
||||
(feed/normalize {:actor "bob" :object "P2" :at 2 :tags (list "spam")})
|
||||
(feed/normalize {:actor "alice" :object "P3" :at 3 :tags (list "cats")})
|
||||
(feed/normalize {:actor "carol" :object "P4" :at 4 :tags (list "news" "spam")}))))
|
||||
|
||||
; ---------- mute actors ----------
|
||||
|
||||
(feed-test
|
||||
"mute bob drops his post"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/mute-actors S (list "bob"))))
|
||||
(list "P1" "P3" "P4"))
|
||||
(feed-test
|
||||
"mute alice drops two"
|
||||
(feed/count (feed/mute-actors S (list "alice")))
|
||||
2)
|
||||
(feed-test
|
||||
"mute nobody keeps all"
|
||||
(feed/count (feed/mute-actors S (list)))
|
||||
4)
|
||||
|
||||
; ---------- mute tags ----------
|
||||
|
||||
(feed-test
|
||||
"mute spam tag drops two"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/mute-tags S (list "spam"))))
|
||||
(list "P1" "P3"))
|
||||
(feed-test
|
||||
"mute news+cats leaves spam-only"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/mute-tags S (list "news" "cats"))))
|
||||
(list "P2"))
|
||||
|
||||
; ---------- mute objects ----------
|
||||
|
||||
(feed-test
|
||||
"mute object P3 (thread mute)"
|
||||
(feed/count (feed/mute-objects S (list "P3")))
|
||||
3)
|
||||
|
||||
; ---------- combined prefs ----------
|
||||
|
||||
(feed-test
|
||||
"apply-prefs actors + tags"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/apply-prefs S {:mute-actors (list "bob") :mute-tags (list "cats")})))
|
||||
(list "P1" "P4"))
|
||||
(feed-test
|
||||
"apply-prefs empty keeps all"
|
||||
(feed/count (feed/apply-prefs S {}))
|
||||
4)
|
||||
(feed-test
|
||||
"apply-prefs all three filters"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/apply-prefs S {:mute-objects (list "P3") :mute-actors (list "carol") :mute-tags (list "spam")})))
|
||||
(list "P1"))
|
||||
69
lib/feed/tests/notify.sx
Normal file
69
lib/feed/tests/notify.sx
Normal file
@@ -0,0 +1,69 @@
|
||||
; Follow-up — notification feed over an inbox. (feed-test name got expected)
|
||||
|
||||
; an inbox is a stream of {:to receiver :activity act} events
|
||||
(define mk-ev (fn (to act) {:activity act :to to}))
|
||||
|
||||
(define
|
||||
IB
|
||||
(feed/stream
|
||||
(list
|
||||
(mk-ev "alice" (feed/activity "bob" "like" "P" 10 (list)))
|
||||
(mk-ev "alice" (feed/activity "carol" "like" "P" 20 (list)))
|
||||
(mk-ev "alice" (feed/activity "dave" "reply" "Q" 30 (list)))
|
||||
(mk-ev "bob" (feed/activity "eve" "like" "R" 40 (list))))))
|
||||
|
||||
; ---------- raw notifications ----------
|
||||
|
||||
(feed-test
|
||||
"alice notification count"
|
||||
(feed/count (feed/notifications IB "alice"))
|
||||
3)
|
||||
(feed-test
|
||||
"bob notification count"
|
||||
(feed/count (feed/notifications IB "bob"))
|
||||
1)
|
||||
(feed-test
|
||||
"zoe no notifications"
|
||||
(feed/count (feed/notifications IB "zoe"))
|
||||
0)
|
||||
|
||||
; ---------- verb filtering ----------
|
||||
|
||||
(feed-test
|
||||
"alice likes only"
|
||||
(feed/count (feed/notify-verbs IB "alice" (list "like")))
|
||||
2)
|
||||
(feed-test
|
||||
"alice replies only"
|
||||
(feed/count (feed/notify-verbs IB "alice" (list "reply")))
|
||||
1)
|
||||
(feed-test
|
||||
"alice like+reply"
|
||||
(feed/count (feed/notify-verbs IB "alice" (list "like" "reply")))
|
||||
3)
|
||||
(feed-test
|
||||
"alice follow (none)"
|
||||
(feed/count (feed/notify-verbs IB "alice" (list "follow")))
|
||||
0)
|
||||
|
||||
; ---------- digest ----------
|
||||
|
||||
(define dig (feed/notify-digest IB "alice"))
|
||||
|
||||
(feed-test "digest group count" (len dig) 2)
|
||||
(feed-test
|
||||
"digest sorted by key (like|P before reply|Q)"
|
||||
(map (fn (g) (get g :object)) dig)
|
||||
(list "P" "Q"))
|
||||
(feed-test
|
||||
"like group actors"
|
||||
(get (nth dig 0) :actors)
|
||||
(list "bob" "carol"))
|
||||
(feed-test "like group count" (get (nth dig 0) :count) 2)
|
||||
(feed-test "like group verb" (get (nth dig 0) :verb) "like")
|
||||
(feed-test "reply group count" (get (nth dig 1) :count) 1)
|
||||
(feed-test
|
||||
"reply group actors"
|
||||
(get (nth dig 1) :actors)
|
||||
(list "dave"))
|
||||
(feed-test "empty digest for zoe" (feed/notify-digest IB "zoe") (list))
|
||||
86
lib/feed/tests/page.sx
Normal file
86
lib/feed/tests/page.sx
Normal file
@@ -0,0 +1,86 @@
|
||||
; Follow-up — pagination (offset + cursor). (feed-test name got expected)
|
||||
|
||||
; ---------- offset / limit ----------
|
||||
|
||||
(define
|
||||
O
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "o1" 1 (list))
|
||||
(feed/activity "u" "post" "o2" 2 (list))
|
||||
(feed/activity "u" "post" "o3" 3 (list))
|
||||
(feed/activity "u" "post" "o4" 4 (list))
|
||||
(feed/activity "u" "post" "o5" 5 (list)))))
|
||||
|
||||
(feed-test
|
||||
"page 1"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/page O 0 2)))
|
||||
(list "o1" "o2"))
|
||||
(feed-test
|
||||
"page 2"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/page O 2 2)))
|
||||
(list "o3" "o4"))
|
||||
(feed-test
|
||||
"page 3 (partial)"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/page O 4 2)))
|
||||
(list "o5"))
|
||||
(feed-test
|
||||
"page past end empty"
|
||||
(feed/count (feed/page O 10 2))
|
||||
0)
|
||||
(feed-test "page-count 5/2 = 3" (feed/page-count O 2) 3)
|
||||
(feed-test "page-count 5/5 = 1" (feed/page-count O 5) 1)
|
||||
|
||||
; ---------- cursor (recency) ----------
|
||||
|
||||
(define
|
||||
R
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "a" 50 (list))
|
||||
(feed/activity "u" "post" "b" 40 (list))
|
||||
(feed/activity "u" "post" "c" 30 (list))
|
||||
(feed/activity "u" "post" "d" 20 (list))
|
||||
(feed/activity "u" "post" "e" 10 (list)))))
|
||||
|
||||
(define p1 (feed/page-before R 100 2))
|
||||
(feed-test
|
||||
"cursor page 1 newest first"
|
||||
(map (fn (a) (get a :object)) (feed/items p1))
|
||||
(list "a" "b"))
|
||||
(feed-test "next cursor after page 1" (feed/next-cursor p1) 40)
|
||||
|
||||
(define p2 (feed/page-before R (feed/next-cursor p1) 2))
|
||||
(feed-test
|
||||
"cursor page 2"
|
||||
(map (fn (a) (get a :object)) (feed/items p2))
|
||||
(list "c" "d"))
|
||||
(feed-test "next cursor after page 2" (feed/next-cursor p2) 20)
|
||||
|
||||
(define p3 (feed/page-before R (feed/next-cursor p2) 2))
|
||||
(feed-test
|
||||
"cursor page 3 (partial)"
|
||||
(map (fn (a) (get a :object)) (feed/items p3))
|
||||
(list "e"))
|
||||
|
||||
(feed-test
|
||||
"empty page nil cursor"
|
||||
(feed/next-cursor (feed/page-before R 5 2))
|
||||
nil)
|
||||
|
||||
(feed-test
|
||||
"after cursor loads newer"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/recent (feed/after R 30))))
|
||||
(list "a" "b"))
|
||||
(feed-test
|
||||
"before cursor count"
|
||||
(feed/count (feed/before R 30))
|
||||
2)
|
||||
160
lib/feed/tests/rank.sx
Normal file
160
lib/feed/tests/rank.sx
Normal file
@@ -0,0 +1,160 @@
|
||||
; Phase 3 — aggregation + ranking. (feed-test name got expected)
|
||||
|
||||
; ---------- aggregation ----------
|
||||
|
||||
(define
|
||||
A
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 5 (list))
|
||||
(feed/activity "alice" "post" "p2" 15 (list))
|
||||
(feed/activity "bob" "post" "p3" 25 (list))
|
||||
(feed/activity "alice" "like" "p1" 35 (list)))))
|
||||
|
||||
(feed-test "actor-counts" (feed/actor-counts A) {:alice 3 :bob 1})
|
||||
(feed-test "object-counts" (feed/object-counts A) {:p2 1 :p3 1 :p1 2})
|
||||
(feed-test
|
||||
"group-by actor alice len"
|
||||
(len (get (feed/group-by A feed/actor) "alice"))
|
||||
3)
|
||||
(feed-test
|
||||
"group-count empty"
|
||||
(feed/group-count feed/empty feed/actor)
|
||||
{})
|
||||
|
||||
; day bucketing
|
||||
(define
|
||||
D
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "alice" "post" "p1" 5 (list))
|
||||
(feed/activity "alice" "post" "p2" 8 (list))
|
||||
(feed/activity "alice" "post" "p3" 12 (list)))))
|
||||
|
||||
(feed-test "feed/day floor" (feed/day 12 10) 1)
|
||||
(feed-test "feed/day same bucket" (feed/day 8 10) 0)
|
||||
(feed-test "by-actor-day" (feed/by-actor-day D 10) {:alice#0 2 :alice#1 1})
|
||||
|
||||
; ---------- recency ----------
|
||||
|
||||
(define rec (feed/recency 100 10))
|
||||
(feed-test
|
||||
"recency at=now -> 1"
|
||||
(rec (feed/activity "x" "post" "o" 100 (list)))
|
||||
1)
|
||||
(feed-test
|
||||
"recency age=hl -> .5"
|
||||
(rec (feed/activity "x" "post" "o" 90 (list)))
|
||||
0.5)
|
||||
(feed-test
|
||||
"recency age=2hl -> .25"
|
||||
(rec (feed/activity "x" "post" "o" 80 (list)))
|
||||
0.25)
|
||||
|
||||
; ---------- velocity ----------
|
||||
|
||||
(define vel (feed/velocity D 10))
|
||||
(feed-test
|
||||
"velocity burst (at=12)"
|
||||
(vel (feed/activity "alice" "post" "z" 12 (list)))
|
||||
3)
|
||||
(feed-test
|
||||
"velocity mid (at=8)"
|
||||
(vel (feed/activity "alice" "post" "z" 8 (list)))
|
||||
2)
|
||||
(feed-test
|
||||
"velocity first (at=5)"
|
||||
(vel (feed/activity "alice" "post" "z" 5 (list)))
|
||||
1)
|
||||
(feed-test
|
||||
"velocity other actor"
|
||||
(vel (feed/activity "bob" "post" "z" 12 (list)))
|
||||
0)
|
||||
|
||||
; ---------- engagement ----------
|
||||
|
||||
(define eng (feed/engagement A))
|
||||
(feed-test
|
||||
"engagement p1"
|
||||
(eng (feed/activity "x" "post" "p1" 0 (list)))
|
||||
2)
|
||||
(feed-test
|
||||
"engagement p2"
|
||||
(eng (feed/activity "x" "post" "p2" 0 (list)))
|
||||
1)
|
||||
|
||||
; ---------- composite ----------
|
||||
|
||||
(define
|
||||
cmp1
|
||||
(feed/composite (list (list 2 (fn (a) (get a :at))))))
|
||||
(feed-test
|
||||
"composite single part"
|
||||
(cmp1 (feed/activity "x" "post" "o" 5 (list)))
|
||||
10)
|
||||
(define
|
||||
cmp2
|
||||
(feed/composite
|
||||
(list
|
||||
(list 2 (fn (a) (get a :at)))
|
||||
(list 3 (fn (a) 1)))))
|
||||
(feed-test
|
||||
"composite two parts"
|
||||
(cmp2 (feed/activity "x" "post" "o" 5 (list)))
|
||||
13)
|
||||
|
||||
; ---------- ranking ----------
|
||||
|
||||
(define
|
||||
R
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "oC" 80 (list))
|
||||
(feed/activity "u" "post" "oA" 100 (list))
|
||||
(feed/activity "u" "post" "oB" 90 (list)))))
|
||||
|
||||
(feed-test
|
||||
"rank by recency objects"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/rank R rec)))
|
||||
(list "oA" "oB" "oC"))
|
||||
(feed-test
|
||||
"top-2 by recency"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/top R rec 2)))
|
||||
(list "oA" "oB"))
|
||||
(feed-test "top-2 count" (feed/count (feed/top R rec 2)) 2)
|
||||
|
||||
; constant score -> tiebreak by :at descending
|
||||
(define
|
||||
T
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "f" 10 (list))
|
||||
(feed/activity "u" "post" "g" 30 (list))
|
||||
(feed/activity "u" "post" "h" 20 (list)))))
|
||||
(feed-test
|
||||
"tiebreak at-desc"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/rank T (fn (a) 0))))
|
||||
(list "g" "h" "f"))
|
||||
|
||||
; equal score AND equal :at -> stable input order
|
||||
(define
|
||||
E
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "u" "post" "first" 50 (list))
|
||||
(feed/activity "u" "post" "second" 50 (list)))))
|
||||
(feed-test
|
||||
"stable equal-key input order"
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(feed/items (feed/rank E (fn (a) 0))))
|
||||
(list "first" "second"))
|
||||
|
||||
(feed-test
|
||||
"with-scores attaches score"
|
||||
(get (nth (feed/items (feed/with-scores R rec)) 1) :score)
|
||||
1)
|
||||
|
||||
(feed-test "rank preserves count" (feed/count (feed/rank A rec)) 4)
|
||||
49
lib/feed/tests/thread.sx
Normal file
49
lib/feed/tests/thread.sx
Normal file
@@ -0,0 +1,49 @@
|
||||
; Follow-up — conversation threading via :reply-to closure. (feed-test name got expected)
|
||||
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/normalize {:actor "a" :object "root" :at 1})
|
||||
(feed/normalize {:actor "b" :object "r1" :at 2 :verb "reply" :reply-to "root"})
|
||||
(feed/normalize {:actor "c" :object "r2" :at 3 :verb "reply" :reply-to "root"})
|
||||
(feed/normalize {:actor "d" :object "r3" :at 4 :verb "reply" :reply-to "r1"})
|
||||
(feed/normalize {:actor "e" :object "x" :at 5}))))
|
||||
|
||||
; ---------- direct replies ----------
|
||||
|
||||
(feed-test "direct replies to root" (feed/reply-count S "root") 2)
|
||||
(feed-test "direct replies to r1" (feed/reply-count S "r1") 1)
|
||||
(feed-test "no replies to r3" (feed/reply-count S "r3") 0)
|
||||
(feed-test
|
||||
"replies objects to root"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/replies S "root")))
|
||||
(list "r1" "r2"))
|
||||
|
||||
; ---------- thread closure ----------
|
||||
|
||||
(feed-test
|
||||
"thread objects root (transitive)"
|
||||
(feed/thread-objects S "root")
|
||||
(list "root" "r1" "r2" "r3"))
|
||||
(feed-test
|
||||
"thread root chronological"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/thread S "root")))
|
||||
(list "root" "r1" "r2" "r3"))
|
||||
(feed-test "thread size root" (feed/thread-size S "root") 4)
|
||||
(feed-test
|
||||
"thread excludes unrelated x"
|
||||
(feed/-elem?
|
||||
"x"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/thread S "root"))))
|
||||
false)
|
||||
|
||||
; ---------- sub-thread ----------
|
||||
|
||||
(feed-test
|
||||
"thread from r1 (sub-tree)"
|
||||
(map (fn (a) (get a :object)) (feed/items (feed/thread S "r1")))
|
||||
(list "r1" "r3"))
|
||||
(feed-test "thread size r1" (feed/thread-size S "r1") 2)
|
||||
(feed-test "leaf thread is itself" (feed/thread-size S "r3") 1)
|
||||
(feed-test "unrelated thread is itself" (feed/thread-size S "x") 1)
|
||||
82
lib/feed/tests/trending.sx
Normal file
82
lib/feed/tests/trending.sx
Normal file
@@ -0,0 +1,82 @@
|
||||
; Follow-up — trending objects/actors by recent activity. (feed-test name got expected)
|
||||
|
||||
; window (50,100]: X@60,X@70 (a), Y@80 (b), Z@90 (c); W@40 is too old
|
||||
(define
|
||||
S
|
||||
(feed/stream
|
||||
(list
|
||||
(feed/activity "a" "post" "X" 60 (list))
|
||||
(feed/activity "a" "post" "X" 70 (list))
|
||||
(feed/activity "b" "post" "Y" 80 (list))
|
||||
(feed/activity "c" "post" "Z" 90 (list))
|
||||
(feed/activity "d" "post" "W" 40 (list)))))
|
||||
|
||||
; ---------- trending objects ----------
|
||||
|
||||
(feed-test
|
||||
"trending count (3 in window)"
|
||||
(len (feed/trending S 100 50 10))
|
||||
3)
|
||||
(feed-test
|
||||
"trending top object"
|
||||
(get
|
||||
(nth (feed/trending S 100 50 10) 0)
|
||||
:object)
|
||||
"X")
|
||||
(feed-test
|
||||
"trending top count"
|
||||
(get
|
||||
(nth (feed/trending S 100 50 10) 0)
|
||||
:count)
|
||||
2)
|
||||
(feed-test
|
||||
"trending order (count desc, key asc tiebreak)"
|
||||
(map
|
||||
(fn (e) (get e :object))
|
||||
(feed/trending S 100 50 10))
|
||||
(list "X" "Y" "Z"))
|
||||
(feed-test
|
||||
"trending top-2"
|
||||
(map
|
||||
(fn (e) (get e :object))
|
||||
(feed/trending S 100 50 2))
|
||||
(list "X" "Y"))
|
||||
(feed-test
|
||||
"old object W excluded"
|
||||
(feed/-elem?
|
||||
"W"
|
||||
(map
|
||||
(fn (e) (get e :object))
|
||||
(feed/trending S 100 50 10)))
|
||||
false)
|
||||
(feed-test
|
||||
"narrow window keeps only newest"
|
||||
(map
|
||||
(fn (e) (get e :object))
|
||||
(feed/trending S 100 15 10))
|
||||
(list "Z"))
|
||||
(feed-test
|
||||
"empty window -> nothing"
|
||||
(feed/trending S 100 5 10)
|
||||
(list))
|
||||
|
||||
; ---------- trending actors ----------
|
||||
|
||||
(feed-test
|
||||
"trending actor top"
|
||||
(get
|
||||
(nth (feed/trending-actors S 100 50 10) 0)
|
||||
:actor)
|
||||
"a")
|
||||
(feed-test
|
||||
"trending actor count"
|
||||
(get
|
||||
(nth (feed/trending-actors S 100 50 10) 0)
|
||||
:count)
|
||||
2)
|
||||
(feed-test
|
||||
"trending actors order"
|
||||
(map
|
||||
(fn (e) (get e :actor))
|
||||
(feed/trending-actors S 100 50 10))
|
||||
(list "a" "b" "c"))
|
||||
59
lib/feed/thread.sx
Normal file
59
lib/feed/thread.sx
Normal file
@@ -0,0 +1,59 @@
|
||||
; feed/thread — conversation threading. A reply carries :reply-to <parent-object>
|
||||
; (normalize preserves it). A thread is the transitive closure over :reply-to from
|
||||
; a root object: root + replies + replies-to-replies, gathered chronologically.
|
||||
;
|
||||
; Requires: lib/feed/normalize.sx, lib/feed/stream.sx, lib/feed/fanout.sx
|
||||
; (feed/-elem?, feed/-distinct).
|
||||
|
||||
; direct replies to an object
|
||||
(define
|
||||
feed/replies
|
||||
(fn
|
||||
(stream object)
|
||||
(feed/filter stream (fn (a) (equal? (get a :reply-to) object)))))
|
||||
|
||||
(define
|
||||
feed/reply-count
|
||||
(fn (stream object) (feed/count (feed/replies stream object))))
|
||||
|
||||
; iterate f from x until the result stops growing (set-closure fixpoint)
|
||||
(define
|
||||
feed/-fixpoint
|
||||
(fn
|
||||
(f x)
|
||||
(let
|
||||
((nx (f x)))
|
||||
(if (= (len nx) (len x)) x (feed/-fixpoint f nx)))))
|
||||
|
||||
; the set of object-ids in the thread rooted at `root`
|
||||
(define
|
||||
feed/thread-objects
|
||||
(fn
|
||||
(stream root)
|
||||
(let
|
||||
((all (feed/items stream)))
|
||||
(feed/-fixpoint
|
||||
(fn
|
||||
(acc)
|
||||
(feed/-distinct
|
||||
(append
|
||||
acc
|
||||
(map
|
||||
(fn (a) (get a :object))
|
||||
(filter (fn (a) (feed/-elem? (get a :reply-to) acc)) all)))))
|
||||
(list root)))))
|
||||
|
||||
; the full thread as a chronological stream (root + all descendants)
|
||||
(define
|
||||
feed/thread
|
||||
(fn
|
||||
(stream root)
|
||||
(let
|
||||
((objs (feed/thread-objects stream root)))
|
||||
(feed/sort-by-at
|
||||
(feed/filter stream (fn (a) (feed/-elem? (get a :object) objs)))))))
|
||||
|
||||
; how many activities are in the thread (root counts as 1)
|
||||
(define
|
||||
feed/thread-size
|
||||
(fn (stream root) (feed/count (feed/thread stream root))))
|
||||
42
lib/feed/trending.sx
Normal file
42
lib/feed/trending.sx
Normal file
@@ -0,0 +1,42 @@
|
||||
; feed/trending — what's hot right now: objects (or actors) ranked by activity
|
||||
; count within a recency window. Deterministic: count descending, ties broken by
|
||||
; key ascending (entries are pre-sorted by key, then stable grade-down by count).
|
||||
;
|
||||
; Requires: lib/feed/stream.sx, lib/feed/aggregate.sx (object/actor-counts),
|
||||
; lib/feed/rank.sx (feed/-desc-by).
|
||||
|
||||
; activities within (now-window, now]
|
||||
(define
|
||||
feed/-recent
|
||||
(fn
|
||||
(stream now window)
|
||||
(feed/filter
|
||||
stream
|
||||
(fn (a) (and (<= (get a :at) now) (> (get a :at) (- now window)))))))
|
||||
|
||||
; counts dict -> top-N entries {label key, :count n}, count desc, key asc
|
||||
(define
|
||||
feed/-top-counts
|
||||
(fn
|
||||
(counts label n)
|
||||
(let
|
||||
((entries (map (fn (k) (assoc {:count (get counts k)} label k)) (sort (keys counts)))))
|
||||
(take (feed/-desc-by entries (fn (e) (get e :count))) n))))
|
||||
|
||||
; top-N trending objects in the window
|
||||
(define
|
||||
feed/trending
|
||||
(fn
|
||||
(stream now window n)
|
||||
(feed/-top-counts
|
||||
(feed/object-counts (feed/-recent stream now window))
|
||||
:object n)))
|
||||
|
||||
; top-N most active actors in the window
|
||||
(define
|
||||
feed/trending-actors
|
||||
(fn
|
||||
(stream now window n)
|
||||
(feed/-top-counts
|
||||
(feed/actor-counts (feed/-recent stream now window))
|
||||
:actor n)))
|
||||
44
lib/search/api.sx
Normal file
44
lib/search/api.sx
Normal file
@@ -0,0 +1,44 @@
|
||||
;; search public API — assembles the canonical Haskell source from all layers.
|
||||
;; Tests and callers concatenate `search/src` with their own top-level bindings
|
||||
;; (e.g. "result = lookupTerm \"cat\" idx\n") and evaluate via the haskell-on-sx
|
||||
;; interpreter. Public Haskell entry points: indexDoc, lookupTerm, deleteDoc,
|
||||
;; docFreq, allTerms, tokens, positioned, evalQuery, parseQuery, searchQuery,
|
||||
;; rankTfIdf, rankBm25, topNTfIdf, topNBm25, fedIndex, aclFilter, searchTfIdfAcl,
|
||||
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf,
|
||||
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
|
||||
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
|
||||
;; nearDocs, expandTerm, synDocs, synRankTfIdf, queryTerms, searchRankTfIdf,
|
||||
;; searchRankBm25, suggestN, suggest.
|
||||
|
||||
(define
|
||||
search/src
|
||||
(str
|
||||
search/tokenize-src
|
||||
"\n"
|
||||
search/index-src
|
||||
"\n"
|
||||
search/query-src
|
||||
"\n"
|
||||
search/parse-src
|
||||
"\n"
|
||||
search/rank-src
|
||||
"\n"
|
||||
search/fed-src
|
||||
"\n"
|
||||
search/prefix-src
|
||||
"\n"
|
||||
search/page-src
|
||||
"\n"
|
||||
search/fuzzy-src
|
||||
"\n"
|
||||
search/highlight-src
|
||||
"\n"
|
||||
search/stem-src
|
||||
"\n"
|
||||
search/near-src
|
||||
"\n"
|
||||
search/syn-src
|
||||
"\n"
|
||||
search/rankq-src
|
||||
"\n"
|
||||
search/suggest-src))
|
||||
55
lib/search/conformance.conf
Normal file
55
lib/search/conformance.conf
Normal file
@@ -0,0 +1,55 @@
|
||||
# search-on-sx conformance config — sourced by lib/guest/conformance.sh.
|
||||
|
||||
LANG_NAME=search
|
||||
SCOREBOARD_DIR=lib/search
|
||||
MODE=counters
|
||||
COUNTERS_PASS=hk-test-pass
|
||||
COUNTERS_FAIL=hk-test-fail
|
||||
TIMEOUT_PER_SUITE=600
|
||||
|
||||
PRELOADS=(
|
||||
lib/haskell/tokenizer.sx
|
||||
lib/haskell/layout.sx
|
||||
lib/haskell/parser.sx
|
||||
lib/haskell/desugar.sx
|
||||
lib/haskell/runtime.sx
|
||||
lib/haskell/match.sx
|
||||
lib/haskell/eval.sx
|
||||
lib/haskell/map.sx
|
||||
lib/haskell/set.sx
|
||||
lib/haskell/testlib.sx
|
||||
lib/search/tokenize.sx
|
||||
lib/search/index.sx
|
||||
lib/search/query.sx
|
||||
lib/search/parse.sx
|
||||
lib/search/rank.sx
|
||||
lib/search/fed.sx
|
||||
lib/search/prefix.sx
|
||||
lib/search/page.sx
|
||||
lib/search/fuzzy.sx
|
||||
lib/search/highlight.sx
|
||||
lib/search/stem.sx
|
||||
lib/search/near.sx
|
||||
lib/search/syn.sx
|
||||
lib/search/rankq.sx
|
||||
lib/search/suggest.sx
|
||||
lib/search/api.sx
|
||||
lib/search/testlib.sx
|
||||
)
|
||||
|
||||
SUITES=(
|
||||
"index:lib/search/tests/index.sx"
|
||||
"boolean:lib/search/tests/boolean.sx"
|
||||
"parse:lib/search/tests/parse.sx"
|
||||
"rank:lib/search/tests/rank.sx"
|
||||
"integration:lib/search/tests/integration.sx"
|
||||
"prefix:lib/search/tests/prefix.sx"
|
||||
"page:lib/search/tests/page.sx"
|
||||
"fuzzy:lib/search/tests/fuzzy.sx"
|
||||
"highlight:lib/search/tests/highlight.sx"
|
||||
"stem:lib/search/tests/stem.sx"
|
||||
"near:lib/search/tests/near.sx"
|
||||
"syn:lib/search/tests/syn.sx"
|
||||
"rankq:lib/search/tests/rankq.sx"
|
||||
"suggest:lib/search/tests/suggest.sx"
|
||||
)
|
||||
3
lib/search/conformance.sh
Executable file
3
lib/search/conformance.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
|
||||
# Thin wrapper — see lib/guest/conformance.sh and lib/search/conformance.conf.
|
||||
exec bash "$(dirname "$0")/../guest/conformance.sh" "$(dirname "$0")/conformance.conf" "$@"
|
||||
16
lib/search/fed.sx
Normal file
16
lib/search/fed.sx
Normal file
@@ -0,0 +1,16 @@
|
||||
;; search federation + ACL — Haskell source fragment. Depends on index + rank.
|
||||
;; Federation merges per-peer INDICES (not ranked results): each peer's local
|
||||
;; DocIds are relabelled to global ids `gid peer local = peer*1000 + local`
|
||||
;; (dedupe by (peer,doc-id) is automatic via the bijection), then posting lists
|
||||
;; are unioned per term. Ranking then runs once over the merged index, which is
|
||||
;; rank-correct. ACL is a post-rank filter: an injected `permit :: DocId -> Bool`
|
||||
;; predicate (viewer baked in by the caller) — never baked into the index.
|
||||
;; fedIndex :: [(PeerId, Index)] -> Index
|
||||
;; aclFilter :: (DocId -> Bool) -> [DocId] -> [DocId]
|
||||
;; searchTfIdfAcl :: (DocId -> Bool) -> [Term] -> Index -> [DocId]
|
||||
;; topNTfIdfAcl :: Int -> (DocId -> Bool) -> [Term] -> Index -> [DocId]
|
||||
;; searchBm25Acl :: (DocId -> Bool) -> Float -> Float -> [Term] -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/fed-src
|
||||
"gid peer local = peer * 1000 + local\nfedRelabelPosting peer p = (gid peer (fst p), snd p)\nfedRelabelEntry peer e = (fst e, map (fedRelabelPosting peer) (snd e))\nfedRelabelIndex peer idx = map (fedRelabelEntry peer) idx\nfedInsP p [] = [p]\nfedInsP p (q:qs) = if fst p < fst q then p : q : qs else if fst p == fst q then p : qs else q : fedInsP p qs\nfedMergePL a b = foldr fedInsP b a\nfedInsTerm t pl [] = [(t, pl)]\nfedInsTerm t pl (x:xs) = if t < fst x then (t, pl) : x : xs else if t == fst x then (fst x, fedMergePL pl (snd x)) : xs else x : fedInsTerm t pl xs\nfedMergeEntry idx e = fedInsTerm (fst e) (snd e) idx\nfedMergeTwo a b = foldl fedMergeEntry a b\nfedAddPeer acc pair = fedMergeTwo acc (fedRelabelIndex (fst pair) (snd pair))\nfedIndex pairs = foldl fedAddPeer emptyIndex pairs\naclFilter permit docs = filter permit docs\nsearchTfIdfAcl permit ts idx = aclFilter permit (rankTfIdf ts idx)\ntopNTfIdfAcl n permit ts idx = take n (aclFilter permit (rankTfIdf ts idx))\nsearchBm25Acl permit k1 b ts idx = aclFilter permit (rankBm25 k1 b ts idx)\n")
|
||||
12
lib/search/fuzzy.sx
Normal file
12
lib/search/fuzzy.sx
Normal file
@@ -0,0 +1,12 @@
|
||||
;; search fuzzy matching — Haskell source fragment. Depends on index + rank.
|
||||
;; Levenshtein edit distance (O(m*n) row-based DP — the naive recursive version is
|
||||
;; exponential and far too slow under load) expands a query term to all indexed
|
||||
;; terms within a max distance, then unions / ranks their docs.
|
||||
;; editDist :: String -> String -> Int
|
||||
;; fuzzyTerms :: Int -> String -> Index -> [Term] (sorted)
|
||||
;; fuzzyDocs :: Int -> String -> Index -> [DocId] (sorted union)
|
||||
;; fuzzyRankTfIdf :: Int -> String -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/fuzzy-src
|
||||
"edMin3 a b c = min a (min b c)\nedCost x y = if x == y then 0 else 1\nedUpto i n = if i > n then [] else i : edUpto (i + 1) n\nedLast [x] = x\nedLast (x:xs) = edLast xs\nedNrow x [] prev left = []\nedNrow x (y:ys) prev left = let v = edMin3 (head (tail prev) + 1) (left + 1) (head prev + edCost x y) in v : edNrow x ys (tail prev) v\nedRow x ys prev = let f = head prev + 1 in f : edNrow x ys prev f\nedRows [] ys prev = prev\nedRows (x:xs) ys prev = edRows xs ys (edRow x ys prev)\neditDist xs ys = edLast (edRows xs ys (edUpto 0 (length ys)))\nqWithinDist maxd term t = editDist term t <= maxd\nfuzzyTerms maxd term idx = filter (qWithinDist maxd term) (allTerms idx)\nfuzzyDocs maxd term idx = foldl (candStep idx) [] (fuzzyTerms maxd term idx)\nfuzzyRankTfIdf maxd term idx = rankTfIdf (fuzzyTerms maxd term idx) idx\n")
|
||||
10
lib/search/highlight.sx
Normal file
10
lib/search/highlight.sx
Normal file
@@ -0,0 +1,10 @@
|
||||
;; search highlight / snippet — Haskell source fragment. Depends on tokenize.
|
||||
;; Operates on document text (not the index): marks query-matching tokens with
|
||||
;; [..] and extracts a context window around the first match. Tokens are
|
||||
;; normalized (lowercase, punctuation-stripped) by `tokens`, matching index side.
|
||||
;; highlight :: [Term] -> String -> String
|
||||
;; snippet :: Int -> [Term] -> String -> String (ctx tokens each side of 1st match)
|
||||
|
||||
(define
|
||||
search/highlight-src
|
||||
"hlMark terms t = if elem t terms then \"[\" ++ t ++ \"]\" else t\nhighlight terms text = unwords (map (hlMark terms) (tokens text))\nhlIdxFrom terms [] i = 0 - 1\nhlIdxFrom terms (t:ts) i = if elem t terms then i else hlIdxFrom terms ts (i + 1)\nhlIdx terms toks = hlIdxFrom terms toks 0\nhlMax0 x = if x < 0 then 0 else x\nsnipStart ctx i = if i < 0 then 0 else hlMax0 (i - ctx)\nsnipToks ctx terms toks = unwords (map (hlMark terms) (take (2 * ctx + 1) (drop (snipStart ctx (hlIdx terms toks)) toks)))\nsnippet ctx terms text = snipToks ctx terms (tokens text)\n")
|
||||
15
lib/search/index.sx
Normal file
15
lib/search/index.sx
Normal file
@@ -0,0 +1,15 @@
|
||||
;; search inverted index — Haskell source fragment (depends on tokenize).
|
||||
;; Index = [(Term, [(DocId, [Pos])])], sorted by Term; postings sorted by DocId.
|
||||
;; Data.Map's public API lacks toList/keys/map/filter, so a sorted assoc-list
|
||||
;; index is used — it is the conceptual `Map Term [(DocId,[Pos])]` and exposes
|
||||
;; term iteration (allTerms) and df naturally for ranking.
|
||||
;; emptyIndex :: Index
|
||||
;; indexDoc :: DocId -> String -> Index -> Index (re-index replaces)
|
||||
;; lookupTerm :: Term -> Index -> [(DocId, [Pos])]
|
||||
;; deleteDoc :: DocId -> Index -> Index
|
||||
;; docFreq :: Term -> Index -> Int
|
||||
;; allTerms :: Index -> [Term]
|
||||
|
||||
(define
|
||||
search/index-src
|
||||
"emptyIndex = []\ngroupBump [] t p = [(t, [p])]\ngroupBump (g:gs) t p = if fst g == t then (t, snd g ++ [p]) : gs else g : groupBump gs t p\ngroupStep acc tp = groupBump acc (fst tp) (snd tp)\ngroupTok pairs = foldl groupStep [] pairs\ninsPosting d ps [] = [(d, ps)]\ninsPosting d ps (q:qs) = if d < fst q then (d, ps) : q : qs else if d == fst q then (d, ps) : qs else q : insPosting d ps qs\ninsTerm t d ps [] = [(t, [(d, ps)])]\ninsTerm t d ps (e:es) = if t < fst e then (t, [(d, ps)]) : e : es else if t == fst e then (fst e, insPosting d ps (snd e)) : es else e : insTerm t d ps es\nindexStep d ix tp = insTerm (fst tp) d (snd tp) ix\nindexDoc d text idx = foldl (indexStep d) idx (groupTok (positioned text))\nlookupTerm t idx = case lookup t idx of { Nothing -> []; Just pl -> pl }\ndocFreq t idx = length (lookupTerm t idx)\nallTerms idx = map fst idx\npostingKeep d q = fst q /= d\ndropTermDoc d e = (fst e, filter (postingKeep d) (snd e))\nplKeep e = not (null (snd e))\ndeleteDoc d idx = filter plKeep (map (dropTermDoc d) idx)\n")
|
||||
8
lib/search/near.sx
Normal file
8
lib/search/near.sx
Normal file
@@ -0,0 +1,8 @@
|
||||
;; search proximity (NEAR) — Haskell source fragment. Depends on query (posIn,
|
||||
;; docsWith, sortedInter). Finds docs where two terms occur within k positions of
|
||||
;; each other (unordered), using the positional postings.
|
||||
;; nearDocs :: Int -> Term -> Term -> Index -> [DocId] (sorted)
|
||||
|
||||
(define
|
||||
search/near-src
|
||||
"nrAbsDiff a b = if a > b then a - b else b - a\nnrCloseTo k x [] = False\nnrCloseTo k x (y:ys) = if nrAbsDiff x y <= k then True else nrCloseTo k x ys\nnrAnyClose k [] ys = False\nnrAnyClose k (x:xs) ys = if nrCloseTo k x ys then True else nrAnyClose k xs ys\nnearInDoc k t1 t2 d idx = nrAnyClose k (posIn t1 d idx) (posIn t2 d idx)\nnearHere k t1 t2 idx d = nearInDoc k t1 t2 d idx\nnearDocs k t1 t2 idx = filter (nearHere k t1 t2 idx) (sortedInter (docsWith t1 idx) (docsWith t2 idx))\n")
|
||||
11
lib/search/page.sx
Normal file
11
lib/search/page.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
;; search pagination — Haskell source fragment. Depends on rank.
|
||||
;; Windows a ranked result list by offset/limit (offset >= length -> empty;
|
||||
;; limit clamps to what remains).
|
||||
;; paginate :: Int -> Int -> [DocId] -> [DocId] (offset, limit)
|
||||
;; pageTfIdf :: Int -> Int -> [Term] -> Index -> [DocId]
|
||||
;; pageBm25 :: Int -> Int -> Float -> Float -> [Term] -> Index -> [DocId]
|
||||
;; resultCount :: [Term] -> Index -> Int
|
||||
|
||||
(define
|
||||
search/page-src
|
||||
"paginate off lim docs = take lim (drop off docs)\npageTfIdf off lim ts idx = paginate off lim (rankTfIdf ts idx)\npageBm25 off lim k1 b ts idx = paginate off lim (rankBm25 k1 b ts idx)\nresultCount ts idx = length (rankTfIdf ts idx)\n")
|
||||
18
lib/search/parse.sx
Normal file
18
lib/search/parse.sx
Normal file
@@ -0,0 +1,18 @@
|
||||
;; search query parser — Haskell source fragment. Depends on tokenize + query.
|
||||
;; Grammar (precedence OR < AND < NOT):
|
||||
;; expr = orExpr
|
||||
;; orExpr = andExpr (OR andExpr)*
|
||||
;; andExpr= notExpr ((AND | <implicit>) notExpr)* -- adjacency means AND
|
||||
;; notExpr= NOT notExpr | atom
|
||||
;; atom = '(' expr ')' | '"' word+ '"' | word
|
||||
;; Keywords AND/OR/NOT are case-insensitive; bare words are normalized via tokens.
|
||||
;; Gotchas: delimiters matched by ord (escaped char literals like '\"' break the
|
||||
;; haskell-on-sx tokenizer); an [] *pattern* inside a `case` alt also breaks the
|
||||
;; parser, so qNormTerm/qDropRP/showQ are written as multi-clause functions.
|
||||
;; parseQuery :: String -> Query
|
||||
;; searchQuery :: String -> Index -> [DocId]
|
||||
;; showQ :: Query -> String -- canonical render for tests/debug
|
||||
|
||||
(define
|
||||
search/parse-src
|
||||
"data QTok = TAnd | TOr | TNot | TLP | TRP | TWord String | TPhrase [String]\nqIsSpace c = ord c == 32\nqIsLP c = ord c == 40\nqIsRP c = ord c == 41\nqIsQuote c = ord c == 34\nqDelim c = qIsSpace c || qIsLP c || qIsRP c || qIsQuote c\nqReadWord [] = ([], [])\nqReadWord (c:cs) = if qDelim c then ([], c:cs) else let (w, rest) = qReadWord cs in (c:w, rest)\nqReadPhrase [] = ([], [])\nqReadPhrase (c:cs) = if qIsQuote c then ([], cs) else let (w, rest) = qReadPhrase cs in (c:w, rest)\ntoUpperCh c = chr (toUpper (ord c))\nqUpper w = joinChars (map toUpperCh w)\nqFirstTok [] = \"\"\nqFirstTok (x:xs) = x\nqNormTerm w = qFirstTok (tokens w)\nqClassify w = if qUpper w == \"AND\" then TAnd else if qUpper w == \"OR\" then TOr else if qUpper w == \"NOT\" then TNot else TWord (qNormTerm w)\nqPhraseTok cs = let (p, rest) = qReadPhrase cs in TPhrase (tokens p) : qtokens rest\nqWordTok cs = let (w, rest) = qReadWord cs in qClassify w : qtokens rest\nqtokens [] = []\nqtokens (c:cs) = if qIsSpace c then qtokens cs else if qIsLP c then TLP : qtokens cs else if qIsRP c then TRP : qtokens cs else if qIsQuote c then qPhraseTok cs else qWordTok (c:cs)\nqDropRP (q, (TRP:rest)) = (q, rest)\nqDropRP (q, ts) = (q, ts)\nparseAtom [] = (Term \"\", [])\nparseAtom (TLP:ts) = qDropRP (parseExpr ts)\nparseAtom (TPhrase ps : ts) = (Phrase ps, ts)\nparseAtom (TWord w : ts) = (Term w, ts)\nparseAtom ts = (Term \"\", ts)\nqWrapNot (q, ts) = (Not q, ts)\nparseNot (TNot:ts) = qWrapNot (parseNot ts)\nparseNot ts = parseAtom ts\nqStartsAtom (TWord w : ts) = True\nqStartsAtom (TPhrase p : ts) = True\nqStartsAtom (TLP : ts) = True\nqStartsAtom (TNot : ts) = True\nqStartsAtom ts = False\nqAndStep left ts = let (r, rest) = parseNot ts in parseAndR (And left r) rest\nparseAndR left (TAnd:ts) = qAndStep left ts\nparseAndR left ts = if qStartsAtom ts then qAndStep left ts else (left, ts)\nparseAnd ts = let (l, rest) = parseNot ts in parseAndR l rest\nparseOrR left (TOr:ts) = let (r, rest) = parseAnd ts in parseOrR (Or left r) rest\nparseOrR left ts = (left, ts)\nparseExpr ts = let (l, rest) = parseAnd ts in parseOrR l rest\nparseQuery s = fst (parseExpr (qtokens s))\nsearchQuery s idx = evalQuery idx (parseQuery s)\njoinSp [] = \"\"\njoinSp [x] = x\njoinSp (x:xs) = x ++ \"-\" ++ joinSp xs\nshowQ (Term t) = \"T:\" ++ t\nshowQ (And a b) = \"(\" ++ showQ a ++ \" & \" ++ showQ b ++ \")\"\nshowQ (Or a b) = \"(\" ++ showQ a ++ \" | \" ++ showQ b ++ \")\"\nshowQ (Not a) = \"!\" ++ showQ a\nshowQ (Phrase ts) = \"P:\" ++ joinSp ts\n")
|
||||
10
lib/search/prefix.sx
Normal file
10
lib/search/prefix.sx
Normal file
@@ -0,0 +1,10 @@
|
||||
;; search prefix / wildcard queries — Haskell source fragment. Depends on index +
|
||||
;; rank (reuses candStep / rankTfIdf). A prefix matches every indexed term that
|
||||
;; starts with it; the matching terms are unioned (OR) into a docid set.
|
||||
;; prefixTerms :: String -> Index -> [Term] (sorted, from allTerms)
|
||||
;; prefixDocs :: String -> Index -> [DocId] (sorted union)
|
||||
;; prefixRankTfIdf :: String -> Index -> [DocId] (ranked by the matched terms)
|
||||
|
||||
(define
|
||||
search/prefix-src
|
||||
"prefixTerms pre idx = filter (isPrefixOf pre) (allTerms idx)\nprefixDocs pre idx = foldl (candStep idx) [] (prefixTerms pre idx)\nprefixRankTfIdf pre idx = rankTfIdf (prefixTerms pre idx) idx\n")
|
||||
11
lib/search/query.sx
Normal file
11
lib/search/query.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
;; search query AST + boolean/phrase evaluation — Haskell source fragment.
|
||||
;; Depends on tokenize + index.
|
||||
;; data Query = Term String | And Query Query | Or Query Query
|
||||
;; | Not Query | Phrase [String]
|
||||
;; evalQuery :: Index -> Query -> [DocId] (sorted, unique)
|
||||
;; Boolean ops are linear merges over docid-sorted posting lists; Not uses
|
||||
;; allDocs as the universe; Phrase checks positional adjacency.
|
||||
|
||||
(define
|
||||
search/query-src
|
||||
"data Query = Term String | And Query Query | Or Query Query | Not Query | Phrase [String]\ndocsWith t idx = map fst (lookupTerm t idx)\nsortedUnion [] ys = ys\nsortedUnion xs [] = xs\nsortedUnion (x:xs) (y:ys) = if x < y then x : sortedUnion xs (y:ys) else if x > y then y : sortedUnion (x:xs) ys else x : sortedUnion xs ys\nsortedInter [] ys = []\nsortedInter xs [] = []\nsortedInter (x:xs) (y:ys) = if x < y then sortedInter xs (y:ys) else if x > y then sortedInter (x:xs) ys else x : sortedInter xs ys\nsortedDiff [] ys = []\nsortedDiff xs [] = xs\nsortedDiff (x:xs) (y:ys) = if x < y then x : sortedDiff xs (y:ys) else if x > y then sortedDiff (x:xs) ys else sortedDiff xs ys\nmergeDocs acc e = sortedUnion acc (map fst (snd e))\nallDocs idx = foldl mergeDocs [] idx\nposIn t d idx = case lookup d (lookupTerm t idx) of { Nothing -> []; Just ps -> ps }\nelemSorted x [] = False\nelemSorted x (y:ys) = if x == y then True else if x < y then False else elemSorted x ys\nphraseAtAll [] d idx p i = True\nphraseAtAll (t:ts) d idx p i = if elemSorted (p + i) (posIn t d idx) then phraseAtAll ts d idx p (i + 1) else False\nphraseStartsAt ts d idx p = phraseAtAll ts d idx p 0\nphraseInDoc [] d idx = True\nphraseInDoc (t0:rest) d idx = any (phraseStartsAt (t0:rest) d idx) (posIn t0 d idx)\nphraseHere ts idx d = phraseInDoc ts d idx\ninterStep idx acc tt = sortedInter acc (docsWith tt idx)\nphraseCands [] idx = allDocs idx\nphraseCands (t:ts) idx = foldl (interStep idx) (docsWith t idx) ts\nphraseDocs ts idx = filter (phraseHere ts idx) (phraseCands ts idx)\nevalQuery idx q = case q of { Term t -> docsWith t idx ; And a b -> sortedInter (evalQuery idx a) (evalQuery idx b) ; Or a b -> sortedUnion (evalQuery idx a) (evalQuery idx b) ; Not a -> sortedDiff (allDocs idx) (evalQuery idx a) ; Phrase ts -> phraseDocs ts idx }\n")
|
||||
14
lib/search/rank.sx
Normal file
14
lib/search/rank.sx
Normal file
@@ -0,0 +1,14 @@
|
||||
;; search ranking — Haskell source fragment. Depends on tokenize + index + query.
|
||||
;; Ranked retrieval over the candidate set (docs containing any query term).
|
||||
;; Scores are floats; ties broken by DocId ascending (deterministic).
|
||||
;; numDocs :: Index -> Int
|
||||
;; docFreq :: Term -> Index -> Int (from index)
|
||||
;; docLen :: DocId -> Index -> Int
|
||||
;; rankTfIdf :: [Term] -> Index -> [DocId]
|
||||
;; topNTfIdf :: Int -> [Term] -> Index -> [DocId]
|
||||
;; rankBm25 :: Float -> Float -> [Term] -> Index -> [DocId] (k1, b)
|
||||
;; topNBm25 :: Int -> Float -> Float -> [Term] -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/rank-src
|
||||
"numDocs idx = length (allDocs idx)\ntfIn t d idx = length (posIn t d idx)\nqIdf n df = if df == 0 then 0 else log (n / df)\nidf t idx = qIdf (numDocs idx) (docFreq t idx)\ntermScoreTf idx d t = tfIn t d idx * idf t idx\ntfidfDoc ts idx d = sum (map (termScoreTf idx d) ts)\ncandStep idx acc t = sortedUnion acc (docsWith t idx)\ncandDocs ts idx = foldl (candStep idx) [] ts\ncmpScore p1 p2 = if fst p1 > fst p2 then LT else if fst p1 < fst p2 then GT else compare (snd p1) (snd p2)\nmkPair f ts idx d = (f ts idx d, d)\nrankWith f ts idx = map snd (sortBy cmpScore (map (mkPair f ts idx) (candDocs ts idx)))\nrankTfIdf ts idx = rankWith tfidfDoc ts idx\ntopNTfIdf n ts idx = take n (rankTfIdf ts idx)\ntfAt d idx t = tfIn t d idx\ndocLen d idx = sum (map (tfAt d idx) (allTerms idx))\nlenAt idx d = docLen d idx\navgDocLen idx = sum (map (lenAt idx) (allDocs idx)) / numDocs idx\nbm25idf t idx = log ((numDocs idx - docFreq t idx + 0.5) / (docFreq t idx + 0.5) + 1)\nbm25Term k1 b avgdl idx d t = bm25idf t idx * (tfIn t d idx * (k1 + 1)) / (tfIn t d idx + k1 * (1 - b + b * docLen d idx / avgdl))\nbm25Doc k1 b ts idx d = sum (map (bm25Term k1 b (avgDocLen idx) idx d) ts)\nrankBm25 k1 b ts idx = rankWith (bm25Doc k1 b) ts idx\ntopNBm25 n k1 b ts idx = take n (rankBm25 k1 b ts idx)\n")
|
||||
11
lib/search/rankq.sx
Normal file
11
lib/search/rankq.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
;; search boolean-filtered ranked search — Haskell source fragment.
|
||||
;; Depends on parse (parseQuery/Query), query (evalQuery), rank (tfidfDoc/bm25Doc/
|
||||
;; cmpScore). Filters by the boolean query, then ranks the surviving docs by
|
||||
;; relevance over the query's leaf terms — the real-world filter-then-rank pattern.
|
||||
;; queryTerms :: Query -> [Term]
|
||||
;; searchRankTfIdf :: String -> Index -> [DocId]
|
||||
;; searchRankBm25 :: Float -> Float -> String -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/rankq-src
|
||||
"queryTerms (Term t) = [t]\nqueryTerms (And a b) = queryTerms a ++ queryTerms b\nqueryTerms (Or a b) = queryTerms a ++ queryTerms b\nqueryTerms (Not a) = queryTerms a\nqueryTerms (Phrase ts) = ts\nmkSubPair f terms idx d = (f terms idx d, d)\nrankSubsetWith f terms docs idx = map snd (sortBy cmpScore (map (mkSubPair f terms idx) docs))\nsearchRankTfIdf s idx = let q = parseQuery s in rankSubsetWith tfidfDoc (queryTerms q) (evalQuery idx q) idx\nsearchRankBm25 k1 b s idx = let q = parseQuery s in rankSubsetWith (bm25Doc k1 b) (queryTerms q) (evalQuery idx q) idx\n")
|
||||
23
lib/search/scoreboard.json
Normal file
23
lib/search/scoreboard.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"lang": "search",
|
||||
"total_passed": 234,
|
||||
"total_failed": 0,
|
||||
"total": 234,
|
||||
"suites": [
|
||||
{"name":"index","passed":18,"failed":0,"total":18},
|
||||
{"name":"boolean","passed":28,"failed":0,"total":28},
|
||||
{"name":"parse","passed":32,"failed":0,"total":32},
|
||||
{"name":"rank","passed":23,"failed":0,"total":23},
|
||||
{"name":"integration","passed":21,"failed":0,"total":21},
|
||||
{"name":"prefix","passed":14,"failed":0,"total":14},
|
||||
{"name":"page","passed":12,"failed":0,"total":12},
|
||||
{"name":"fuzzy","passed":18,"failed":0,"total":18},
|
||||
{"name":"highlight","passed":12,"failed":0,"total":12},
|
||||
{"name":"stem","passed":18,"failed":0,"total":18},
|
||||
{"name":"near","passed":9,"failed":0,"total":9},
|
||||
{"name":"syn","passed":9,"failed":0,"total":9},
|
||||
{"name":"rankq","passed":11,"failed":0,"total":11},
|
||||
{"name":"suggest","passed":9,"failed":0,"total":9}
|
||||
],
|
||||
"generated": "2026-06-07T00:44:05+00:00"
|
||||
}
|
||||
20
lib/search/scoreboard.md
Normal file
20
lib/search/scoreboard.md
Normal file
@@ -0,0 +1,20 @@
|
||||
# search scoreboard
|
||||
|
||||
**234 / 234 passing** (0 failure(s)).
|
||||
|
||||
| Suite | Passed | Total | Status |
|
||||
|-------|--------|-------|--------|
|
||||
| index | 18 | 18 | ok |
|
||||
| boolean | 28 | 28 | ok |
|
||||
| parse | 32 | 32 | ok |
|
||||
| rank | 23 | 23 | ok |
|
||||
| integration | 21 | 21 | ok |
|
||||
| prefix | 14 | 14 | ok |
|
||||
| page | 12 | 12 | ok |
|
||||
| fuzzy | 18 | 18 | ok |
|
||||
| highlight | 12 | 12 | ok |
|
||||
| stem | 18 | 18 | ok |
|
||||
| near | 9 | 9 | ok |
|
||||
| syn | 9 | 9 | ok |
|
||||
| rankq | 11 | 11 | ok |
|
||||
| suggest | 9 | 9 | ok |
|
||||
15
lib/search/stem.sx
Normal file
15
lib/search/stem.sx
Normal file
@@ -0,0 +1,15 @@
|
||||
;; search stemming — Haskell source fragment. Depends on tokenize + index.
|
||||
;; Lightweight, deterministic English suffix stripping (recall-improving
|
||||
;; normalizer). Rules are checked most-specific first; conservative length guards
|
||||
;; avoid mangling short words. Not a full Porter stemmer.
|
||||
;; Gotcha: take/drop over a String yield char CODES (ints), not char strings, so
|
||||
;; rebuild strings with `stStr = joinChars . map chr`. (isSuffixOf's reverse also
|
||||
;; trips `++` on the String representation, hence the manual stEnds.)
|
||||
;; stem :: String -> String
|
||||
;; stemText :: String -> String (tokenize + stem + rejoin)
|
||||
;; stemTokens :: String -> [String]
|
||||
;; indexStemmed:: DocId -> String -> Index -> Index (index the stemmed text)
|
||||
|
||||
(define
|
||||
search/stem-src
|
||||
"stStr cs = joinChars (map chr cs)\nstEnds suf w = let n = length w in let m = length suf in if m > n then False else stStr (drop (n - m) w) == suf\nstDropEnd k w = stStr (take (length w - k) w)\nstem w = if stEnds \"ies\" w && length w >= 5 then stDropEnd 3 w ++ \"y\" else if stEnds \"ss\" w then w else if stEnds \"es\" w && length w >= 5 then stDropEnd 2 w else if stEnds \"s\" w && length w >= 4 then stDropEnd 1 w else if stEnds \"ing\" w && length w >= 6 then stDropEnd 3 w else if stEnds \"ed\" w && length w >= 5 then stDropEnd 2 w else w\nstemTokens s = map stem (tokens s)\nstemText s = unwords (stemTokens s)\nindexStemmed d text idx = indexDoc d (stemText text) idx\n")
|
||||
9
lib/search/suggest.sx
Normal file
9
lib/search/suggest.sx
Normal file
@@ -0,0 +1,9 @@
|
||||
;; search did-you-mean / spelling suggestion — Haskell source fragment.
|
||||
;; Depends on fuzzy (editDist) + index (allTerms). Ranks indexed terms by edit
|
||||
;; distance to a (possibly misspelled) query term; ties broken alphabetically.
|
||||
;; suggestN :: Int -> String -> Index -> [Term]
|
||||
;; suggest :: String -> Index -> Term ("" if the index has no terms)
|
||||
|
||||
(define
|
||||
search/suggest-src
|
||||
"sgMk term t = (editDist term t, t)\nsgPairs term idx = map (sgMk term) (allTerms idx)\nsgCmp p1 p2 = if fst p1 < fst p2 then LT else if fst p1 > fst p2 then GT else compare (snd p1) (snd p2)\nsuggestN n term idx = take n (map snd (sortBy sgCmp (sgPairs term idx)))\nsgHead [] = \"\"\nsgHead (x:xs) = x\nsuggest term idx = sgHead (suggestN 1 term idx)\n")
|
||||
10
lib/search/syn.sx
Normal file
10
lib/search/syn.sx
Normal file
@@ -0,0 +1,10 @@
|
||||
;; search synonym / query expansion — Haskell source fragment. Depends on index +
|
||||
;; rank. A synonym map is an assoc list [(Term, [Term])]; a query term is expanded
|
||||
;; to itself plus its synonyms, then the expanded set is unioned / ranked.
|
||||
;; expandTerm :: [(Term,[Term])] -> Term -> [Term]
|
||||
;; synDocs :: [(Term,[Term])] -> Term -> Index -> [DocId]
|
||||
;; synRankTfIdf :: [(Term,[Term])] -> Term -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/syn-src
|
||||
"synLookup synmap t = case lookup t synmap of { Nothing -> [] ; Just ss -> ss }\nexpandTerm synmap t = t : synLookup synmap t\nsynDocs synmap t idx = foldl (candStep idx) [] (expandTerm synmap t)\nsynRankTfIdf synmap t idx = rankTfIdf (expandTerm synmap t) idx\n")
|
||||
50
lib/search/testlib.sx
Normal file
50
lib/search/testlib.sx
Normal file
@@ -0,0 +1,50 @@
|
||||
;; search test helpers — convert forced haskell values to plain SX and run
|
||||
;; programs built on top of search/src. Reuses hk-test / counters from
|
||||
;; lib/haskell/testlib.sx (preloaded by the conformance config).
|
||||
|
||||
;; Recursively turn a forced HK value into plain SX:
|
||||
;; cons-list -> SX list, Tuple -> SX list, leaves unchanged.
|
||||
(define
|
||||
search-hk->sx
|
||||
(fn
|
||||
(v)
|
||||
(cond
|
||||
((and (list? v) (not (empty? v)) (= (first v) "[]")) (list))
|
||||
((and (list? v) (not (empty? v)) (= (first v) ":"))
|
||||
(cons
|
||||
(search-hk->sx (nth v 1))
|
||||
(search-hk->sx (nth v 2))))
|
||||
((and (list? v) (not (empty? v)) (= (first v) "Tuple"))
|
||||
(map search-hk->sx (rest v)))
|
||||
(:else v))))
|
||||
|
||||
;; Evaluate `extra` (extra top-level Haskell bindings) on top of search/src
|
||||
;; and return binding `name` as plain SX.
|
||||
(define
|
||||
search-eval
|
||||
(fn
|
||||
(extra name)
|
||||
(search-hk->sx
|
||||
(hk-deep-force
|
||||
(get (hk-eval-program (hk-core (str search/src extra))) name)))))
|
||||
|
||||
(define
|
||||
search-join
|
||||
(fn
|
||||
(sep xs)
|
||||
(cond
|
||||
((empty? xs) "")
|
||||
((empty? (rest xs)) (first xs))
|
||||
(:else (str (first xs) sep (search-join sep (rest xs)))))))
|
||||
|
||||
;; Batch many haskell expressions into ONE program evaluation (amortizes the
|
||||
;; cost of parsing/binding search/src — important under heavy CPU load).
|
||||
;; `setup` is extra top-level Haskell; `exprs` is a list of expression strings
|
||||
;; whose results form a single haskell list. Returns the SX list of results.
|
||||
(define
|
||||
search-batch
|
||||
(fn
|
||||
(setup exprs)
|
||||
(search-eval
|
||||
(str setup "\nresult = [" (search-join ", " exprs) "]\n")
|
||||
"result")))
|
||||
123
lib/search/tests/boolean.sx
Normal file
123
lib/search/tests/boolean.sx
Normal file
@@ -0,0 +1,123 @@
|
||||
;; Phase 2 — query AST + boolean/phrase evaluation (hand-built Query values).
|
||||
;; Corpus:
|
||||
;; doc 1 "the quick brown dog" -> the quick brown dog
|
||||
;; doc 2 "a quick brown fox" -> a quick brown fox
|
||||
;; doc 3 "the dog barks loudly" -> the dog barks loudly
|
||||
;; All queries run in ONE program evaluation (search-batch) to stay fast.
|
||||
|
||||
(define
|
||||
search-corpus
|
||||
"idx = indexDoc 3 \"the dog barks loudly\" (indexDoc 2 \"a quick brown fox\" (indexDoc 1 \"the quick brown dog\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
bool-cases
|
||||
(list
|
||||
(list
|
||||
"term in two docs"
|
||||
"evalQuery idx (Term \"quick\")"
|
||||
(list 1 2))
|
||||
(list
|
||||
"term in two docs (the)"
|
||||
"evalQuery idx (Term \"the\")"
|
||||
(list 1 3))
|
||||
(list "term in one doc" "evalQuery idx (Term \"fox\")" (list 2))
|
||||
(list "term absent" "evalQuery idx (Term \"zzz\")" (list))
|
||||
(list
|
||||
"term case-sensitive at AST level"
|
||||
"evalQuery idx (Term \"QUICK\")"
|
||||
(list))
|
||||
(list "term on empty index" "evalQuery emptyIndex (Term \"cat\")" (list))
|
||||
(list
|
||||
"and both terms"
|
||||
"evalQuery idx (And (Term \"quick\") (Term \"brown\"))"
|
||||
(list 1 2))
|
||||
(list
|
||||
"and overlap subset"
|
||||
"evalQuery idx (And (Term \"the\") (Term \"dog\"))"
|
||||
(list 1 3))
|
||||
(list
|
||||
"and disjoint is empty"
|
||||
"evalQuery idx (And (Term \"the\") (Term \"fox\"))"
|
||||
(list))
|
||||
(list
|
||||
"and right-nested"
|
||||
"evalQuery idx (And (Term \"the\") (And (Term \"dog\") (Term \"barks\")))"
|
||||
(list 3))
|
||||
(list
|
||||
"or two singletons"
|
||||
"evalQuery idx (Or (Term \"fox\") (Term \"barks\"))"
|
||||
(list 2 3))
|
||||
(list
|
||||
"or all docs"
|
||||
"evalQuery idx (Or (Term \"quick\") (Term \"the\"))"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"or with absent term"
|
||||
"evalQuery idx (Or (Term \"fox\") (Term \"zzz\"))"
|
||||
(list 2))
|
||||
(list "not term" "evalQuery idx (Not (Term \"the\"))" (list 2))
|
||||
(list "not term 2" "evalQuery idx (Not (Term \"quick\"))" (list 3))
|
||||
(list
|
||||
"and with not"
|
||||
"evalQuery idx (And (Term \"quick\") (Not (Term \"the\")))"
|
||||
(list 2))
|
||||
(list
|
||||
"double negation"
|
||||
"evalQuery idx (Not (Not (Term \"fox\")))"
|
||||
(list 2))
|
||||
(list
|
||||
"or of and with term"
|
||||
"evalQuery idx (Or (And (Term \"the\") (Term \"dog\")) (Term \"fox\"))"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"phrase adjacent both docs"
|
||||
"evalQuery idx (Phrase [\"quick\", \"brown\"])"
|
||||
(list 1 2))
|
||||
(list
|
||||
"phrase adjacent one doc"
|
||||
"evalQuery idx (Phrase [\"brown\", \"dog\"])"
|
||||
(list 1))
|
||||
(list
|
||||
"phrase the quick"
|
||||
"evalQuery idx (Phrase [\"the\", \"quick\"])"
|
||||
(list 1))
|
||||
(list
|
||||
"phrase dog barks"
|
||||
"evalQuery idx (Phrase [\"dog\", \"barks\"])"
|
||||
(list 3))
|
||||
(list
|
||||
"phrase non-adjacent empty"
|
||||
"evalQuery idx (Phrase [\"quick\", \"dog\"])"
|
||||
(list))
|
||||
(list
|
||||
"phrase order matters"
|
||||
"evalQuery idx (Phrase [\"brown\", \"quick\"])"
|
||||
(list))
|
||||
(list
|
||||
"phrase single term"
|
||||
"evalQuery idx (Phrase [\"dog\"])"
|
||||
(list 1 3))
|
||||
(list
|
||||
"phrase three terms"
|
||||
"evalQuery idx (Phrase [\"the\", \"dog\", \"barks\"])"
|
||||
(list 3))
|
||||
(list
|
||||
"and of phrase and term"
|
||||
"evalQuery idx (And (Phrase [\"quick\", \"brown\"]) (Term \"dog\"))"
|
||||
(list 1))
|
||||
(list
|
||||
"not of phrase"
|
||||
"evalQuery idx (Not (Phrase [\"quick\", \"brown\"]))"
|
||||
(list 3))))
|
||||
|
||||
(define
|
||||
bool-results
|
||||
(search-batch search-corpus (map (fn (c) (nth c 1)) bool-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth bool-results i) (nth c 2)))
|
||||
bool-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
74
lib/search/tests/fuzzy.sx
Normal file
74
lib/search/tests/fuzzy.sx
Normal file
@@ -0,0 +1,74 @@
|
||||
;; Extension — fuzzy matching via Levenshtein edit distance.
|
||||
;; Corpus: 1 "color flavor" 2 "colour kitten" 3 "colored"
|
||||
;; allTerms: color colored colour flavor kitten
|
||||
|
||||
(define
|
||||
fuzzy-setup
|
||||
"idx = indexDoc 3 \"colored\" (indexDoc 2 \"colour kitten\" (indexDoc 1 \"color flavor\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
fuzzy-cases
|
||||
(list
|
||||
(list
|
||||
"editDist substitution"
|
||||
"[editDist \"kitten\" \"sitten\"]"
|
||||
(list 1))
|
||||
(list "editDist equal" "[editDist \"abc\" \"abc\"]" (list 0))
|
||||
(list "editDist deletion" "[editDist \"abc\" \"ab\"]" (list 1))
|
||||
(list "editDist insertion" "[editDist \"ab\" \"abc\"]" (list 1))
|
||||
(list "editDist from empty" "[editDist \"\" \"abc\"]" (list 3))
|
||||
(list "editDist both empty" "[editDist \"\" \"\"]" (list 0))
|
||||
(list
|
||||
"editDist classic"
|
||||
"[editDist \"kitten\" \"sitting\"]"
|
||||
(list 3))
|
||||
(list
|
||||
"editDist color colour"
|
||||
"[editDist \"color\" \"colour\"]"
|
||||
(list 1))
|
||||
(list
|
||||
"editDist color colored"
|
||||
"[editDist \"color\" \"colored\"]"
|
||||
(list 2))
|
||||
(list
|
||||
"fuzzy terms dist 1"
|
||||
"fuzzyTerms 1 \"color\" idx"
|
||||
(list "color" "colour"))
|
||||
(list
|
||||
"fuzzy terms dist 2"
|
||||
"fuzzyTerms 2 \"color\" idx"
|
||||
(list "color" "colored" "colour"))
|
||||
(list "fuzzy terms exact" "fuzzyTerms 0 \"color\" idx" (list "color"))
|
||||
(list
|
||||
"fuzzy terms other word"
|
||||
"fuzzyTerms 1 \"flavour\" idx"
|
||||
(list "flavor"))
|
||||
(list
|
||||
"fuzzy docs dist 1"
|
||||
"fuzzyDocs 1 \"color\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"fuzzy docs dist 2"
|
||||
"fuzzyDocs 2 \"color\" idx"
|
||||
(list 1 2 3))
|
||||
(list "fuzzy docs none" "fuzzyDocs 1 \"zzzzz\" idx" (list))
|
||||
(list
|
||||
"fuzzy rank dist 1"
|
||||
"fuzzyRankTfIdf 1 \"color\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"fuzzy rank dist 2"
|
||||
"fuzzyRankTfIdf 2 \"color\" idx"
|
||||
(list 1 2 3))))
|
||||
|
||||
(define
|
||||
fuzzy-results
|
||||
(search-batch fuzzy-setup (map (fn (c) (nth c 1)) fuzzy-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth fuzzy-results i) (nth c 2)))
|
||||
fuzzy-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
66
lib/search/tests/highlight.sx
Normal file
66
lib/search/tests/highlight.sx
Normal file
@@ -0,0 +1,66 @@
|
||||
;; Extension — highlight + snippet over document text.
|
||||
;; Text: "the quick brown fox jumps"
|
||||
|
||||
(define
|
||||
hl-cases
|
||||
(list
|
||||
(list
|
||||
"highlight two terms"
|
||||
"highlight [\"quick\", \"fox\"] \"the quick brown fox jumps\""
|
||||
"the [quick] brown [fox] jumps")
|
||||
(list
|
||||
"highlight none"
|
||||
"highlight [] \"the quick brown fox jumps\""
|
||||
"the quick brown fox jumps")
|
||||
(list
|
||||
"highlight absent term"
|
||||
"highlight [\"zzz\"] \"the quick brown fox jumps\""
|
||||
"the quick brown fox jumps")
|
||||
(list
|
||||
"highlight first token"
|
||||
"highlight [\"the\"] \"the quick brown fox jumps\""
|
||||
"[the] quick brown fox jumps")
|
||||
(list
|
||||
"highlight normalizes text"
|
||||
"highlight [\"quick\"] \"The Quick, brown!\""
|
||||
"the [quick] brown")
|
||||
(list
|
||||
"snippet around middle"
|
||||
"snippet 1 [\"brown\"] \"the quick brown fox jumps\""
|
||||
"quick [brown] fox")
|
||||
(list
|
||||
"snippet at start"
|
||||
"snippet 1 [\"the\"] \"the quick brown fox jumps\""
|
||||
"[the] quick brown")
|
||||
(list
|
||||
"snippet near end"
|
||||
"snippet 1 [\"fox\"] \"the quick brown fox jumps\""
|
||||
"brown [fox] jumps")
|
||||
(list
|
||||
"snippet ctx zero"
|
||||
"snippet 0 [\"brown\"] \"the quick brown fox jumps\""
|
||||
"[brown]")
|
||||
(list
|
||||
"snippet clamps at end"
|
||||
"snippet 2 [\"jumps\"] \"the quick brown fox jumps\""
|
||||
"brown fox [jumps]")
|
||||
(list
|
||||
"snippet no match shows head"
|
||||
"snippet 1 [\"zzz\"] \"the quick brown fox jumps\""
|
||||
"the quick brown")
|
||||
(list
|
||||
"snippet wide window"
|
||||
"snippet 5 [\"brown\"] \"the quick brown fox jumps\""
|
||||
"the quick [brown] fox jumps")))
|
||||
|
||||
(define
|
||||
hl-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) hl-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth hl-results i) (nth c 2)))
|
||||
hl-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
88
lib/search/tests/index.sx
Normal file
88
lib/search/tests/index.sx
Normal file
@@ -0,0 +1,88 @@
|
||||
;; Phase 1 — tokenize + inverted index.
|
||||
;; All cases run in ONE program evaluation (search-batch) to stay fast under load.
|
||||
;; Scalar results (docFreq) are wrapped as singleton lists so the batch is a list
|
||||
;; of lists.
|
||||
|
||||
(define
|
||||
index-cases
|
||||
(list
|
||||
(list
|
||||
"tokens basic lowercases"
|
||||
"tokens \"The Cat sat\""
|
||||
(list "the" "cat" "sat"))
|
||||
(list
|
||||
"tokens strips punctuation"
|
||||
"tokens \"Hello, World!\""
|
||||
(list "hello" "world"))
|
||||
(list "tokens collapses whitespace" "tokens \" a b \"" (list "a" "b"))
|
||||
(list "tokens empty is empty" "tokens \"\"" (list))
|
||||
(list "tokens keeps digits" "tokens \"abc123 x9\"" (list "abc123" "x9"))
|
||||
(list
|
||||
"positioned attaches ordinals"
|
||||
"positioned \"a b a\""
|
||||
(list
|
||||
(list "a" 0)
|
||||
(list "b" 1)
|
||||
(list "a" 2)))
|
||||
(list
|
||||
"index + lookup single doc"
|
||||
"lookupTerm \"cat\" (indexDoc 1 \"the cat sat\" emptyIndex)"
|
||||
(list (list 1 (list 1))))
|
||||
(list
|
||||
"lookup missing term is empty"
|
||||
"lookupTerm \"dog\" (indexDoc 1 \"the cat sat\" emptyIndex)"
|
||||
(list))
|
||||
(list
|
||||
"lookup records all positions"
|
||||
"lookupTerm \"the\" (indexDoc 1 \"the cat the dog the\" emptyIndex)"
|
||||
(list (list 1 (list 0 2 4))))
|
||||
(list
|
||||
"multi-doc posting list sorted by docid"
|
||||
"lookupTerm \"x\" (indexDoc 1 \"x y\" (indexDoc 2 \"x z\" emptyIndex))"
|
||||
(list
|
||||
(list 1 (list 0))
|
||||
(list 2 (list 0))))
|
||||
(list
|
||||
"index/query case symmetry"
|
||||
"lookupTerm \"cat\" (indexDoc 1 \"CAT Cat cat\" emptyIndex)"
|
||||
(list (list 1 (list 0 1 2))))
|
||||
(list
|
||||
"re-index replaces a doc"
|
||||
"lookupTerm \"a\" (indexDoc 1 \"a a a\" (indexDoc 1 \"a\" emptyIndex))"
|
||||
(list (list 1 (list 0 1 2))))
|
||||
(list
|
||||
"delete removes a doc"
|
||||
"lookupTerm \"cat\" (deleteDoc 1 (indexDoc 1 \"the cat\" emptyIndex))"
|
||||
(list))
|
||||
(list
|
||||
"delete leaves other docs"
|
||||
"lookupTerm \"cat\" (deleteDoc 2 (indexDoc 2 \"big cat\" (indexDoc 1 \"the cat\" emptyIndex)))"
|
||||
(list (list 1 (list 1))))
|
||||
(list
|
||||
"docFreq counts docs"
|
||||
"[docFreq \"cat\" (indexDoc 2 \"a cat\" (indexDoc 1 \"the cat\" emptyIndex))]"
|
||||
(list 2))
|
||||
(list
|
||||
"docFreq zero for missing"
|
||||
"[docFreq \"zzz\" (indexDoc 1 \"a b\" emptyIndex)]"
|
||||
(list 0))
|
||||
(list
|
||||
"allTerms sorted and unique"
|
||||
"allTerms (indexDoc 1 \"banana apple cherry apple\" emptyIndex)"
|
||||
(list "apple" "banana" "cherry"))
|
||||
(list
|
||||
"allTerms merged across docs"
|
||||
"allTerms (indexDoc 2 \"d a\" (indexDoc 1 \"c b\" emptyIndex))"
|
||||
(list "a" "b" "c" "d"))))
|
||||
|
||||
(define
|
||||
index-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) index-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth index-results i) (nth c 2)))
|
||||
index-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
102
lib/search/tests/integration.sx
Normal file
102
lib/search/tests/integration.sx
Normal file
@@ -0,0 +1,102 @@
|
||||
;; Phase 4 — federation (merge per-peer indices) + ACL post-filter.
|
||||
;; Peers (global id = peer*1000 + local):
|
||||
;; peer 1: 1 "alpha beta" 2 "alpha gamma" -> 1001 1002
|
||||
;; peer 2: 1 "alpha delta" 2 "beta gamma" -> 2001 2002
|
||||
;; ACL predicates are injected (viewer baked in by the caller), applied post-rank.
|
||||
|
||||
(define
|
||||
fed-setup
|
||||
"p1 = indexDoc 2 \"alpha gamma\" (indexDoc 1 \"alpha beta\" emptyIndex)\np2 = indexDoc 2 \"beta gamma\" (indexDoc 1 \"alpha delta\" emptyIndex)\nfed = fedIndex [(1, p1), (2, p2)]\npermitP1 g = g < 2000\npermitNone g = False\npermitList g = elem g [1002, 2001]\n")
|
||||
|
||||
(define
|
||||
fed-cases
|
||||
(list
|
||||
(list
|
||||
"fed merges all docs"
|
||||
"sort (allDocs fed)"
|
||||
(list 1001 1002 2001 2002))
|
||||
(list
|
||||
"fed docFreq across peers"
|
||||
"[docFreq \"alpha\" fed]"
|
||||
(list 3))
|
||||
(list "fed docFreq beta" "[docFreq \"beta\" fed]" (list 2))
|
||||
(list "fed numDocs" "[numDocs fed]" (list 4))
|
||||
(list
|
||||
"fed term lookup spans peers"
|
||||
"map fst (lookupTerm \"gamma\" fed)"
|
||||
(list 1002 2002))
|
||||
(list
|
||||
"fed preserves positions"
|
||||
"lookupTerm \"beta\" fed"
|
||||
(list
|
||||
(list 1001 (list 1))
|
||||
(list 2002 (list 0))))
|
||||
(list
|
||||
"fed rank alpha tie by gid"
|
||||
"rankTfIdf [\"alpha\"] fed"
|
||||
(list 1001 1002 2001))
|
||||
(list
|
||||
"fed rank beta"
|
||||
"rankTfIdf [\"beta\"] fed"
|
||||
(list 1001 2002))
|
||||
(list
|
||||
"fed boolean and"
|
||||
"searchQuery \"alpha AND beta\" fed"
|
||||
(list 1001))
|
||||
(list
|
||||
"fed boolean or"
|
||||
"searchQuery \"delta OR barks\" fed"
|
||||
(list 2001))
|
||||
(list
|
||||
"fed phrase within peer1"
|
||||
"searchQuery \"\\\"alpha beta\\\"\" fed"
|
||||
(list 1001))
|
||||
(list
|
||||
"fed phrase within peer2"
|
||||
"searchQuery \"\\\"beta gamma\\\"\" fed"
|
||||
(list 2002))
|
||||
(list
|
||||
"fed phrase peer2 alpha delta"
|
||||
"searchQuery \"\\\"alpha delta\\\"\" fed"
|
||||
(list 2001))
|
||||
(list "fed empty peer list" "allDocs (fedIndex [])" (list))
|
||||
(list
|
||||
"fed single relabelled peer"
|
||||
"rankTfIdf [\"alpha\"] (fedIndex [(5, p1)])"
|
||||
(list 5001 5002))
|
||||
(list
|
||||
"acl peer1 only"
|
||||
"aclFilter permitP1 (rankTfIdf [\"alpha\"] fed)"
|
||||
(list 1001 1002))
|
||||
(list
|
||||
"acl allowlist preserves rank order"
|
||||
"aclFilter permitList (rankTfIdf [\"alpha\"] fed)"
|
||||
(list 1002 2001))
|
||||
(list
|
||||
"acl topN after filter"
|
||||
"topNTfIdfAcl 1 permitP1 [\"alpha\"] fed"
|
||||
(list 1001))
|
||||
(list
|
||||
"acl denies all"
|
||||
"aclFilter permitNone (rankTfIdf [\"alpha\"] fed)"
|
||||
(list))
|
||||
(list
|
||||
"acl on bm25"
|
||||
"searchBm25Acl permitP1 1.5 0.75 [\"alpha\"] fed"
|
||||
(list 1001 1002))
|
||||
(list
|
||||
"acl end-to-end tfidf"
|
||||
"searchTfIdfAcl permitP1 [\"alpha\"] fed"
|
||||
(list 1001 1002))))
|
||||
|
||||
(define
|
||||
fed-results
|
||||
(search-batch fed-setup (map (fn (c) (nth c 1)) fed-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth fed-results i) (nth c 2)))
|
||||
fed-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
49
lib/search/tests/near.sx
Normal file
49
lib/search/tests/near.sx
Normal file
@@ -0,0 +1,49 @@
|
||||
;; Extension — proximity (NEAR) search: terms within k positions, unordered.
|
||||
;; Corpus:
|
||||
;; 1 "the quick brown fox" the0 quick1 brown2 fox3
|
||||
;; 2 "quick the lazy fox dog" quick0 the1 lazy2 fox3 dog4
|
||||
;; 3 "fox runs quick" fox0 runs1 quick2
|
||||
|
||||
(define
|
||||
near-setup
|
||||
"idx = indexDoc 3 \"fox runs quick\" (indexDoc 2 \"quick the lazy fox dog\" (indexDoc 1 \"the quick brown fox\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
near-cases
|
||||
(list
|
||||
(list
|
||||
"near adjacent one doc"
|
||||
"nearDocs 1 \"quick\" \"brown\" idx"
|
||||
(list 1))
|
||||
(list
|
||||
"near adjacent both docs"
|
||||
"nearDocs 1 \"quick\" \"the\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"near within 2"
|
||||
"nearDocs 2 \"quick\" \"fox\" idx"
|
||||
(list 1 3))
|
||||
(list "near too far at k1" "nearDocs 1 \"quick\" \"fox\" idx" (list))
|
||||
(list
|
||||
"near unordered symmetric"
|
||||
"nearDocs 2 \"fox\" \"quick\" idx"
|
||||
(list 1 3))
|
||||
(list "near wider window" "nearDocs 5 \"the\" \"dog\" idx" (list 2))
|
||||
(list "near absent term" "nearDocs 1 \"quick\" \"zzz\" idx" (list))
|
||||
(list "near needs both terms" "nearDocs 3 \"brown\" \"dog\" idx" (list))
|
||||
(list
|
||||
"near same docs only"
|
||||
"nearDocs 3 \"fox\" \"runs\" idx"
|
||||
(list 3))))
|
||||
|
||||
(define
|
||||
near-results
|
||||
(search-batch near-setup (map (fn (c) (nth c 1)) near-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth near-results i) (nth c 2)))
|
||||
near-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
53
lib/search/tests/page.sx
Normal file
53
lib/search/tests/page.sx
Normal file
@@ -0,0 +1,53 @@
|
||||
;; Extension — result pagination (offset / limit) over ranked results.
|
||||
;; Corpus (tf of "x" descending): 1 x4 2 x3 3 x2 4 x1 5 y(no x)
|
||||
;; rankTfIdf ["x"] -> [1,2,3,4]
|
||||
|
||||
(define
|
||||
page-setup
|
||||
"idx = indexDoc 5 \"y\" (indexDoc 4 \"x\" (indexDoc 3 \"x x\" (indexDoc 2 \"x x x\" (indexDoc 1 \"x x x x other\" emptyIndex))))\n")
|
||||
|
||||
(define
|
||||
page-cases
|
||||
(list
|
||||
(list "first page" "pageTfIdf 0 2 [\"x\"] idx" (list 1 2))
|
||||
(list
|
||||
"second page"
|
||||
"pageTfIdf 2 2 [\"x\"] idx"
|
||||
(list 3 4))
|
||||
(list
|
||||
"sliding window"
|
||||
"pageTfIdf 1 2 [\"x\"] idx"
|
||||
(list 2 3))
|
||||
(list
|
||||
"limit exceeds remaining"
|
||||
"pageTfIdf 3 10 [\"x\"] idx"
|
||||
(list 4))
|
||||
(list "offset past end" "pageTfIdf 4 2 [\"x\"] idx" (list))
|
||||
(list "limit zero" "pageTfIdf 0 0 [\"x\"] idx" (list))
|
||||
(list
|
||||
"whole result"
|
||||
"pageTfIdf 0 10 [\"x\"] idx"
|
||||
(list 1 2 3 4))
|
||||
(list
|
||||
"paginate raw list"
|
||||
"paginate 1 2 [10, 20, 30, 40]"
|
||||
(list 20 30))
|
||||
(list "paginate raw past end" "paginate 9 2 [10, 20]" (list))
|
||||
(list
|
||||
"bm25 page window size"
|
||||
"[length (pageBm25 0 2 1.5 0.75 [\"x\"] idx)]"
|
||||
(list 2))
|
||||
(list "result count" "[resultCount [\"x\"] idx]" (list 4))
|
||||
(list "result count zero" "[resultCount [\"zzz\"] idx]" (list 0))))
|
||||
|
||||
(define
|
||||
page-results
|
||||
(search-batch page-setup (map (fn (c) (nth c 1)) page-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth page-results i) (nth c 2)))
|
||||
page-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
139
lib/search/tests/parse.sx
Normal file
139
lib/search/tests/parse.sx
Normal file
@@ -0,0 +1,139 @@
|
||||
;; Phase 2 — query parser (parseQuery / searchQuery).
|
||||
;; AST cases assert showQ (parseQuery s); search cases assert searchQuery s idx
|
||||
;; against the standard corpus. Each group runs in one batched program eval.
|
||||
;; doc 1 "the quick brown dog" doc 2 "a quick brown fox" doc 3 "the dog barks loudly"
|
||||
|
||||
(define
|
||||
parse-corpus
|
||||
"idx = indexDoc 3 \"the dog barks loudly\" (indexDoc 2 \"a quick brown fox\" (indexDoc 1 \"the quick brown dog\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
ast-cases
|
||||
(list
|
||||
(list "single term" "showQ (parseQuery \"cat\")" "T:cat")
|
||||
(list "term normalized" "showQ (parseQuery \"CAT\")" "T:cat")
|
||||
(list "explicit and" "showQ (parseQuery \"cat AND dog\")" "(T:cat & T:dog)")
|
||||
(list
|
||||
"lowercase and keyword"
|
||||
"showQ (parseQuery \"cat and dog\")"
|
||||
"(T:cat & T:dog)")
|
||||
(list "implicit and" "showQ (parseQuery \"cat dog\")" "(T:cat & T:dog)")
|
||||
(list "or" "showQ (parseQuery \"cat OR dog\")" "(T:cat | T:dog)")
|
||||
(list "not" "showQ (parseQuery \"NOT cat\")" "!T:cat")
|
||||
(list
|
||||
"and binds tighter than or"
|
||||
"showQ (parseQuery \"cat AND dog OR bird\")"
|
||||
"((T:cat & T:dog) | T:bird)")
|
||||
(list
|
||||
"or then and"
|
||||
"showQ (parseQuery \"cat OR dog AND bird\")"
|
||||
"(T:cat | (T:dog & T:bird))")
|
||||
(list
|
||||
"parens override precedence"
|
||||
"showQ (parseQuery \"(cat OR dog) AND bird\")"
|
||||
"((T:cat | T:dog) & T:bird)")
|
||||
(list
|
||||
"and with not"
|
||||
"showQ (parseQuery \"cat AND NOT dog\")"
|
||||
"(T:cat & !T:dog)")
|
||||
(list
|
||||
"two-word phrase"
|
||||
"showQ (parseQuery \"\\\"quick brown\\\"\")"
|
||||
"P:quick-brown")
|
||||
(list
|
||||
"three-word phrase"
|
||||
"showQ (parseQuery \"\\\"quick brown fox\\\"\")"
|
||||
"P:quick-brown-fox")
|
||||
(list
|
||||
"and left-assoc"
|
||||
"showQ (parseQuery \"a AND b AND c\")"
|
||||
"((T:a & T:b) & T:c)")
|
||||
(list
|
||||
"or left-assoc"
|
||||
"showQ (parseQuery \"a OR b OR c\")"
|
||||
"((T:a | T:b) | T:c)")
|
||||
(list
|
||||
"punctuation stripped"
|
||||
"showQ (parseQuery \"cat, dog!\")"
|
||||
"(T:cat & T:dog)")))
|
||||
|
||||
(define
|
||||
search-cases
|
||||
(list
|
||||
(list "term" "searchQuery \"quick\" idx" (list 1 2))
|
||||
(list
|
||||
"term normalized"
|
||||
"searchQuery \"QUICK\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"explicit and"
|
||||
"searchQuery \"quick AND brown\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"implicit and"
|
||||
"searchQuery \"quick brown\" idx"
|
||||
(list 1 2))
|
||||
(list "and disjoint" "searchQuery \"the AND fox\" idx" (list))
|
||||
(list "or" "searchQuery \"fox OR barks\" idx" (list 2 3))
|
||||
(list "not" "searchQuery \"NOT the\" idx" (list 2))
|
||||
(list "and not" "searchQuery \"quick AND NOT the\" idx" (list 2))
|
||||
(list
|
||||
"precedence and-or"
|
||||
"searchQuery \"the AND dog OR fox\" idx"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"precedence or-and"
|
||||
"searchQuery \"fox OR the AND dog\" idx"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"parens"
|
||||
"searchQuery \"the AND (dog OR fox)\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"phrase"
|
||||
"searchQuery \"\\\"quick brown\\\"\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"phrase one doc"
|
||||
"searchQuery \"\\\"brown dog\\\"\" idx"
|
||||
(list 1))
|
||||
(list
|
||||
"phrase and term"
|
||||
"searchQuery \"\\\"quick brown\\\" AND dog\" idx"
|
||||
(list 1))
|
||||
(list
|
||||
"not phrase"
|
||||
"searchQuery \"NOT \\\"quick brown\\\"\" idx"
|
||||
(list 3))
|
||||
(list
|
||||
"implicit and terms"
|
||||
"searchQuery \"dog barks\" idx"
|
||||
(list 3))))
|
||||
|
||||
(define
|
||||
ast-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) ast-cases)))
|
||||
(define
|
||||
search-results
|
||||
(search-batch
|
||||
parse-corpus
|
||||
(map (fn (c) (nth c 1)) search-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test
|
||||
(str "ast: " (nth c 0))
|
||||
(nth ast-results i)
|
||||
(nth c 2)))
|
||||
ast-cases)
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test
|
||||
(str "search: " (nth c 0))
|
||||
(nth search-results i)
|
||||
(nth c 2)))
|
||||
search-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
63
lib/search/tests/prefix.sx
Normal file
63
lib/search/tests/prefix.sx
Normal file
@@ -0,0 +1,63 @@
|
||||
;; Extension — prefix / wildcard queries.
|
||||
;; Corpus: 1 "alpha alpine" 2 "beta apple" 3 "banana alpha"
|
||||
;; allTerms sorted: alpha alpine apple banana beta
|
||||
|
||||
(define
|
||||
prefix-setup
|
||||
"idx = indexDoc 3 \"banana alpha\" (indexDoc 2 \"beta apple\" (indexDoc 1 \"alpha alpine\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
prefix-cases
|
||||
(list
|
||||
(list
|
||||
"prefix terms two matches"
|
||||
"prefixTerms \"al\" idx"
|
||||
(list "alpha" "alpine"))
|
||||
(list
|
||||
"prefix terms narrower"
|
||||
"prefixTerms \"alp\" idx"
|
||||
(list "alpha" "alpine"))
|
||||
(list
|
||||
"prefix terms wide"
|
||||
"prefixTerms \"a\" idx"
|
||||
(list "alpha" "alpine" "apple"))
|
||||
(list "prefix terms single" "prefixTerms \"ban\" idx" (list "banana"))
|
||||
(list "prefix terms exact term" "prefixTerms \"beta\" idx" (list "beta"))
|
||||
(list "prefix terms none" "prefixTerms \"z\" idx" (list))
|
||||
(list
|
||||
"prefix docs union"
|
||||
"prefixDocs \"al\" idx"
|
||||
(list 1 3))
|
||||
(list "prefix docs single term" "prefixDocs \"ban\" idx" (list 3))
|
||||
(list
|
||||
"prefix docs wide"
|
||||
"prefixDocs \"a\" idx"
|
||||
(list 1 2 3))
|
||||
(list "prefix docs none" "prefixDocs \"z\" idx" (list))
|
||||
(list
|
||||
"prefix docs exact"
|
||||
"prefixDocs \"alpha\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"prefix rank ranks by matched terms"
|
||||
"prefixRankTfIdf \"al\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"prefix rank single doc"
|
||||
"prefixRankTfIdf \"ban\" idx"
|
||||
(list 3))
|
||||
(list "prefix rank empty" "prefixRankTfIdf \"z\" idx" (list))))
|
||||
|
||||
(define
|
||||
prefix-results
|
||||
(search-batch
|
||||
prefix-setup
|
||||
(map (fn (c) (nth c 1)) prefix-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth prefix-results i) (nth c 2)))
|
||||
prefix-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
90
lib/search/tests/rank.sx
Normal file
90
lib/search/tests/rank.sx
Normal file
@@ -0,0 +1,90 @@
|
||||
;; Phase 3 — ranking (TF-IDF, BM25, top-N). Deterministic: ties broken by DocId.
|
||||
;; Corpora:
|
||||
;; idx1: 1 "alpha alpha alpha gamma" 2 "alpha" 3 "beta"
|
||||
;; idx2: 1 "cat" 2 "cat cat dog elephant frog grape" 3 "zzz"
|
||||
;; idx3: 1 "kite" 2 "kite" (identical docs -> tiebreak)
|
||||
|
||||
(define
|
||||
rank-setup
|
||||
"idx1 = indexDoc 3 \"beta\" (indexDoc 2 \"alpha\" (indexDoc 1 \"alpha alpha alpha gamma\" emptyIndex))\nidx2 = indexDoc 3 \"zzz\" (indexDoc 2 \"cat cat dog elephant frog grape\" (indexDoc 1 \"cat\" emptyIndex))\nidx3 = indexDoc 2 \"kite\" (indexDoc 1 \"kite\" emptyIndex)\n")
|
||||
|
||||
(define
|
||||
rank-cases
|
||||
(list
|
||||
(list
|
||||
"tfidf tf ordering"
|
||||
"rankTfIdf [\"alpha\"] idx1"
|
||||
(list 1 2))
|
||||
(list
|
||||
"tfidf rare term boosts"
|
||||
"rankTfIdf [\"alpha\", \"beta\"] idx1"
|
||||
(list 1 3 2))
|
||||
(list
|
||||
"tfidf single-doc term"
|
||||
"rankTfIdf [\"gamma\"] idx1"
|
||||
(list 1))
|
||||
(list "tfidf absent term empty" "rankTfIdf [\"nope\"] idx1" (list))
|
||||
(list "tfidf empty query empty" "rankTfIdf [] idx1" (list))
|
||||
(list
|
||||
"tfidf candidate union tie by docid"
|
||||
"rankTfIdf [\"beta\", \"gamma\"] idx1"
|
||||
(list 1 3))
|
||||
(list
|
||||
"tfidf tf ordering idx2"
|
||||
"rankTfIdf [\"cat\"] idx2"
|
||||
(list 2 1))
|
||||
(list "topN tfidf 1" "topNTfIdf 1 [\"alpha\"] idx1" (list 1))
|
||||
(list
|
||||
"topN tfidf 2"
|
||||
"topNTfIdf 2 [\"alpha\", \"beta\"] idx1"
|
||||
(list 1 3))
|
||||
(list
|
||||
"topN exceeds results"
|
||||
"topNTfIdf 10 [\"gamma\"] idx1"
|
||||
(list 1))
|
||||
(list "topN zero" "topNTfIdf 0 [\"alpha\"] idx1" (list))
|
||||
(list
|
||||
"bm25 tf+length flips tfidf"
|
||||
"rankBm25 1.5 0.75 [\"cat\"] idx2"
|
||||
(list 1 2))
|
||||
(list
|
||||
"bm25 b=0 ignores length"
|
||||
"rankBm25 1.5 0.0 [\"cat\"] idx2"
|
||||
(list 2 1))
|
||||
(list
|
||||
"bm25 alpha idx1"
|
||||
"rankBm25 1.5 0.75 [\"alpha\"] idx1"
|
||||
(list 1 2))
|
||||
(list "bm25 absent empty" "rankBm25 1.5 0.75 [\"nope\"] idx1" (list))
|
||||
(list
|
||||
"bm25 single-doc term"
|
||||
"rankBm25 1.5 0.75 [\"gamma\"] idx1"
|
||||
(list 1))
|
||||
(list "bm25 topN 1" "topNBm25 1 1.5 0.75 [\"cat\"] idx2" (list 1))
|
||||
(list
|
||||
"bm25 same candidate set"
|
||||
"sort (rankBm25 1.5 0.75 [\"alpha\", \"beta\"] idx1)"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"tfidf stable tiebreak"
|
||||
"rankTfIdf [\"kite\"] idx3"
|
||||
(list 1 2))
|
||||
(list
|
||||
"bm25 stable tiebreak"
|
||||
"rankBm25 1.5 0.75 [\"kite\"] idx3"
|
||||
(list 1 2))
|
||||
(list "numDocs" "[numDocs idx1]" (list 3))
|
||||
(list "docLen counts tokens" "[docLen 1 idx1]" (list 4))
|
||||
(list "docFreq via index" "[docFreq \"alpha\" idx1]" (list 2))))
|
||||
|
||||
(define
|
||||
rank-results
|
||||
(search-batch rank-setup (map (fn (c) (nth c 1)) rank-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth rank-results i) (nth c 2)))
|
||||
rank-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
67
lib/search/tests/rankq.sx
Normal file
67
lib/search/tests/rankq.sx
Normal file
@@ -0,0 +1,67 @@
|
||||
;; Extension — boolean-filtered ranked search (filter then rank by relevance).
|
||||
;; Corpus:
|
||||
;; 1 "apple apple banana" apple2 banana1
|
||||
;; 2 "apple cherry" apple1 cherry1
|
||||
;; 3 "banana cherry" banana1 cherry1
|
||||
;; 4 "apple banana cherry" apple1 banana1 cherry1
|
||||
|
||||
(define
|
||||
rankq-setup
|
||||
"idx = indexDoc 4 \"apple banana cherry\" (indexDoc 3 \"banana cherry\" (indexDoc 2 \"apple cherry\" (indexDoc 1 \"apple apple banana\" emptyIndex)))\n")
|
||||
|
||||
(define
|
||||
rankq-cases
|
||||
(list
|
||||
(list
|
||||
"queryTerms and"
|
||||
"queryTerms (parseQuery \"apple AND banana\")"
|
||||
(list "apple" "banana"))
|
||||
(list
|
||||
"queryTerms or not"
|
||||
"queryTerms (parseQuery \"a OR NOT b\")"
|
||||
(list "a" "b"))
|
||||
(list
|
||||
"queryTerms phrase"
|
||||
"queryTerms (parseQuery \"\\\"x y\\\" OR z\")"
|
||||
(list "x" "y" "z"))
|
||||
(list
|
||||
"and filter ranked by tf"
|
||||
"searchRankTfIdf \"apple AND banana\" idx"
|
||||
(list 1 4))
|
||||
(list
|
||||
"single term ranked tie"
|
||||
"searchRankTfIdf \"cherry\" idx"
|
||||
(list 2 3 4))
|
||||
(list
|
||||
"or filter ranked"
|
||||
"searchRankTfIdf \"apple OR banana\" idx"
|
||||
(list 1 4 2 3))
|
||||
(list
|
||||
"and-not narrows then ranks"
|
||||
"searchRankTfIdf \"apple AND NOT banana\" idx"
|
||||
(list 2))
|
||||
(list
|
||||
"phrase filter ranked"
|
||||
"searchRankTfIdf \"\\\"apple banana\\\"\" idx"
|
||||
(list 1 4))
|
||||
(list "no matches" "searchRankTfIdf \"zzz\" idx" (list))
|
||||
(list
|
||||
"bm25 boolean ranked subset"
|
||||
"sort (searchRankBm25 1.5 0.75 \"apple OR banana\" idx)"
|
||||
(list 1 2 3 4))
|
||||
(list
|
||||
"bm25 and filter"
|
||||
"searchRankBm25 1.5 0.75 \"apple AND NOT banana\" idx"
|
||||
(list 2))))
|
||||
|
||||
(define
|
||||
rankq-results
|
||||
(search-batch rankq-setup (map (fn (c) (nth c 1)) rankq-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth rankq-results i) (nth c 2)))
|
||||
rankq-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
47
lib/search/tests/stem.sx
Normal file
47
lib/search/tests/stem.sx
Normal file
@@ -0,0 +1,47 @@
|
||||
;; Extension — stemming (suffix stripping). Scalar string results wrapped in [].
|
||||
|
||||
(define
|
||||
stem-cases
|
||||
(list
|
||||
(list "stem plural s" "[stem \"cats\"]" (list "cat"))
|
||||
(list "stem plural dogs" "[stem \"dogs\"]" (list "dog"))
|
||||
(list "stem keeps ss" "[stem \"pass\"]" (list "pass"))
|
||||
(list "stem short s unchanged" "[stem \"is\"]" (list "is"))
|
||||
(list "stem es boxes" "[stem \"boxes\"]" (list "box"))
|
||||
(list "stem es wishes" "[stem \"wishes\"]" (list "wish"))
|
||||
(list "stem ies cities" "[stem \"cities\"]" (list "city"))
|
||||
(list "stem ies parties" "[stem \"parties\"]" (list "party"))
|
||||
(list "stem ing jumping" "[stem \"jumping\"]" (list "jump"))
|
||||
(list "stem ing running literal" "[stem \"running\"]" (list "runn"))
|
||||
(list "stem ed jumped" "[stem \"jumped\"]" (list "jump"))
|
||||
(list "stem ed wanted" "[stem \"wanted\"]" (list "want"))
|
||||
(list "stem short ed unchanged" "[stem \"red\"]" (list "red"))
|
||||
(list "stem no suffix" "[stem \"cat\"]" (list "cat"))
|
||||
(list
|
||||
"stemText normalizes and stems"
|
||||
"[stemText \"Cats Running!\"]"
|
||||
(list "cat runn"))
|
||||
(list
|
||||
"stemTokens list"
|
||||
"stemTokens \"boxes and cats\""
|
||||
(list "box" "and" "cat"))
|
||||
(list
|
||||
"indexStemmed unifies plural"
|
||||
"map fst (lookupTerm \"cat\" (indexStemmed 2 \"a cat\" (indexStemmed 1 \"the cats\" emptyIndex)))"
|
||||
(list 1 2))
|
||||
(list
|
||||
"indexStemmed stem query"
|
||||
"map fst (lookupTerm (stem \"boxes\") (indexStemmed 1 \"many boxes\" emptyIndex))"
|
||||
(list 1))))
|
||||
|
||||
(define
|
||||
stem-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) stem-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth stem-results i) (nth c 2)))
|
||||
stem-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
42
lib/search/tests/suggest.sx
Normal file
42
lib/search/tests/suggest.sx
Normal file
@@ -0,0 +1,42 @@
|
||||
;; Extension — did-you-mean / spelling suggestion.
|
||||
;; Corpus terms (sorted): ample apple apply banana orange
|
||||
|
||||
(define
|
||||
suggest-setup
|
||||
"idx = indexDoc 1 \"apple apply ample banana orange\" emptyIndex\n")
|
||||
|
||||
(define
|
||||
suggest-cases
|
||||
(list
|
||||
(list "suggest exact term" "[suggest \"apple\" idx]" (list "apple"))
|
||||
(list
|
||||
"suggest misspelled banana"
|
||||
"[suggest \"bananna\" idx]"
|
||||
(list "banana"))
|
||||
(list
|
||||
"suggest missing letter orange"
|
||||
"[suggest \"orang\" idx]"
|
||||
(list "orange"))
|
||||
(list "suggest closest apply" "[suggest \"aply\" idx]" (list "apply"))
|
||||
(list "suggestN 1 banana" "suggestN 1 \"bananna\" idx" (list "banana"))
|
||||
(list
|
||||
"suggestN 2 ties alpha"
|
||||
"suggestN 2 \"aple\" idx"
|
||||
(list "ample" "apple"))
|
||||
(list "suggest empty term shortest" "[suggest \"\" idx]" (list "ample"))
|
||||
(list "suggest empty index" "[suggest \"apple\" emptyIndex]" (list ""))
|
||||
(list "suggestN empty index" "suggestN 1 \"apple\" emptyIndex" (list))))
|
||||
|
||||
(define
|
||||
suggest-results
|
||||
(search-batch
|
||||
suggest-setup
|
||||
(map (fn (c) (nth c 1)) suggest-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth suggest-results i) (nth c 2)))
|
||||
suggest-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
53
lib/search/tests/syn.sx
Normal file
53
lib/search/tests/syn.sx
Normal file
@@ -0,0 +1,53 @@
|
||||
;; Extension — synonym / query expansion.
|
||||
;; synmap: car -> automobile, vehicle ; big -> large
|
||||
;; Corpus: 1 "fast car" 2 "shiny automobile" 3 "big truck" 4 "large house" 5 "vehicle review"
|
||||
|
||||
(define
|
||||
syn-setup
|
||||
"synmap = [(\"car\", [\"automobile\", \"vehicle\"]), (\"big\", [\"large\"])]\nidx = indexDoc 5 \"vehicle review\" (indexDoc 4 \"large house\" (indexDoc 3 \"big truck\" (indexDoc 2 \"shiny automobile\" (indexDoc 1 \"fast car\" emptyIndex))))\n")
|
||||
|
||||
(define
|
||||
syn-cases
|
||||
(list
|
||||
(list
|
||||
"expand term with synonyms"
|
||||
"expandTerm synmap \"car\""
|
||||
(list "car" "automobile" "vehicle"))
|
||||
(list
|
||||
"expand single synonym"
|
||||
"expandTerm synmap \"big\""
|
||||
(list "big" "large"))
|
||||
(list "expand unknown term" "expandTerm synmap \"banana\"" (list "banana"))
|
||||
(list
|
||||
"syn docs union"
|
||||
"synDocs synmap \"car\" idx"
|
||||
(list 1 2 5))
|
||||
(list
|
||||
"syn docs single synonym"
|
||||
"synDocs synmap \"big\" idx"
|
||||
(list 3 4))
|
||||
(list
|
||||
"syn docs no synonyms"
|
||||
"synDocs synmap \"house\" idx"
|
||||
(list 4))
|
||||
(list "syn docs absent" "synDocs synmap \"plane\" idx" (list))
|
||||
(list
|
||||
"syn rank expanded"
|
||||
"synRankTfIdf synmap \"car\" idx"
|
||||
(list 1 2 5))
|
||||
(list
|
||||
"syn rank single"
|
||||
"synRankTfIdf synmap \"big\" idx"
|
||||
(list 3 4))))
|
||||
|
||||
(define
|
||||
syn-results
|
||||
(search-batch syn-setup (map (fn (c) (nth c 1)) syn-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth syn-results i) (nth c 2)))
|
||||
syn-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
8
lib/search/tokenize.sx
Normal file
8
lib/search/tokenize.sx
Normal file
@@ -0,0 +1,8 @@
|
||||
;; search tokenizer — Haskell source fragment.
|
||||
;; normalize (lowercase + strip punctuation), split on whitespace, attach positions.
|
||||
;; tokens :: String -> [String]
|
||||
;; positioned :: String -> [(String, Int)] -- 0-based ordinal positions
|
||||
|
||||
(define
|
||||
search/tokenize-src
|
||||
"lowerChar c = chr (toLower (ord c))\nnormChar c = if isAlphaNum c then lowerChar c else ' '\nisBlankCh c = c == ' '\ndropBlanks [] = []\ndropBlanks (c:cs) = if isBlankCh c then dropBlanks cs else c:cs\ntakeWord [] = []\ntakeWord (c:cs) = if isBlankCh c then [] else c : takeWord cs\nafterWord [] = []\nafterWord (c:cs) = if isBlankCh c then c:cs else afterWord cs\nsplitWords s = let s2 = dropBlanks s in if null s2 then [] else takeWord s2 : splitWords (afterWord s2)\nappendStr a b = a ++ b\njoinChars cs = foldr appendStr \"\" cs\ntokens s = map joinChars (splitWords (map normChar s))\nposFrom i [] = []\nposFrom i (x:xs) = (x, i) : posFrom (i + 1) xs\npositioned s = posFrom 0 (tokens s)\n")
|
||||
102
plans/acl-on-sx.md
Normal file
102
plans/acl-on-sx.md
Normal file
@@ -0,0 +1,102 @@
|
||||
# acl-on-sx: Access Control on Datalog
|
||||
|
||||
rose-ash needs fine-grained, explainable, federation-aware access control. Subjects
|
||||
(users, groups, roles, services) × actions (read, edit, comment, moderate, federate)
|
||||
× resources (pages, posts, threads, peers). Decisions must come with a trace — not just
|
||||
permit/deny, but **why**.
|
||||
|
||||
Datalog's bottom-up rule engine produces transparent permit/deny chains: the proof tree
|
||||
is the audit trail. Inheritance over groups + resource hierarchies is recursive Datalog
|
||||
in one rule. Federation extends naturally — fed-sx replicates ACL facts, peers reason
|
||||
over the union.
|
||||
|
||||
End-state: a Datalog-on-SX layer specifically for ACL, with explanation API, audit log,
|
||||
and federation extension. Reuses `lib/datalog/` evaluator and term model where possible.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/acl/conformance.sh` → **0/0** (not yet started)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only touch `lib/acl/**` and `plans/acl-on-sx.md`. Do **not** edit `spec/`,
|
||||
`hosts/`, `shared/`, `lib/datalog/**`, or other `lib/<lang>/`. You may **import**
|
||||
from `lib/datalog/` (its public API in `lib/datalog/datalog.sx`); do **not** copy or
|
||||
modify Datalog code.
|
||||
- **Shared-file issues** go under "Blockers" with a minimal repro; do not fix here.
|
||||
- **SX files:** use `sx-tree` MCP tools only.
|
||||
- **Architecture:** thin layer on top of `lib/datalog/`. Define schema, surface API,
|
||||
audit + federation hooks. The rule engine itself is Datalog's.
|
||||
- **Watch for shared patterns** going into `lib/guest/` — both acl-sx and mod-sx need
|
||||
rule-engine plumbing. If you find shared shape, flag it for extraction (don't
|
||||
extract yet — wait for mod-sx to start).
|
||||
- **Commits:** one feature per commit. Keep Progress log updated and tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
ACL declarations (SX) User query
|
||||
│ │
|
||||
▼ ▼
|
||||
lib/acl/schema.sx lib/acl/api.sx
|
||||
— subject sorts — (acl/permit? subj act res)
|
||||
— resource sorts — (acl/explain subj act res)
|
||||
— action sorts — (acl/audit subj act res :allowed?)
|
||||
— fact schema │
|
||||
│ ▼
|
||||
▼ lib/acl/engine.sx
|
||||
lib/acl/facts.sx — builds Datalog query
|
||||
— actor(id, kind) — invokes lib/datalog/
|
||||
— resource(id, kind) — extracts proof tree
|
||||
— member_of(actor, group) │
|
||||
— child_of(res, parent) ▼
|
||||
— grant(actor, act, res) lib/acl/audit.sx
|
||||
— deny (actor, act, res) — persistent decision log
|
||||
— query API
|
||||
```
|
||||
|
||||
## Phase 1 — Direct grants
|
||||
|
||||
- [ ] `lib/acl/schema.sx` — sorts: subject {user, group, role, service}, action,
|
||||
resource {page, post, thread, peer}
|
||||
- [ ] `lib/acl/facts.sx` — `actor`, `resource`, `grant`, `deny` predicates as Datalog
|
||||
EDB
|
||||
- [ ] `lib/acl/engine.sx` — `(permit? subj act res db)` reduces to Datalog query
|
||||
- [ ] `lib/acl/api.sx` — public `(acl/permit? ...)` taking implicit current db
|
||||
- [ ] `lib/acl/tests/direct.sx` — 15+ cases: direct grant, missing grant, explicit deny
|
||||
- [ ] `lib/acl/scoreboard.{json,md}` baseline
|
||||
- [ ] `lib/acl/conformance.sh` runs the suite
|
||||
|
||||
## Phase 2 — Inheritance
|
||||
|
||||
- [ ] `member_of(actor, group)` chain — group grants apply to members (transitive)
|
||||
- [ ] `child_of(res, parent)` chain — parent grants apply to children (transitive)
|
||||
- [ ] role expansion — role contains list of (action, resource) tuples
|
||||
- [ ] deny-overrides — explicit deny wins over inherited allow
|
||||
- [ ] `lib/acl/tests/inherit.sx` — 25+ cases: nested groups, deep resource trees,
|
||||
conflict resolution, deny precedence
|
||||
- [ ] document the deny-overrides choice in plan
|
||||
|
||||
## Phase 3 — Explanation + audit
|
||||
|
||||
- [ ] `(acl/explain subj act res)` → `{:allowed? T :proof <tree>}`
|
||||
- [ ] proof tree extracts from Datalog's derivation
|
||||
- [ ] `lib/acl/audit.sx` — append-only decision log (in-memory + serializer for disk)
|
||||
- [ ] `(acl/audit-tail n)` for recent decisions
|
||||
- [ ] `lib/acl/tests/explain.sx` — proof correctness, audit completeness
|
||||
|
||||
## Phase 4 — Federation
|
||||
|
||||
- [ ] peer trust facts — `peer(addr, kind)`, `trust(peer, level)`
|
||||
- [ ] delegated grants — `delegate(peer, actor, action, resource)`
|
||||
- [ ] cross-instance permit chain — query asks local + queries trusted peers via fed-sx
|
||||
- [ ] revocation propagation — fact retraction across federation
|
||||
- [ ] `lib/acl/tests/fed.sx` — federated grant chains (mock fed-sx transport in tests)
|
||||
|
||||
## Progress log
|
||||
|
||||
(loop fills this in)
|
||||
|
||||
## Blockers
|
||||
|
||||
(loop fills this in)
|
||||
93
plans/agent-briefings/acl-loop.md
Normal file
93
plans/agent-briefings/acl-loop.md
Normal file
@@ -0,0 +1,93 @@
|
||||
# acl-on-sx loop agent (single agent, queue-driven)
|
||||
|
||||
Role: iterates `plans/acl-on-sx.md` forever. **First subsystem loop after fed-sx.**
|
||||
Sits on `lib/datalog/` — rule engine reused, schema/api/audit/federation added on
|
||||
top. The deliverable isn't "implement Datalog ACL"; it's *also* to surface shared
|
||||
rule-engine plumbing into `lib/guest/` (the mod-sx loop will be the second consumer,
|
||||
validating extraction).
|
||||
|
||||
```
|
||||
description: acl-on-sx queue loop
|
||||
subagent_type: general-purpose
|
||||
run_in_background: true
|
||||
isolation: worktree
|
||||
```
|
||||
|
||||
## Prompt
|
||||
|
||||
You are the sole background agent working `/root/rose-ash/plans/acl-on-sx.md`.
|
||||
Isolated worktree, forever, one commit per feature. Push to `origin/loops/acl`
|
||||
after every commit.
|
||||
|
||||
## Restart baseline — check before iterating
|
||||
|
||||
1. Read `plans/acl-on-sx.md` — roadmap + Progress log.
|
||||
2. `ls lib/acl/` — pick up from the most advanced file.
|
||||
3. If `lib/acl/tests/*.sx` exist, run them via `bash lib/acl/conformance.sh`. Green
|
||||
before new work.
|
||||
4. If `lib/acl/scoreboard.md` exists, that's your baseline.
|
||||
5. Read `lib/datalog/datalog.sx` public API once — that's your substrate.
|
||||
|
||||
## The queue
|
||||
|
||||
Phase order per `plans/acl-on-sx.md`:
|
||||
|
||||
- **Phase 1** — direct grants. Schema, EDB facts, engine, api, 15+ tests
|
||||
- **Phase 2** — inheritance (member_of, child_of, role expansion, deny-overrides)
|
||||
- **Phase 3** — explanation + audit (proof tree, audit log)
|
||||
- **Phase 4** — federation (peer trust, delegation, cross-instance permit chain)
|
||||
|
||||
Within a phase, pick the checkbox that unlocks the most tests per effort.
|
||||
|
||||
Every iteration: implement → test → commit → tick `[ ]` → Progress log → next.
|
||||
|
||||
## Ground rules (hard)
|
||||
|
||||
- **Scope:** only `lib/acl/**` and `plans/acl-on-sx.md`. Do **not** edit `spec/`,
|
||||
`hosts/`, `shared/`, other `lib/<lang>/` dirs, `lib/stdlib.sx`, or `lib/` root.
|
||||
May **import** from `lib/datalog/` only (its public API).
|
||||
- **NEVER call `sx_build`.** 600s watchdog. If sx_server binary broken → Blockers
|
||||
entry, stop.
|
||||
- **Shared-file issues** → plan's Blockers with minimal repro.
|
||||
- **SX files:** `sx-tree` MCP tools ONLY. `sx_validate` after edits.
|
||||
- **Worktree:** commit, then push to `origin/loops/acl`. Never touch `main` or
|
||||
`architecture`.
|
||||
- **Commit granularity:** one feature per commit. Short factual messages
|
||||
(`acl: child_of resource inheritance + 8 tests`).
|
||||
- **Plan file:** update Progress log + tick boxes every commit.
|
||||
- **Watch for shared infrastructure** with future mod-sx (Prolog moderation). If you
|
||||
build a generic rule-engine adapter, note it in Progress log so the eventual
|
||||
`lib/guest/rules/` extraction has both consumers identified.
|
||||
|
||||
## ACL-specific gotchas
|
||||
|
||||
- **Datalog is bottom-up.** No goal-directed search. Don't reach for cut or
|
||||
backtracking — that's mod-sx's job. Your decisions emerge from fixpoint.
|
||||
- **Deny-overrides** is the policy: if both an allow and deny rule fire, deny wins.
|
||||
Encode this via stratified negation; document the choice clearly in plan.
|
||||
- **Inheritance termination:** recursive rules with `member_of` chains must
|
||||
terminate. Datalog guarantees this absent function symbols — don't introduce them
|
||||
in your schema.
|
||||
- **Proof tree shape:** Datalog's derivation graph is a DAG, not a tree, when the
|
||||
same fact is derived multiple ways. For audit, pick one canonical derivation
|
||||
(shortest, or first); document choice.
|
||||
- **Federation isn't transitive trust.** A peer's `delegate(...)` fact only applies
|
||||
if local `trust(peer, level)` covers the action class. Re-check trust on every
|
||||
query, not at fact-ingestion time.
|
||||
|
||||
## General gotchas (all loops)
|
||||
|
||||
- SX `do` = R7RS iteration. Use `begin` for multi-expr sequences.
|
||||
- `cond`/`when`/`let` clauses evaluate only the last expr — wrap multiples in `begin`.
|
||||
- `env-bind!` creates a binding; `env-set!` mutates an existing one (walks scope chain).
|
||||
- `sx_validate` after every structural edit.
|
||||
- `list?` returns false on raw JS Arrays — host data must be SX-converted.
|
||||
|
||||
## Style
|
||||
|
||||
- No comments in `.sx` unless non-obvious.
|
||||
- No new planning docs — update `plans/acl-on-sx.md` inline.
|
||||
- Short, factual commit messages.
|
||||
- One feature per iteration. Commit. Log. Push. Next.
|
||||
|
||||
Go. Start by reading the plan; find the first unchecked `[ ]`; implement it.
|
||||
99
plans/agent-briefings/feed-loop.md
Normal file
99
plans/agent-briefings/feed-loop.md
Normal file
@@ -0,0 +1,99 @@
|
||||
# feed-on-sx loop agent (single agent, queue-driven)
|
||||
|
||||
Role: iterates `plans/feed-on-sx.md` forever. **Activity feeds on APL** — timelines,
|
||||
notifications, fanout, ranking, all as APL array math on activity vectors. Densest
|
||||
possible expression of feed composition. Sits on `lib/apl/` (450+/450+ tests
|
||||
already); adds a feed-shaped vocabulary on top.
|
||||
|
||||
```
|
||||
description: feed-on-sx queue loop
|
||||
subagent_type: general-purpose
|
||||
run_in_background: true
|
||||
isolation: worktree
|
||||
```
|
||||
|
||||
## Prompt
|
||||
|
||||
You are the sole background agent working `/root/rose-ash/plans/feed-on-sx.md`.
|
||||
Isolated worktree, forever, one commit per feature. Push to `origin/loops/feed`
|
||||
after every commit.
|
||||
|
||||
## Restart baseline — check before iterating
|
||||
|
||||
1. Read `plans/feed-on-sx.md` — roadmap + Progress log.
|
||||
2. `ls lib/feed/` — pick up from the most advanced file.
|
||||
3. If `lib/feed/tests/*.sx` exist, run them via `bash lib/feed/conformance.sh`. Green
|
||||
before new work.
|
||||
4. If `lib/feed/scoreboard.md` exists, that's your baseline.
|
||||
5. Read `lib/apl/apl.sx` public API once — that's your substrate. Familiarize
|
||||
yourself with at least: `⍳ ⍴ / ⌽ ↑ ↓ ⌷ ∊ ∘.× /\ ⍋` (you will use all of these).
|
||||
|
||||
## The queue
|
||||
|
||||
Phase order per `plans/feed-on-sx.md`:
|
||||
|
||||
- **Phase 1** — stream model + basic ops (record schema, filter, sort, take)
|
||||
- **Phase 2** — **THE SHOWCASE**: fanout via outer product. activities `∘.×`
|
||||
followers → inbox matrix, flatten + dedupe
|
||||
- **Phase 3** — aggregation + ranking (group-by, velocity, recency, top-N)
|
||||
- **Phase 4** — visibility filter (acl-sx) + federation (fed-sx inbox + backfill)
|
||||
|
||||
Within a phase, pick the checkbox that unlocks the most tests per effort.
|
||||
|
||||
Every iteration: implement → test → commit → tick `[ ]` → Progress log → next.
|
||||
|
||||
## Ground rules (hard)
|
||||
|
||||
- **Scope:** only `lib/feed/**` and `plans/feed-on-sx.md`. Do **not** edit `spec/`,
|
||||
`hosts/`, `shared/`, other `lib/<lang>/` dirs, `lib/stdlib.sx`, or `lib/` root.
|
||||
May **import** from `lib/apl/` only (its public API).
|
||||
- **NEVER call `sx_build`.** 600s watchdog. If sx_server binary broken → Blockers
|
||||
entry, stop.
|
||||
- **Shared-file issues** → plan's Blockers with minimal repro.
|
||||
- **SX files:** `sx-tree` MCP tools ONLY. `sx_validate` after edits.
|
||||
- **Unicode in `.sx`:** raw UTF-8 only, never `\uXXXX` escapes. APL glyphs land
|
||||
directly in source.
|
||||
- **Worktree:** commit, then push to `origin/loops/feed`. Never touch `main` or
|
||||
`architecture`.
|
||||
- **Commit granularity:** one feature per commit. Short factual messages
|
||||
(`feed: outer-product fanout + dedupe by (actor,verb,object) + 9 tests`).
|
||||
- **Plan file:** update Progress log + tick boxes every commit.
|
||||
|
||||
## feed-specific gotchas
|
||||
|
||||
- **Activities are heterogeneous.** Different verbs carry different shapes
|
||||
(`:object` might be page-id, post-id, user-id). Don't over-normalize — keep
|
||||
`:tags` as a flexible bag. APL operations over heterogeneous records work fine
|
||||
via dict lookups; only the indexed fields need uniform shape.
|
||||
- **Fanout produces matrices fast.** N activities × M followers → NM items. Apply
|
||||
filter/dedupe early, not after materialization. Use guard predicates *inside*
|
||||
the outer product where possible (compose with `∘.{a v ⊢ ...}`).
|
||||
- **Dedupe key isn't always `(actor,verb,object)`.** For "alice liked X" and "bob
|
||||
liked X" the dedupe key is `(verb,object)` (collapse the actors into a list).
|
||||
For "alice posted X" each `:actor` is distinct. Each verb may want its own
|
||||
dedupe rule; codify these in `lib/feed/dedupe.sx`.
|
||||
- **Recency decay matters more than score precision.** Use a simple half-life decay
|
||||
(e.g. score × 0.5^(age/window)) rather than a clever curve. Calibrate the
|
||||
window via tests, not theory.
|
||||
- **Ranking should be deterministic on ties.** Always include a tiebreaker (id, or
|
||||
hash). Otherwise tests will flake.
|
||||
- **The ACL filter is per-viewer.** A timeline is computed *for* a user; the same
|
||||
candidate stream produces different timelines for different viewers. Don't
|
||||
cache pre-ACL timelines.
|
||||
|
||||
## General gotchas (all loops)
|
||||
|
||||
- SX `do` = R7RS iteration. Use `begin` for multi-expr sequences.
|
||||
- `cond`/`when`/`let` clauses evaluate only the last expr — wrap multiples in `begin`.
|
||||
- `env-bind!` creates a binding; `env-set!` mutates an existing one (walks scope chain).
|
||||
- `sx_validate` after every structural edit.
|
||||
- `list?` returns false on raw JS Arrays — host data must be SX-converted.
|
||||
|
||||
## Style
|
||||
|
||||
- No comments in `.sx` unless non-obvious.
|
||||
- No new planning docs — update `plans/feed-on-sx.md` inline.
|
||||
- Short, factual commit messages.
|
||||
- One feature per iteration. Commit. Log. Push. Next.
|
||||
|
||||
Go. Start by reading the plan; find the first unchecked `[ ]`; implement it.
|
||||
98
plans/agent-briefings/flow-loop.md
Normal file
98
plans/agent-briefings/flow-loop.md
Normal file
@@ -0,0 +1,98 @@
|
||||
# flow-on-sx loop agent (single agent, queue-driven)
|
||||
|
||||
Role: iterates `plans/flow-on-sx.md` forever. **Durable workflows on Scheme** — the
|
||||
call/cc + delimited continuation showcase that justifies pulling R7RS into
|
||||
production. art-dag's natural successor: DAG-of-tasks with pause/resume across
|
||||
process restarts. fed-sx extension turns local flows into distributed ones.
|
||||
|
||||
```
|
||||
description: flow-on-sx queue loop
|
||||
subagent_type: general-purpose
|
||||
run_in_background: true
|
||||
isolation: worktree
|
||||
```
|
||||
|
||||
## Prompt
|
||||
|
||||
You are the sole background agent working `/root/rose-ash/plans/flow-on-sx.md`.
|
||||
Isolated worktree, forever, one commit per feature. Push to `origin/loops/flow`
|
||||
after every commit.
|
||||
|
||||
## Restart baseline — check before iterating
|
||||
|
||||
1. Read `plans/flow-on-sx.md` — roadmap + Progress log.
|
||||
2. `ls lib/flow/` — pick up from the most advanced file.
|
||||
3. If `lib/flow/tests/*.sx` exist, run them via `bash lib/flow/conformance.sh`. Green
|
||||
before new work.
|
||||
4. If `lib/flow/scoreboard.md` exists, that's your baseline.
|
||||
5. Read `lib/scheme/scheme.sx` public API once — that's your substrate.
|
||||
|
||||
## The queue
|
||||
|
||||
Phase order per `plans/flow-on-sx.md`:
|
||||
|
||||
- **Phase 1** — declarative DAG: `defflow`, `sequence`, `parallel`, sync runtime,
|
||||
basic api
|
||||
- **Phase 2** — control flow + error handling: `cond`, `retry`, `timeout`,
|
||||
`try-catch`
|
||||
- **Phase 3** — **THE SHOWCASE**: `suspend`/`resume` via `call/cc`, persistent
|
||||
store, crash recovery
|
||||
- **Phase 4** — distributed nodes via fed-sx (remote-node, handoff, replication)
|
||||
|
||||
Within a phase, pick the checkbox that unlocks the most tests per effort.
|
||||
|
||||
Every iteration: implement → test → commit → tick `[ ]` → Progress log → next.
|
||||
|
||||
## Ground rules (hard)
|
||||
|
||||
- **Scope:** only `lib/flow/**` and `plans/flow-on-sx.md`. Do **not** edit `spec/`,
|
||||
`hosts/`, `shared/`, other `lib/<lang>/` dirs, `lib/stdlib.sx`, or `lib/` root.
|
||||
May **import** from `lib/scheme/` only (its public API).
|
||||
- **NEVER call `sx_build`.** 600s watchdog. If sx_server binary broken → Blockers
|
||||
entry, stop.
|
||||
- **Shared-file issues** → plan's Blockers with minimal repro.
|
||||
- **SX files:** `sx-tree` MCP tools ONLY. `sx_validate` after edits.
|
||||
- **Worktree:** commit, then push to `origin/loops/flow`. Never touch `main` or
|
||||
`architecture`.
|
||||
- **Commit granularity:** one feature per commit. Short factual messages
|
||||
(`flow: retry combinator with exponential backoff + 6 tests`).
|
||||
- **Plan file:** update Progress log + tick boxes every commit.
|
||||
|
||||
## flow-specific gotchas
|
||||
|
||||
- **Continuations must be re-entrant.** Phase 3's `suspend` captures a continuation
|
||||
that may be re-entered after a process restart. That means: no captured file
|
||||
descriptors, no captured sockets, no captured live runtime references that won't
|
||||
survive serialization. State referenced by the continuation must be plain SX data
|
||||
or live in the flow store.
|
||||
- **call/cc, not call-with-escape-continuation.** R7RS distinguishes. Use the full
|
||||
call/cc for resume; escape-only continuations cannot be re-entered. Read
|
||||
`lib/scheme/r7rs.md` (or equivalent) to confirm semantics.
|
||||
- **`parallel` in Phase 1 is sequential.** Don't try threading until Phase 3+. Just
|
||||
evaluate branches in order, collect results, return joined value. Document the
|
||||
semantics clearly so users don't assume true concurrency.
|
||||
- **Retry doesn't retry continuations.** If a node has already suspended, retry on
|
||||
resume doesn't re-run it from scratch — it resumes. `retry` only applies to
|
||||
exceptions raised before suspend. Be explicit in the API.
|
||||
- **Cancellation invalidates the continuation.** `(flow/cancel id)` must remove the
|
||||
stored continuation so a stale `resume` cannot wake it. Document semantics.
|
||||
- **Timeouts in pure SX are tricky.** Without a scheduler, `timeout` is a budget on
|
||||
step count or wall-clock probed at safe points. Pick one approach (probably step
|
||||
budget for determinism) and document.
|
||||
|
||||
## General gotchas (all loops)
|
||||
|
||||
- SX `do` = R7RS iteration. Use `begin` for multi-expr sequences.
|
||||
- `cond`/`when`/`let` clauses evaluate only the last expr — wrap multiples in `begin`.
|
||||
- `env-bind!` creates a binding; `env-set!` mutates an existing one (walks scope chain).
|
||||
- `sx_validate` after every structural edit.
|
||||
- `list?` returns false on raw JS Arrays — host data must be SX-converted.
|
||||
|
||||
## Style
|
||||
|
||||
- No comments in `.sx` unless non-obvious.
|
||||
- No new planning docs — update `plans/flow-on-sx.md` inline.
|
||||
- Short, factual commit messages.
|
||||
- One feature per iteration. Commit. Log. Push. Next.
|
||||
|
||||
Go. Start by reading the plan; find the first unchecked `[ ]`; implement it.
|
||||
106
plans/agent-briefings/kernel-loop.md
Normal file
106
plans/agent-briefings/kernel-loop.md
Normal file
@@ -0,0 +1,106 @@
|
||||
# kernel-on-sx loop agent (single agent, queue-driven)
|
||||
|
||||
Role: iterates `plans/kernel-on-sx.md` forever. **First chisel of the Phase B stratification work** — natural successor to env-as-value, validates SX's reflection story (first-class environments, evaluators, operatives). Goal isn't just "implement Kernel"; it's *also* to surface common patterns into `lib/guest/` (specifically motivating a future `lib/guest/reflective/` sub-layer). One feature per commit.
|
||||
|
||||
```
|
||||
description: kernel-on-sx queue loop
|
||||
subagent_type: general-purpose
|
||||
run_in_background: true
|
||||
isolation: worktree
|
||||
```
|
||||
|
||||
## DO NOT START WITHOUT THE PREREQUISITES
|
||||
|
||||
This loop **must not** start until the lib-guest core kits are in place. Kernel's parser consumes `lib/guest/core/lex.sx` and `lib/guest/core/pratt.sx` (s-expression-shaped, minimal demand); its evaluator's pattern dispatch consumes `lib/guest/core/match.sx`.
|
||||
|
||||
**Pre-flight check:**
|
||||
```
|
||||
ls /root/rose-ash/lib/guest/lex.sx /root/rose-ash/lib/guest/pratt.sx \
|
||||
/root/rose-ash/lib/guest/match.sx /root/rose-ash/lib/guest/ast.sx
|
||||
```
|
||||
If any of those `lib/guest/*.sx` files are missing, **stop and report**. Do not start.
|
||||
|
||||
## Prompt
|
||||
|
||||
You are the sole background agent working `/root/rose-ash/plans/kernel-on-sx.md`. You run in an isolated git worktree on branch `loops/kernel`. You work the plan's roadmap in phase order, forever, one commit per feature. Push to `origin/loops/kernel` after every commit.
|
||||
|
||||
## Restart baseline — check before iterating
|
||||
|
||||
1. Read `plans/kernel-on-sx.md` — Roadmap + Progress log + Blockers tell you where you are.
|
||||
2. Run the pre-flight check above. If any lib/guest kit is missing, stop immediately and update the plan's Blockers section.
|
||||
3. `ls lib/kernel/` — pick up from the most advanced file that exists. If the directory does not exist, you are at Phase 1.
|
||||
4. If `lib/kernel/tests/*.sx` exist, run them via the epoch protocol against `sx_server.exe`. They must be green before new work.
|
||||
|
||||
## The queue
|
||||
|
||||
Phase order per `plans/kernel-on-sx.md`:
|
||||
|
||||
- **Phase 1** — Parser (s-expression reader, minimal — consumes `lib/guest/lex` + `lib/guest/pratt`)
|
||||
- **Phase 2** — Core evaluator with first-class environments
|
||||
- **Phase 3** — `$vau` / `$lambda` / `wrap` / `unwrap` (the operative–applicative distinction)
|
||||
- **Phase 4** — Standard environment construction
|
||||
- **Phase 5** — Encapsulations (Kernel's opaque-type idiom)
|
||||
- **Phase 6** — Hygienic operatives (Shutt's later work — operatives that don't capture)
|
||||
- **Phase 7** — Propose `lib/guest/reflective/` (extraction phase — see chiselling discipline)
|
||||
|
||||
Within a phase, pick the checkbox with the best tests-per-effort ratio.
|
||||
|
||||
Every iteration: implement → test → commit → tick `[ ]` in plan → append Progress log → push → next.
|
||||
|
||||
## Lib/guest chiselling discipline (the defining feature of this loop)
|
||||
|
||||
You are not just implementing Kernel — you are *chiselling* the substrate to surface what `lib/guest/reflective/` should contain. Every commit must end with a one-line **"chisel note"** appended to the plan's Progress log entry, in this format:
|
||||
|
||||
```
|
||||
chisel: <one of: consumes-X | shapes-reflective | proposes-Y | nothing>
|
||||
```
|
||||
|
||||
- `consumes-X` — this commit used an existing `lib/guest/X` kit (e.g., `consumes-pratt`, `consumes-match`).
|
||||
- `shapes-reflective` — this commit revealed something about what `lib/guest/reflective/` should look like (e.g., env-reification helper signatures, applicative-vs-operative dispatch protocol). Add a paragraph to the plan's "lib/guest feedback loop" section describing the insight.
|
||||
- `proposes-Y` — this commit revealed a gap in another existing kit (e.g., `match.sx` doesn't quite handle X). Open a Blockers entry describing the gap.
|
||||
- `nothing` — pure Kernel work that didn't touch the substrate or lib/guest story (rare; if you write this twice in a row, stop and reflect on why).
|
||||
|
||||
**Phase 7 (extraction)** is **gated** by the two-consumer rule. Kernel alone is one consumer. The natural second consumer is a future MetaScheme port, a Common-Lisp meta-evaluator port, or a Kernel dialect (cKanren-style). **Until a second consumer exists, do NOT actually extract** — instead, mark Phase 7 `[partial — pending second consumer]` and document the proposed `lib/guest/reflective/` API surface in the plan's progress log. The extraction itself happens later, when a second consumer materialises.
|
||||
|
||||
This discipline is the point of the loop, not a bookkeeping tax. The chisel notes are what tell us — at the end of Kernel's run — whether a `lib/guest/reflective/` sub-layer is real or just one-language-shaped.
|
||||
|
||||
## Ground rules (hard)
|
||||
|
||||
- **Scope:** only `lib/kernel/**` and `plans/kernel-on-sx.md`. Do **not** edit `spec/`, `hosts/`, `shared/`, `lib/guest/**` (read-only consumer at this phase), or other `lib/<lang>/`.
|
||||
- **Consume `lib/guest/core/`** wherever it covers a need. Hand-rolling defeats the chiselling goal.
|
||||
- **Do not extract into `lib/guest/reflective/` from this loop.** That's Phase 7 territory, gated by the two-consumer rule. Until there's a second consumer, document the API surface only.
|
||||
- **Substrate gaps** (env-as-value not exposing X, `eval` semantics drift, JIT not handling reflective patterns) → Blockers entry with minimal repro. Do **not** fix substrate from this loop. Substrate work belongs to `sx-improvements.md` / `jit-perf-regression.md`.
|
||||
- **NEVER call `sx_build`.** 600s watchdog will kill you. If `sx_server.exe` is broken, add a Blockers entry and stop.
|
||||
- **SX files:** `sx-tree` MCP tools ONLY. `sx_validate` after every edit. Never `Edit`/`Read`/`Write` on `.sx`.
|
||||
- **Worktree:** commit, then push to `origin/loops/kernel`. Never touch `main`. Never push to `architecture`.
|
||||
- **Commit granularity:** one feature per commit. Short factual messages: `kernel: $vau operative + 6 tests`.
|
||||
- **Plan file:** update Progress log + tick boxes every commit. Include the chisel note.
|
||||
- **If blocked** for two iterations on the same issue, add to Blockers and move on.
|
||||
|
||||
## Kernel-specific gotchas
|
||||
|
||||
- **Operatives don't evaluate their arguments.** `$vau` builds an operative; the body sees the *unevaluated* argument expressions plus the dynamic environment. This is the opposite of every other guest in the set. `(define-via-vau)` builds a binding by calling `eval` inside the body on the (still-syntax) argument.
|
||||
- **Applicatives wrap operatives.** `(wrap op)` produces an applicative that evaluates its args first, then calls `op` with the values. `$lambda` is sugar for `wrap` ∘ `$vau`.
|
||||
- **Dynamic vs static environments.** Operative body sees both: the static env where the `$vau` was created (closure-style), AND the dynamic env where the call happens (passed as the env-param). Different from lexical-only languages.
|
||||
- **No special forms in the evaluator.** `$if`, `$define!`, `$lambda` are all just operatives bound in the standard environment. The evaluator is `lookup-and-call` — no hardcoded switch on symbols. This is the whole point: the language is reified as data.
|
||||
- **`eval` is a primitive callable on user environments.** This is where SX's env-as-value matters most. If env-as-value isn't fully landed in the substrate, this is where it'll break.
|
||||
- **Encapsulations (Phase 5) are Kernel's opaque-types idiom.** `make-encapsulation-type` returns three operatives: encapsulator (constructs), predicate (tests), decapsulator (extracts). Used to define promises, streams, modules.
|
||||
- **Hygienic operatives (Phase 6) are research-grade.** Shutt's later work. Operatives that don't accidentally capture caller bindings. Likely uses scope sets / frame stamps. Treat as exploration, not implementation-deadline.
|
||||
|
||||
## General gotchas (all loops)
|
||||
|
||||
- SX `do` = R7RS iteration. Use `begin` for multi-expr sequences.
|
||||
- `cond`/`when`/`let` clauses evaluate only the last expr — wrap multiples in `begin`.
|
||||
- `env-bind!` creates a binding; `env-set!` mutates an existing one (walks scope chain).
|
||||
- `sx_validate` after every structural edit.
|
||||
- `list?` returns false on raw JS Arrays — host data must be SX-converted.
|
||||
- Shell heredoc `||` gets eaten — escape or use `case`.
|
||||
|
||||
## Style
|
||||
|
||||
- No comments in `.sx` unless non-obvious.
|
||||
- No new planning docs — update `plans/kernel-on-sx.md` inline.
|
||||
- Short, factual commit messages with chisel note: `kernel: $vau operative + 6 tests [shapes-reflective]`.
|
||||
- One feature per iteration. Commit. Log. Push. Next.
|
||||
|
||||
Go. Run the pre-flight check. If lib/guest kits are missing, stop. Otherwise read the plan, find the first unchecked `[ ]`, implement it. Remember: every commit ends with a chisel note, and Phase 7 extraction waits for a second consumer.
|
||||
110
plans/agent-briefings/search-loop.md
Normal file
110
plans/agent-briefings/search-loop.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# search-on-sx loop agent (single agent, queue-driven)
|
||||
|
||||
Role: iterates `plans/search-on-sx.md` forever. **Full-text + structured search on
|
||||
Haskell** — tokenize, inverted index, query AST, boolean + phrase + ranked
|
||||
queries (TF-IDF / BM25), ACL-aware post-filter, federated index merge. Typed ADTs
|
||||
make query parsing clean; lazy lists make posting-list iteration efficient. Sits on
|
||||
`lib/haskell/` (1514/1514 already green); adds a search-shaped vocabulary on top.
|
||||
|
||||
```
|
||||
description: search-on-sx queue loop
|
||||
subagent_type: general-purpose
|
||||
run_in_background: true
|
||||
isolation: worktree
|
||||
```
|
||||
|
||||
## Prompt
|
||||
|
||||
You are the sole background agent working `plans/search-on-sx.md`. Isolated
|
||||
worktree `/root/rose-ash-loops/search` on branch `loops/search`, forever, one
|
||||
commit per feature. Push to `origin/loops/search` after every commit. Never touch
|
||||
`main` or `architecture`.
|
||||
|
||||
## Restart baseline — check before iterating
|
||||
|
||||
1. Read `plans/search-on-sx.md` — roadmap + Progress log.
|
||||
2. `ls lib/search/` — pick up from the most advanced file.
|
||||
3. If `lib/search/tests/*.sx` exist, run them via `bash lib/search/conformance.sh`.
|
||||
Green before new work.
|
||||
4. If `lib/search/scoreboard.md` exists, that's your baseline.
|
||||
5. Read the `lib/haskell/` public API once — that's your substrate. `lib/haskell/
|
||||
haskell.sx` exists; also study `runtime.sx`, `eval.sx`, `parser.sx`, `infer.sx`,
|
||||
`match.sx`, `map.sx`, `set.sx`, `testlib.sx`. Learn how to declare ADTs, pattern
|
||||
match, and use the `Map`/`Set` helpers before writing index code. Verify the real
|
||||
exported names with sx_find_all / grep — don't assume from the plan's sketch.
|
||||
|
||||
## The queue
|
||||
|
||||
Phase order per `plans/search-on-sx.md`:
|
||||
|
||||
- **Phase 1** — tokenize + inverted index + simple term lookup
|
||||
(`Map Term [(DocId,[Pos])]`, insert/lookup, `(search/index doc)`,
|
||||
`(search/query term)`).
|
||||
- **Phase 2** — query AST + boolean/phrase eval (Term | And | Or | Not | Phrase;
|
||||
posting-list set ops; positional phrase match).
|
||||
- **Phase 3** — ranking (TF-IDF, BM25), top-N.
|
||||
- **Phase 4** — ACL-aware post-filter + federation (merge per-peer indices).
|
||||
|
||||
Within a phase, pick the checkbox that unlocks the most tests per effort.
|
||||
|
||||
Every iteration: implement → test → commit → tick `[ ]` → Progress log → next.
|
||||
|
||||
## Ground rules (hard)
|
||||
|
||||
- **Scope:** only `lib/search/**` and `plans/search-on-sx.md`. Do **not** edit
|
||||
`spec/`, `hosts/`, `shared/`, other `lib/<lang>/` dirs, `lib/stdlib.sx`, or
|
||||
`lib/` root. May **import** from `lib/haskell/` only (its public API). Do **not**
|
||||
modify Haskell.
|
||||
- **NEVER call `sx_build`.** 600s watchdog. If the sx_server binary is broken →
|
||||
Blockers entry, stop. Run tests by invoking the sx_server binary directly from a
|
||||
conformance.sh (model it on `lib/haskell/conformance.sh`), pointing `SX_SERVER`
|
||||
at `/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe` — fresh
|
||||
worktrees have no `_build/`, so the relative path won't resolve.
|
||||
- **Shared-file issues** → plan's Blockers with minimal repro; don't fix here.
|
||||
- **SX files:** `sx-tree` MCP tools ONLY. **They take `file:` not `path:`** — a
|
||||
wrong key yields `Yojson Type_error("Expected string, got null")`, which looks
|
||||
like a broken binary but is just a param mismatch. `sx_validate` after edits.
|
||||
Path-based edits (`sx_replace_node`) count comment headers in their indices and
|
||||
can clobber the wrong node — re-read after, or prefer `sx_write_file` for small
|
||||
files.
|
||||
- **Unicode in `.sx`:** raw UTF-8 only, never `\uXXXX` escapes.
|
||||
- **Commit granularity:** one feature per commit. Short factual messages
|
||||
(`search: phrase query positional match + 7 tests`). Push to `origin/loops/search`.
|
||||
- **Plan file:** update Progress log (newest first) + tick boxes every commit.
|
||||
|
||||
## search-specific gotchas
|
||||
|
||||
- **Posting lists are the hot path.** Keep them sorted by DocId so boolean AND/OR
|
||||
are linear merges, not nested scans. Phrase match needs positions, so store
|
||||
`(DocId, [Pos])` — don't drop positions early to save space; you can't recover them.
|
||||
- **Tokenization decides recall.** Normalize consistently (lowercase, strip
|
||||
punctuation) on BOTH index and query side, or queries silently miss. Test the
|
||||
index/query symmetry explicitly.
|
||||
- **Ranking must be deterministic on ties.** TF-IDF/BM25 scores collide; always
|
||||
add a stable tiebreak (DocId ascending) or tests flake.
|
||||
- **ACL filter is per-viewer and post-ranking.** Filter the result list against the
|
||||
viewer, after scoring — never bake visibility into the index (the same index
|
||||
serves all viewers). Inject the permit predicate; don't hardwire an ACL module
|
||||
that doesn't exist yet.
|
||||
- **Federation merges indices, not results.** Merging per-peer inverted indices
|
||||
(union posting lists per term) is cleaner and rank-correct vs merging ranked
|
||||
result lists. Mock peer indices in tests.
|
||||
|
||||
## General gotchas (all loops)
|
||||
|
||||
- SX `do` = R7RS iteration. Use `begin` for multi-expr sequences.
|
||||
- `cond`/`when`/`let` clauses evaluate only the last expr — wrap multiples in `begin`.
|
||||
- `let` is parallel, not sequential — nest `let`s when a binding references an earlier one.
|
||||
- `env-bind!` creates a binding; `env-set!` mutates an existing one (walks scope chain).
|
||||
- `sx_validate` after every structural edit.
|
||||
- Namespace-prefix all guest helpers (`search/...`) — short/host-colliding names
|
||||
get silently shadowed or hang the runtime.
|
||||
|
||||
## Style
|
||||
|
||||
- No comments in `.sx` unless non-obvious.
|
||||
- No new planning docs — update `plans/search-on-sx.md` inline.
|
||||
- Short, factual commit messages.
|
||||
- One feature per iteration. Commit. Log. Push. Next.
|
||||
|
||||
Go. Start by reading the plan; find the first unchecked `[ ]`; implement it.
|
||||
176
plans/feed-on-sx.md
Normal file
176
plans/feed-on-sx.md
Normal file
@@ -0,0 +1,176 @@
|
||||
# feed-on-sx: Activity Feeds on APL
|
||||
|
||||
Timelines, notifications, activity aggregation. The math is array math: filter, sort,
|
||||
reduce, scan, outer product. APL is the densest possible expression of feed
|
||||
composition — a fanout-and-rank pipeline reads as a single line.
|
||||
|
||||
rose-ash needs: per-user home timeline, notification feed, activity stream digestion,
|
||||
backfill for new follows, deduplication across cross-posts. Every operation is an
|
||||
array-shaped transformation.
|
||||
|
||||
End-state: an APL-flavored layer on `lib/apl/` with feed-specific combinators
|
||||
(`fanout`, `dedupe`, `score`, `rank`), an SX adapter for callers who don't want raw
|
||||
APL, ACL visibility filtering via `lib/acl/`, federation via fed-sx.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/feed/conformance.sh` → **189/189** (Phases 1–4 + TF-IDF, notifications, home, smart-dedupe, trending, mute, pagination, threading)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only touch `lib/feed/**` and `plans/feed-on-sx.md`. Do **not** edit
|
||||
`spec/`, `hosts/`, `shared/`, `lib/apl/**`, or other `lib/<lang>/`. You may
|
||||
**import** from `lib/apl/` (public API in `lib/apl/apl.sx`); do **not** modify APL.
|
||||
- **Shared-file issues** go under "Blockers" with a minimal repro; do not fix here.
|
||||
- **SX files:** use `sx-tree` MCP tools only.
|
||||
- **Architecture:** an activity is a small dict (`{:actor :verb :object :at :tags}`); a
|
||||
stream is an APL vector of such dicts. Operations are APL primitives lifted onto
|
||||
this shape. SX adapter exposes ergonomic API to non-APL callers.
|
||||
- **Unicode:** raw UTF-8 in `.sx` files. APL glyphs land directly.
|
||||
- **Commits:** one feature per commit. Keep Progress log updated and tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
Raw activities (any shape) Per-user view
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/feed/normalize.sx lib/feed/timeline.sx
|
||||
— {:actor :verb :object — (timeline user)
|
||||
:at :tags} record — applies filter ∘ rank ∘ take
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/feed/stream.sx lib/feed/rank.sx
|
||||
— APL vector of activities — velocity, recency
|
||||
— filter, sort, take — TF-IDF-ish over :tags
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/feed/fanout.sx lib/feed/dedupe.sx
|
||||
— followers vector — group by :object
|
||||
— activities ∘.× followers — collapse cross-posts
|
||||
— flatten + dedupe
|
||||
│
|
||||
▼
|
||||
lib/feed/api.sx lib/feed/fed.sx
|
||||
— (feed/post activity) — inbox via fed-sx
|
||||
— (feed/timeline user) — backfill on subscribe
|
||||
— (feed/notify user)
|
||||
```
|
||||
|
||||
## Phase 1 — Stream model + basic ops
|
||||
|
||||
- [x] `lib/feed/normalize.sx` — activity record schema; coerce arbitrary inputs
|
||||
- [x] `lib/feed/stream.sx` — APL vector representation; filter by predicate; sort by
|
||||
`:at`; take N (`↑`); reverse (`⌽`)
|
||||
- [x] `lib/feed/api.sx` — `(feed/post activity)`, `(feed/all)`
|
||||
- [x] `lib/feed/tests/basic.sx` — 30 cases: normalize defaults, filter, sort, take, api
|
||||
- [x] `lib/feed/scoreboard.{json,md}`
|
||||
- [x] `lib/feed/conformance.sh`
|
||||
|
||||
## Phase 2 — Fanout via outer product
|
||||
|
||||
- [x] follower graph: `followers user → vector of user ids` (`feed/follow-graph`,
|
||||
`feed/followers`; graph = `{followee -> (followers)}` dict)
|
||||
- [x] fanout: activities `∘.×` audience → matrix via `apl-outer feed/-mk-event`
|
||||
- [x] flatten to inbox events vector (`feed/-flatten` rank-2 → rank-1)
|
||||
- [x] dedupe — `feed/dedupe-inbox` by `(to, actor, verb, object)`; also
|
||||
`feed/dedupe-activities` `(actor verb object)` and `feed/dedupe-collapse`
|
||||
`(verb object)` for cross-actor likes
|
||||
- [x] `lib/feed/tests/fanout.sx` — 29 cases: small graph, mutual follow, star
|
||||
(high-fanout), empty graph, unfollowed actor, cross-post dedupe
|
||||
|
||||
## Phase 3 — Aggregation + ranking
|
||||
|
||||
- [x] group-by — `feed/group-by`/`feed/group-count` key-reduce; `feed/by-actor-day`
|
||||
buckets `(actor, day)` via `feed/day` (string-joined keys)
|
||||
- [x] velocity score — `feed/velocity` counts actor's activities in `(at-window, at]`
|
||||
- [x] recency score — `feed/recency` half-life decay `0.5^(age/hl)`
|
||||
- [x] composite rank — `feed/composite` weighted sum of `(weight scorer)` parts
|
||||
- [x] top-N per timeline — `feed/top` = rank then take
|
||||
- [x] `lib/feed/tests/rank.sx` — 24 cases: decay shape, velocity burst, stable
|
||||
tie-break, top-N, composite
|
||||
|
||||
## Phase 4 — Visibility filter + federation
|
||||
|
||||
`lib/acl/` and fed-sx don't exist yet and are out of scope (import `lib/apl/`
|
||||
only), so ACL/transport are injected: `permit?`, `remote?`, `send-fn`, `fetch-fn`
|
||||
are function parameters. Real acl-sx / fed-sx wire in at the call site unchanged.
|
||||
|
||||
- [x] ACL filter — `feed/visible stream viewer permit?`; default `feed/permit-acl?`
|
||||
reads `:visible-to` allowlist (+ author-sees-own); per-viewer, never cached
|
||||
- [x] fed-sx outbound — `feed/federate`/`feed/deliver` fan out then partition
|
||||
local vs remote inboxes; remote events handed to injected `send-fn`
|
||||
- [x] fed-sx inbound — `feed/inbound` normalizes + `feed/ingest` dedupes peer
|
||||
activities into the local stream
|
||||
- [x] backfill on subscribe — `feed/backfill local fetch-fn peer-id`
|
||||
- [x] `lib/feed/tests/integration.sx` — 22 cases incl. end-to-end
|
||||
`feed/timeline` (federated → ACL for viewer → recency rank → top-N)
|
||||
|
||||
## Progress log
|
||||
|
||||
- **Phase 1 done (30/30).** Stream = APL rank-1 array whose ravel holds activity
|
||||
dicts. `normalize.sx` (record schema + accessors), `stream.sx` (filter via `/`
|
||||
compress, sort via `⍋` grade-up [stable], take via `↑`, reverse via `⌽`,
|
||||
by-actor/verb/object/since predicates), `api.sx` (mutable log: post/all/reset!/size).
|
||||
Substrate: `apl-compress`, `apl-grade-up`, `apl-take`, `apl-reverse`, `make-array`.
|
||||
Grade-up returns 1-based indices (⎕IO=1), is stable on ties → deterministic sort.
|
||||
- **Phase 2 done (59/59 total).** `fanout.sx` (graph + `apl-outer` showcase),
|
||||
`dedupe.sx` (per-key dedupe, first-wins stable). Key APL gotcha: `scalar?` is
|
||||
true for ANY dict and `disclose` nils a non-array dict, so an apl-outer combiner
|
||||
MUST `enclose` its event dict — apl-outer discloses it back intact. `apl-unique`
|
||||
preserves first-occurrence order; dict `keys` order is NOT stable, so
|
||||
`feed/audience` sorts (else recipient ordering flakes). `apl-compress` needs a
|
||||
rank-1 array, so the (activity×follower) matrix is flattened to its ravel before
|
||||
the edge-guard filter.
|
||||
- **Phase 3 done (83/83 total).** `aggregate.sx` (group-by/count, day buckets) +
|
||||
`rank.sx` (recency/velocity/engagement scorers, composite, top-N). `sort` is
|
||||
single-arg ascending only — no comparator — so ranking uses a stable two-pass
|
||||
`apl-grade-down` (by :at desc, then by score desc) for deterministic tie-breaks.
|
||||
Dict keys must be strings, so composite group keys are string-joined ("actor#day").
|
||||
- **Phase 4 done (105/105 total).** `acl.sx` (per-viewer `feed/visible`,
|
||||
`feed/timeline` capstone) + `fed.sx` (merge/ingest/inbound/backfill/federate/
|
||||
deliver). ACL/transport are dependency-injected (permit?/remote?/send-fn/fetch-fn)
|
||||
since lib/acl + fed-sx don't exist. `feed/normalize` now MERGEs defaults over the
|
||||
raw dict (was projecting to 5 keys) so extra metadata (:visible-to, peer fields)
|
||||
survives — matches the "flexible bag" principle.
|
||||
|
||||
## Roadmap is complete (all 4 phases). Possible follow-ups:
|
||||
|
||||
- Wire real acl-sx once `lib/acl/` exists (swap injected `permit?`).
|
||||
- Wire real fed-sx transport (swap `send-fn`/`fetch-fn`).
|
||||
- [x] TF-IDF over `:tags` for content ranking — `content.sx`: `feed/tag-df`,
|
||||
`feed/tag-idf` (log N/df), `feed/tfidf-score`, `feed/by-relevance`; 15 tests.
|
||||
Composes as a scorer with rank.sx. (120/120 total.)
|
||||
- [x] Notification feed (verb-filtered, per-recipient) — `notify.sx`:
|
||||
`feed/notifications`, `feed/notify-verbs`, `feed/notify-digest` (collapses
|
||||
"X, Y liked Z" by (verb,object), sorted-deterministic); 8 tests. (128/128 total.)
|
||||
- [x] **Capstone** `feed/home` — the whole pipeline as one line: fanout ∘ inbox ∘
|
||||
dedupe ∘ ACL ∘ rank ∘ take (`home.sx`); 6 tests incl. per-viewer ACL + cross-post
|
||||
dedupe. (134/134 total.)
|
||||
- [x] Per-verb dedupe rules (briefing gotcha #3) — `feed/dedupe-smart` /
|
||||
`feed/smart-key`: reactions (like/follow/boost/...) collapse cross-actor on
|
||||
(verb,object); posts stay distinct per actor. `feed/collapse-verbs` is
|
||||
rebindable policy; 9 tests. (143/143 total.)
|
||||
- [x] Trending — `feed/trending` / `feed/trending-actors`: objects/actors ranked
|
||||
by activity count in a recency window, count-desc with key-asc tiebreak
|
||||
(`trending.sx`); 11 tests. (154/154 total.)
|
||||
- [x] Mute/block — `feed/mute-actors` / `feed/mute-tags` / `feed/mute-objects` /
|
||||
`feed/apply-prefs`: viewer-controlled per-request filtering (complements ACL's
|
||||
author-controlled visibility) (`mute.sx`); 9 tests. (163/163 total.)
|
||||
- [x] Pagination — `feed/page`/`feed/page-count` (offset) + `feed/before`/
|
||||
`feed/after`/`feed/page-before`/`feed/next-cursor` (cursor by :at, stable under
|
||||
inserts) (`page.sx`); 14 tests. (177/177 total.)
|
||||
- [x] Threading — `feed/replies`/`feed/reply-count`/`feed/thread`/
|
||||
`feed/thread-objects`/`feed/thread-size`: conversation closure over `:reply-to`
|
||||
(transitive fixpoint), chronological (`thread.sx`); 12 tests. (189/189 total.)
|
||||
|
||||
(none)
|
||||
|
||||
## Notes for next iteration
|
||||
|
||||
- sx-tree MCP tools take `file:` NOT `path:` (CLAUDE.md is stale). Wrong key →
|
||||
`Yojson Type_error("Expected string, got null")`. Looks like a broken binary, isn't.
|
||||
- sx_server binary lives in main repo: `/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe`
|
||||
(worktree has no `_build`). conformance.sh already points there with relative fallback.
|
||||
- Phase 2 substrate verified available: `apl-outer` (∘.×), `apl-member` (∊),
|
||||
`apl-unique`, `apl-iota` (1-based).
|
||||
108
plans/flow-on-sx.md
Normal file
108
plans/flow-on-sx.md
Normal file
@@ -0,0 +1,108 @@
|
||||
# flow-on-sx: Durable DAG Workflows on Scheme
|
||||
|
||||
rose-ash needs workflows that survive restarts: content pipelines (write → review →
|
||||
publish → federate), scheduled jobs (digest emails), multi-step user flows (signup,
|
||||
confirm, onboard). art-dag is the precedent — DAG-of-tasks with pause/resume at IO
|
||||
boundaries.
|
||||
|
||||
Scheme's `call/cc` + delimited continuations make pause/resume natural: a `suspend`
|
||||
captures the continuation, serializes it as part of the flow record, and `resume`
|
||||
re-enters at exactly that point. No state-machine bookkeeping by hand. R7RS-small is
|
||||
already at 2644/2644 (see kernel/architecture status).
|
||||
|
||||
End-state: a Scheme-on-SX layer over the existing scheme runtime, with combinators
|
||||
for sequence/parallel/branch/retry/timeout/suspend, persistent flow store, and a
|
||||
federation extension via fed-sx for remote-node execution.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/flow/conformance.sh` → **0/0** (not yet started)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only touch `lib/flow/**` and `plans/flow-on-sx.md`. Do **not** edit
|
||||
`spec/`, `hosts/`, `shared/`, `lib/scheme/**`, or other `lib/<lang>/`. You may
|
||||
**import** from `lib/scheme/` (public API via `lib/scheme/scheme.sx`); do **not**
|
||||
modify Scheme.
|
||||
- **Shared-file issues** go under "Blockers" with a minimal repro; do not fix here.
|
||||
- **SX files:** use `sx-tree` MCP tools only.
|
||||
- **Architecture:** flow combinators are Scheme macros + procedures. Runtime is a
|
||||
driver loop that walks the flow graph and invokes `call/cc` at `suspend` points.
|
||||
Persistence layer serializes the continuation + open file/socket placeholders are
|
||||
forbidden (continuations must be resumable across process restart).
|
||||
- **art-dag awareness:** read `plans/art-dag*` if it exists for design lineage; do not
|
||||
import code.
|
||||
- **Commits:** one feature per commit. Keep Progress log updated and tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
(defflow publish
|
||||
(sequence
|
||||
(write-content)
|
||||
(parallel
|
||||
(review)
|
||||
(spell-check))
|
||||
(cond approved?
|
||||
(sequence (publish) (federate))
|
||||
(notify-author))))
|
||||
│
|
||||
▼
|
||||
lib/flow/spec.sx lib/flow/runtime.sx lib/flow/store.sx
|
||||
— defflow — driver loop — append-only flow log
|
||||
— sequence/parallel — node dispatch — checkpoint serialize
|
||||
— cond/retry/timeout — call/cc at suspend — restart loader
|
||||
— suspend/resume │ │
|
||||
▼ ▼
|
||||
lib/flow/api.sx lib/flow/remote.sx
|
||||
— (flow/start name args) — fed-sx adapter
|
||||
— (flow/resume id value) — node-on-peer execution
|
||||
— (flow/cancel id) — failure handling
|
||||
```
|
||||
|
||||
## Phase 1 — Declarative DAG + sequential execution
|
||||
|
||||
- [ ] `lib/flow/spec.sx` — `defflow` macro, `sequence` combinator
|
||||
- [ ] node = Scheme thunk; output threads to next node (data flow)
|
||||
- [ ] `parallel` combinator (sequential semantics for now — TRUE parallelism in Phase 3)
|
||||
- [ ] runtime executes a flow synchronously, returns final value
|
||||
- [ ] `lib/flow/api.sx` — `(flow/start name args)` entry point
|
||||
- [ ] `lib/flow/tests/basic.sx` — 15+ cases: linear sequence, nested sequences,
|
||||
data flow between nodes, parallel-with-join
|
||||
- [ ] `lib/flow/scoreboard.{json,md}`
|
||||
- [ ] `lib/flow/conformance.sh`
|
||||
|
||||
## Phase 2 — Control flow + error handling
|
||||
|
||||
- [ ] `cond` combinator — predicate selects branch
|
||||
- [ ] `retry n [backoff]` — re-runs node up to n times on exception
|
||||
- [ ] `timeout ms` — bounds node execution
|
||||
- [ ] `try-catch` — exception handler with reified error
|
||||
- [ ] error model — exceptions vs explicit `(fail :reason ...)` results
|
||||
- [ ] `lib/flow/tests/control.sx` — 25+ cases: each combinator + composition
|
||||
|
||||
## Phase 3 — Suspend / resume (the showcase)
|
||||
|
||||
- [ ] `(suspend reason)` — `call/cc` captures continuation, returns flow-id to caller
|
||||
- [ ] `lib/flow/store.sx` — serialize flow state (continuation + open vars)
|
||||
- [ ] `(flow/resume id value)` — load continuation, inject value, re-enter
|
||||
- [ ] `(flow/cancel id)` — explicit termination
|
||||
- [ ] crash recovery — on restart, scan store for paused flows, mark resumable
|
||||
- [ ] `lib/flow/tests/suspend.sx` — pause-resume scenarios, cancellation, "restart"
|
||||
scenarios (simulated by re-loading store)
|
||||
|
||||
## Phase 4 — Distributed nodes via fed-sx
|
||||
|
||||
- [ ] `(remote-node addr fn args)` — execute node on a federation peer
|
||||
- [ ] failure semantics — retry on different peer, fall through to local
|
||||
- [ ] persistence across instances — flow state replicates via fed-sx
|
||||
- [ ] handoff — flow started here can resume on a peer if the local instance is down
|
||||
- [ ] `lib/flow/tests/distributed.sx` — federated flow scenarios (mock fed-sx in tests)
|
||||
|
||||
## Progress log
|
||||
|
||||
(loop fills this in)
|
||||
|
||||
## Blockers
|
||||
|
||||
(loop fills this in)
|
||||
112
plans/mod-on-sx.md
Normal file
112
plans/mod-on-sx.md
Normal file
@@ -0,0 +1,112 @@
|
||||
# mod-on-sx: Moderation on Prolog
|
||||
|
||||
rose-ash needs moderation infrastructure: reports flagged by users, automated
|
||||
classifications (spam, abuse), tiered escalation (auto → human → appeal), audit
|
||||
trails. Each decision is the conclusion of a backtracking search over evidence and
|
||||
policy rules — exactly what Prolog does.
|
||||
|
||||
Where acl-sx says "may this happen?", mod-sx says "should this stay?" The former is
|
||||
a positive decision (proof of grant); the latter often a negative one (proof of
|
||||
violation), and policy chains naturally backtrack: if the first rule doesn't apply,
|
||||
try the next.
|
||||
|
||||
End-state: a Prolog-on-SX layer for moderation policy declaration and evaluation,
|
||||
with persistent report lifecycle, audit log, escalation state machine, and
|
||||
federation extension.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/mod/conformance.sh` → **0/0** (not yet started)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only touch `lib/mod/**` and `plans/mod-on-sx.md`. Do **not** edit
|
||||
`spec/`, `hosts/`, `shared/`, `lib/prolog/**`, or other `lib/<lang>/`. You may
|
||||
**import** from `lib/prolog/` (public API in `lib/prolog/prolog.sx`); do **not**
|
||||
modify Prolog.
|
||||
- **Shared-file issues** go under "Blockers" with a minimal repro; do not fix here.
|
||||
- **SX files:** use `sx-tree` MCP tools only.
|
||||
- **Architecture:** policies are Prolog rules over `report(...)` and `evidence(...)`
|
||||
facts. Decisions are query results. Proof trees become audit records. The state
|
||||
machine for report lifecycle is separate (an SX module on top).
|
||||
- **Shared with acl-sx:** rule-engine plumbing may be liftable into `lib/guest/`.
|
||||
Watch for it; flag in Progress log but do not extract until both subsystems are
|
||||
past Phase 2.
|
||||
- **Commits:** one feature per commit. Keep Progress log updated and tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
Report Decision
|
||||
{:by :about :reason :at} {:action :proof :next-state}
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/mod/schema.sx lib/mod/engine.sx
|
||||
— report/4, evidence/2, — query Prolog with report fact
|
||||
classification/3 predicates — extract proof tree
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/mod/policy.sx lib/mod/lifecycle.sx
|
||||
— rule syntax → Prolog — state machine
|
||||
— action heads: — open → triaged → decided
|
||||
{:keep :hide :remove — appeal handling
|
||||
:escalate :ban} │
|
||||
│ ▼
|
||||
▼ lib/mod/audit.sx
|
||||
lib/mod/api.sx — append-only decision log
|
||||
— (mod/report ...) — proof tree persistence
|
||||
— (mod/decide report) — query API
|
||||
— (mod/appeal id)
|
||||
│
|
||||
▼
|
||||
lib/mod/fed.sx
|
||||
— cross-instance reports via fed-sx
|
||||
— decision sharing / trust model
|
||||
```
|
||||
|
||||
## Phase 1 — Report representation + simple policy
|
||||
|
||||
- [ ] `lib/mod/schema.sx` — `report(id, by, about, reason)`, `evidence(id, kind, val)`,
|
||||
`policy-action(report, action)` predicates as Prolog facts/rules
|
||||
- [ ] `lib/mod/policy.sx` — rule declarations: `(defrule action :when conditions)`
|
||||
desugars to Prolog clause
|
||||
- [ ] `lib/mod/engine.sx` — `(decide report-id)` runs Prolog query, returns first
|
||||
matching action
|
||||
- [ ] `lib/mod/api.sx` — `(mod/report by about reason)`, `(mod/decide id)`
|
||||
- [ ] `lib/mod/tests/decide.sx` — 15+ cases: spam keyword → hide, repeated reports →
|
||||
escalate, no rule matches → keep
|
||||
- [ ] `lib/mod/scoreboard.{json,md}`
|
||||
- [ ] `lib/mod/conformance.sh`
|
||||
|
||||
## Phase 2 — Evidence + audit trail
|
||||
|
||||
- [ ] evidence accumulation — additional facts asserted before query
|
||||
- [ ] proof tree from Prolog derivation tree
|
||||
- [ ] `lib/mod/audit.sx` — append-only log (decision + proof + evidence snapshot)
|
||||
- [ ] `(mod/audit id)` retrieval
|
||||
- [ ] `lib/mod/tests/audit.sx` — proof correctness, trail completeness
|
||||
|
||||
## Phase 3 — Escalation + lifecycle state machine
|
||||
|
||||
- [ ] state machine: `:open → :triaged → :decided → :appealed → :final`
|
||||
- [ ] auto-tier: first-pass rules decide quick cases
|
||||
- [ ] human-tier: rules that emit `:escalate` move to next state
|
||||
- [ ] appeal: re-runs with appeal evidence, may override prior decision
|
||||
- [ ] `(mod/appeal id new-evidence)` API
|
||||
- [ ] `lib/mod/tests/escalation.sx` — full lifecycle traversal cases
|
||||
|
||||
## Phase 4 — Federation
|
||||
|
||||
- [ ] cross-instance reports — peer raises report about local content (or vice versa)
|
||||
- [ ] decision sharing — actions taken locally propagate to peers via fed-sx
|
||||
- [ ] trust model — peer's decision is advisory unless `(trust peer :mod)` is granted
|
||||
- [ ] revocation — undo applied moderation if proof was invalidated
|
||||
- [ ] `lib/mod/tests/fed.sx` — federated decision chains (mock fed-sx in tests)
|
||||
|
||||
## Progress log
|
||||
|
||||
(loop fills this in)
|
||||
|
||||
## Blockers
|
||||
|
||||
(loop fills this in)
|
||||
208
plans/search-on-sx.md
Normal file
208
plans/search-on-sx.md
Normal file
@@ -0,0 +1,208 @@
|
||||
# search-on-sx: Full-text + structured search on Haskell
|
||||
|
||||
rose-ash needs search across pages, posts, threads, federated content. Tokenize,
|
||||
index, query, rank, filter by visibility. Typed ADTs make query parsing clean,
|
||||
lazy lists make posting-list iteration efficient, and Haskell-on-SX is at 1514/1514.
|
||||
|
||||
End-state: a Haskell-on-SX layer with inverted index, query AST, boolean +
|
||||
phrase + ranked queries (TF-IDF, BM25), ACL-aware post-filter, and a federation
|
||||
extension that merges per-peer indices.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/search/conformance.sh` → **122/122** (Phases 1–4 complete)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only touch `lib/search/**` and `plans/search-on-sx.md`. Do **not** edit
|
||||
`spec/`, `hosts/`, `shared/`, `lib/haskell/**`, or other `lib/<lang>/`. You may
|
||||
**import** from `lib/haskell/` (public API in `lib/haskell/haskell.sx`); do **not**
|
||||
modify Haskell.
|
||||
- **Shared-file issues** go under "Blockers" with a minimal repro; do not fix here.
|
||||
- **SX files:** use `sx-tree` MCP tools only.
|
||||
- **Architecture:** index = `Map Term [(DocId, [Pos])]`. Query AST = ADT. Eval =
|
||||
fold of posting lists with set ops + ranking math. Ranking is pure (no IO until
|
||||
result emission).
|
||||
- **Commits:** one feature per commit. Keep Progress log updated and tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
Document Query
|
||||
{:id :text :tags} "alice AND bob OR phrase \"x y\""
|
||||
│ │
|
||||
▼ ▼
|
||||
lib/search/tokenize.sx lib/search/parse.sx
|
||||
— tokenize :: Text → [Term] — parse :: Text → Query
|
||||
— normalize (lowercase, strip) — Query = Term | And | Or
|
||||
— (optionally) stem | Not | Phrase
|
||||
│ │
|
||||
▼ ▼
|
||||
lib/search/index.sx lib/search/eval.sx
|
||||
— Map Term [(DocId, [Pos])] — eval :: Index → Query → [DocId]
|
||||
— insert / delete / lookup — boolean + phrase positions
|
||||
— persistence (optional later) │
|
||||
│ ▼
|
||||
└────────────────► lib/search/rank.sx
|
||||
— TF-IDF / BM25 scoring
|
||||
— top-N
|
||||
│
|
||||
▼
|
||||
lib/search/api.sx
|
||||
— (search/index doc)
|
||||
— (search/query q)
|
||||
— (search/top n q)
|
||||
│
|
||||
▼
|
||||
lib/search/fed.sx
|
||||
— federated query (merge peer results)
|
||||
— ACL filter post-merge
|
||||
```
|
||||
|
||||
## Phase 1 — Tokenize + index
|
||||
|
||||
- [x] `lib/search/tokenize.sx` — normalize (lowercase, strip punctuation), split on
|
||||
whitespace, return positions
|
||||
- [x] `lib/search/index.sx` — inverted index data structure; `indexDoc`, `deleteDoc`,
|
||||
`lookupTerm`, `docFreq`, `allTerms`. (Data.Map's public API lacks
|
||||
toList/keys/map/filter, so a sorted assoc-list `[(Term,[(DocId,[Pos])])]` is used —
|
||||
the conceptual `Map Term [(DocId,[Pos])]` with free term iteration.)
|
||||
- [x] `lib/search/api.sx` — assembles `search/src` (tokenize + index); Haskell entry
|
||||
points `indexDoc` / `lookupTerm`
|
||||
- [x] `lib/search/tests/index.sx` — 18 cases: tokenize, insert + lookup, update,
|
||||
delete, multi-doc, positions, docFreq, allTerms
|
||||
- [x] `lib/search/scoreboard.{json,md}`
|
||||
- [x] `lib/search/conformance.sh`
|
||||
|
||||
## Phase 2 — Query AST + boolean evaluation
|
||||
|
||||
- [x] Query ADT: `Term String | And Query Query | Or Query Query | Not Query |
|
||||
Phrase [String]` (in `lib/search/query.sx`)
|
||||
- [x] `lib/search/parse.sx` — query syntax parser: tokenizer + recursive-descent
|
||||
(OR < AND < NOT precedence, implicit AND on adjacency, quoted phrases, parens,
|
||||
case-insensitive keywords); `parseQuery`, `searchQuery`, `showQ`
|
||||
- [x] `lib/search/query.sx` — boolean eval via set ops on docid-sorted posting lists
|
||||
(sortedUnion/Inter/Diff, Not over allDocs universe)
|
||||
- [x] phrase eval — positional adjacency check (phraseInDoc / phraseStartsAt)
|
||||
- [x] `lib/search/tests/boolean.sx` — 28 cases: term, and, or, not, phrase,
|
||||
composition (parser edge cases move to the parse.sx suite)
|
||||
|
||||
## Phase 3 — Ranking
|
||||
|
||||
- [x] document frequency — `docFreq`/`idf`/`bm25idf` derived from the index
|
||||
(posting-list length); no separate df store needed
|
||||
- [x] TF-IDF scoring (`rankTfIdf`)
|
||||
- [x] BM25 scoring, configurable k1/b (`rankBm25 k1 b`)
|
||||
- [x] top-N retrieval (`topNTfIdf`/`topNBm25` — sortBy + take; stable DocId tiebreak)
|
||||
- [x] `lib/search/tests/rank.sx` — 23 cases: TF-IDF tf/idf behavior, BM25 length-norm
|
||||
+ tf-saturation flips vs TF-IDF, b-parameter effect, tiebreak stability, top-N
|
||||
|
||||
## Phase 4 — ACL filter + federation
|
||||
|
||||
- [x] post-filter — `aclFilter`/`searchTfIdfAcl`/`topNTfIdfAcl`/`searchBm25Acl` take an
|
||||
injected `permit :: DocId -> Bool` predicate, applied post-rank (never in the index)
|
||||
- [x] federated query — `fedIndex :: [(PeerId, Index)] -> Index` merges per-peer
|
||||
inverted indices (union posting lists per term); rank/search run once over the merge
|
||||
- [x] merge policy — relabel local DocIds to global `gid = peer*1000 + local`
|
||||
(bijection ⇒ dedupe by (peer,doc-id) is automatic); ranking interleaves peers by score
|
||||
- [x] `lib/search/tests/integration.sx` — 21 cases: index merge, cross-peer df/lookup,
|
||||
position preservation, boolean/phrase over the merge, ACL filter + top-N + bm25
|
||||
|
||||
## Extensions (post-roadmap, search-shaped vocabulary)
|
||||
|
||||
- [x] prefix / wildcard queries (`prefixTerms`, `prefixDocs`, `prefixRankTfIdf`) — 14 tests
|
||||
- [x] fuzzy matching — edit distance term expansion (`editDist`, `fuzzyTerms`,
|
||||
`fuzzyDocs`, `fuzzyRankTfIdf`) — 18 tests
|
||||
- [x] result pagination (offset / limit) — `paginate`, `pageTfIdf`, `pageBm25`,
|
||||
`resultCount` — 12 tests
|
||||
- [x] snippet / highlight generation (`highlight`, `snippet`) — 12 tests
|
||||
- [x] stemming (suffix stripping) — `stem`, `stemText`, `stemTokens`, `indexStemmed`
|
||||
— 18 tests
|
||||
- [x] proximity / NEAR — `nearDocs k t1 t2` (unordered, within k positions) — 9 tests
|
||||
- [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests
|
||||
- [x] boolean-filtered ranked search — `queryTerms`, `searchRankTfIdf`,
|
||||
`searchRankBm25` (filter by boolean query, rank survivors by relevance) — 11 tests
|
||||
- [x] did-you-mean / spelling suggestion — `suggest`, `suggestN` (closest indexed
|
||||
terms by edit distance, alphabetical tiebreak) — 9 tests
|
||||
|
||||
## Progress log
|
||||
|
||||
- **Extension: did-you-mean / spelling suggestion (234/234 total).** `suggest`/`suggestN`
|
||||
rank indexed terms by edit distance to a (misspelled) query term, alphabetical
|
||||
tiebreak. 9 tests.
|
||||
- **Extension: boolean-filtered ranked search (225/225 total).** `searchRankTfIdf`/
|
||||
`searchRankBm25` parse a boolean query, filter docs via evalQuery, then rank the
|
||||
survivors by relevance over the query's leaf terms (`queryTerms`) — the real-world
|
||||
filter-then-rank pattern. 11 tests.
|
||||
- **Extension: synonyms/query expansion (214/214 total).** A synonym map
|
||||
`[(Term,[Term])]` expands a query term to itself + synonyms (`expandTerm`); `synDocs`
|
||||
unions, `synRankTfIdf` ranks the expanded set. 9 tests.
|
||||
- **Extension: proximity/NEAR (205/205 total).** `nearDocs k t1 t2 idx` returns docs
|
||||
where both terms occur within k positions (unordered), candidates = posting
|
||||
intersection, filtered on the positional postings. 9 tests.
|
||||
- **Extension: stemming (196/196 total).** Deterministic English suffix stripping
|
||||
(`stem`), `stemText`/`stemTokens`, `indexStemmed`. Two haskell-on-sx gotchas: take/drop
|
||||
over a String yield char CODES not char strings (rebuild via `joinChars . map chr`),
|
||||
and isSuffixOf's `reverse` trips `++` on the String repr (manual suffix compare). All
|
||||
five planned extensions now done; the loop can keep adding search vocabulary. 18 tests.
|
||||
- **Extension: highlight/snippet (178/178 total).** `highlight terms text` marks
|
||||
query-matching (normalized) tokens with [..]; `snippet ctx terms text` extracts a
|
||||
context window around the first match. 12 tests.
|
||||
- **Extension: fuzzy matching (166/166 total).** Levenshtein `editDist` as an O(m*n)
|
||||
row-based DP (the naive recursive version is exponential and times out under load),
|
||||
`fuzzyTerms`/`fuzzyDocs`/`fuzzyRankTfIdf` expand a term to indexed terms within a max
|
||||
edit distance. 18 tests.
|
||||
- **Extension: pagination (148/148 total).** `paginate off lim` windows a ranked list
|
||||
(take lim . drop off); `pageTfIdf`/`pageBm25` + `resultCount`. 12 tests. Note the
|
||||
full conformance now runs 8 suites sequentially and needs an overall timeout ~1900s
|
||||
under the heavy box load.
|
||||
- **Extension: prefix/wildcard queries (136/136 total).** `prefixTerms` matches every
|
||||
indexed term starting with a prefix (via allTerms + isPrefixOf); `prefixDocs` unions
|
||||
their docs; `prefixRankTfIdf` ranks treating the matched terms as the query. 14 tests.
|
||||
- **Phase 4 complete — federation + ACL (122/122 total). Roadmap done.** `fedIndex`
|
||||
merges per-peer inverted indices (union posting lists per term) after relabelling
|
||||
local DocIds to global `gid = peer*1000 + local` — the bijection makes (peer,doc-id)
|
||||
dedupe automatic and keeps positions, so ranking runs once over the merge and
|
||||
interleaves peers by score (rank-correct). ACL is a post-rank `filter` over an
|
||||
injected `permit :: DocId -> Bool` (viewer baked in by the caller) — never in the
|
||||
index; `searchTfIdfAcl`/`topNTfIdfAcl`/`searchBm25Acl`. 21 integration tests.
|
||||
- **Phase 3 complete — ranking (101/101 total).** TF-IDF (`rankTfIdf`) and BM25
|
||||
(`rankBm25 k1 b`) over the candidate set (docs containing any query term), scores
|
||||
as floats with deterministic DocId-ascending tiebreak; `topNTfIdf`/`topNBm25` via
|
||||
sortBy+take. df/idf derived from posting-list length (no separate df store). 23
|
||||
tests incl. a BM25-vs-TF-IDF flip (length-norm + tf-saturation) and the b-parameter
|
||||
effect. Float division/`log`/float literals all work in haskell-on-sx.
|
||||
- **Phase 2 complete — parser (78/78 total).** Query tokenizer (ord-based
|
||||
delimiters, quoted phrases) + recursive-descent parser with OR<AND<NOT precedence,
|
||||
implicit AND on adjacency, parens, case-insensitive keywords. `parseQuery`,
|
||||
`searchQuery`, `showQ` (canonical render for AST tests). 32 tests in parse.sx.
|
||||
**haskell-on-sx parser gotchas hit while writing this (see parse.sx header):**
|
||||
(1) escaped char literals like `'\"'` break the tokenizer — match delimiters by
|
||||
`ord c == 34`; (2) an `[]` *pattern* inside a `case` alt breaks the parser — use
|
||||
multi-clause functions instead; (3) `case`/constructor patterns and `let (a,b)=..`
|
||||
are fine. Embedded Haskell string literals in a `.sx` source string need single
|
||||
`\"`, not `\\\"`.
|
||||
- **Phase 2 boolean/phrase eval (46/46 total).** Query ADT
|
||||
`Term|And|Or|Not|Phrase` + `evalQuery :: Index -> Query -> [DocId]` in query.sx.
|
||||
Boolean ops are linear merges over docid-sorted posting lists; Not subtracts from
|
||||
the allDocs universe; Phrase checks positional adjacency. 28 tests in boolean.sx.
|
||||
Refactored both suites to **batch all cases into one program eval** (search-batch
|
||||
in testlib) — under the heavy CPU load on this box (~11 on 2 cores), 18–28 separate
|
||||
hk-eval-program calls timed out; one combined eval per suite is ~20× faster.
|
||||
Parser (parse.sx) is the remaining Phase 2 box.
|
||||
- **Phase 1 complete (18/18).** Tokenizer (lowercase + strip punctuation + positions),
|
||||
inverted index as sorted assoc-list `[(Term,[(DocId,[Pos])])]`, indexDoc/deleteDoc/
|
||||
lookupTerm/docFreq/allTerms. Search lib is Haskell source assembled into `search/src`
|
||||
and evaluated via the haskell-on-sx interpreter; tests reuse `hk-test` counters and a
|
||||
`search-eval` helper that forces HK values to plain SX. conformance.sh models
|
||||
lib/haskell (MODE=counters, COUNTERS_PASS/FAIL=hk-test-pass/fail).
|
||||
|
||||
## Blockers
|
||||
|
||||
- **None.** Note: the box is heavily CPU-oversubscribed by sibling loop agents
|
||||
(load ~11 on 2 cores); each program eval is ~10× slower than nominal, so suite
|
||||
timeout is set to 600s. Runs are correct, just slow.
|
||||
- **Data.Map public API gap (informational, not fixing):** the haskell-on-sx
|
||||
`import Data.Map` binds only empty/singleton/insert/lookup/member/size/null/delete/
|
||||
insertWith/adjust/findWithDefault — no toList/keys/elems/map/filter/unionWith. Index
|
||||
uses a pure assoc-list instead so term iteration and federation merge stay simple.
|
||||
Reference in New Issue
Block a user