Compare commits
15 Commits
loops/pers
...
loops/sear
| Author | SHA1 | Date | |
|---|---|---|---|
| 5d62d08e1c | |||
| db2a5dc6ab | |||
| cfa68c3db3 | |||
| cf4e613e43 | |||
| 911a2f57c0 | |||
| 7231cb651f | |||
| 5945b51cfd | |||
| 3ab8270a58 | |||
| 9d3b775b25 | |||
| 77ab827b91 | |||
| a3f9d4f6c9 | |||
| 4c84decc01 | |||
| 0f0da0319c | |||
| b8cf3eb1b8 | |||
| e2de5a4675 |
@@ -1,10 +0,0 @@
|
||||
; persist/api — the public entry point. persist/open returns a backend (the
|
||||
; in-memory one by default; pass a custom backend to inject file/pg/ipfs-ref).
|
||||
; All facet functions take this backend as their first argument.
|
||||
; Requires: lib/persist/backend.sx, lib/persist/log.sx, lib/persist/kv.sx.
|
||||
|
||||
(define
|
||||
persist/open
|
||||
(fn
|
||||
(&rest args)
|
||||
(if (= (len args) 0) (persist/mem-backend) (first args))))
|
||||
@@ -1,34 +0,0 @@
|
||||
; persist/backend — the injected storage protocol. Every facet (log, kv,
|
||||
; snapshot) goes through a backend dict, never touching storage directly, so
|
||||
; file/pg/ipfs-ref backends swap in unchanged. A backend is a dict of fns:
|
||||
; {:append :read :last-seq :truncate-through :streams
|
||||
; :kv-get :kv-put :kv-delete :kv-has? :kv-keys}
|
||||
; The in-memory backend is the test default. State is three dicts held in a
|
||||
; closure and mutated with set!: logs (stream -> event list), seqs (stream ->
|
||||
; last assigned seq — a monotonic high-water mark that survives compaction so
|
||||
; truncating the log prefix never lets a future append reuse a seq), kv. The
|
||||
; stream catalog comes from seqs, so a fully-compacted stream still lists.
|
||||
|
||||
(define
|
||||
persist/mem-backend
|
||||
(fn
|
||||
()
|
||||
(let ((logs {}) (seqs {}) (kv {})) {:truncate-through (fn (stream n) (let ((cur (get logs stream))) (set! logs (assoc logs stream (filter (fn (e) (> (persist/event-seq e) n)) (if cur cur (list))))))) :kv-keys (fn () (keys kv)) :read (fn (stream) (let ((cur (get logs stream))) (if cur cur (list)))) :kv-has? (fn (key) (has-key? kv key)) :last-seq (fn (stream) (let ((s (get seqs stream))) (if s s 0))) :streams (fn () (keys seqs)) :append (fn (stream event) (begin (let ((cur (get logs stream))) (set! logs (assoc logs stream (append (if cur cur (list)) event)))) (set! seqs (assoc seqs stream (persist/event-seq event))))) :kv-delete (fn (key) (set! kv (dissoc kv key))) :kv-put (fn (key val) (set! kv (assoc kv key val))) :kv-get (fn (key) (get kv key))})))
|
||||
|
||||
; protocol accessors — call a backend op by keyword
|
||||
(define
|
||||
persist/backend-append
|
||||
(fn (b stream event) ((get b :append) stream event)))
|
||||
(define persist/backend-read (fn (b stream) ((get b :read) stream)))
|
||||
(define
|
||||
persist/backend-last-seq
|
||||
(fn (b stream) ((get b :last-seq) stream)))
|
||||
(define persist/backend-streams (fn (b) ((get b :streams))))
|
||||
(define
|
||||
persist/backend-truncate
|
||||
(fn (b stream n) ((get b :truncate-through) stream n)))
|
||||
(define persist/backend-kv-get (fn (b key) ((get b :kv-get) key)))
|
||||
(define persist/backend-kv-put (fn (b key val) ((get b :kv-put) key val)))
|
||||
(define persist/backend-kv-delete (fn (b key) ((get b :kv-delete) key)))
|
||||
(define persist/backend-kv-has? (fn (b key) ((get b :kv-has?) key)))
|
||||
(define persist/backend-kv-keys (fn (b) ((get b :kv-keys))))
|
||||
@@ -1,40 +0,0 @@
|
||||
; persist/batch — commit several events to a stream as one contiguous block.
|
||||
; Each spec is (type at data). Plain append-batch always appends; the -expect
|
||||
; form is the transactional commit: it checks the stream is still at `expected`
|
||||
; before writing ANY event, so a batch is all-or-nothing under a concurrent
|
||||
; writer (conflict is a value, not a partial write). For an order + its line
|
||||
; items, an audit entry + its reason, etc. Requires: lib/persist/log.sx.
|
||||
|
||||
; append a list of (type at data) specs as one block; returns the stored events
|
||||
; (a real cons-list, in order, with contiguous seqs)
|
||||
(define
|
||||
persist/append-batch
|
||||
(fn
|
||||
(b stream specs)
|
||||
(reverse
|
||||
(reduce
|
||||
(fn
|
||||
(acc spec)
|
||||
(cons
|
||||
(persist/append
|
||||
b
|
||||
stream
|
||||
(first spec)
|
||||
(nth spec 1)
|
||||
(nth spec 2))
|
||||
acc))
|
||||
(list)
|
||||
specs))))
|
||||
|
||||
; transactional batch: commit all specs only if the stream is still at expected,
|
||||
; else return a conflict and write nothing
|
||||
(define
|
||||
persist/append-batch-expect
|
||||
(fn
|
||||
(b stream expected specs)
|
||||
(let
|
||||
((actual (persist/last-seq b stream)))
|
||||
(if
|
||||
(= actual expected)
|
||||
(persist/append-batch b stream specs)
|
||||
{:actual actual :expected expected :conflict true}))))
|
||||
@@ -1,66 +0,0 @@
|
||||
; persist/blob — large objects (images, media) are NOT persist's to hold. They
|
||||
; live in a content-addressed store (artdag/IPFS); persist stores only a
|
||||
; reference: {:cid :size :mime}. The blob store is a SEPARATE injected
|
||||
; dependency with its own transport (perform in production, a mock content store
|
||||
; in tests), distinct from the event/kv backend. The invariant: a blob ref that
|
||||
; lands in the log or kv carries the CID + metadata and never the bytes.
|
||||
; Requires: lib/persist/backend.sx.
|
||||
|
||||
(define persist/blob-ref (fn (cid size mime) {:mime mime :size size :cid cid}))
|
||||
(define persist/blob-ref? (fn (r) (has-key? r :cid)))
|
||||
(define persist/blob-cid (fn (r) (get r :cid)))
|
||||
(define persist/blob-size (fn (r) (get r :size)))
|
||||
(define persist/blob-mime (fn (r) (get r :mime)))
|
||||
|
||||
; blob store protocol over an injectable transport
|
||||
(define persist/blob-io (fn (transport) {:put (fn (bytes mime) (transport {:op "blob/put" :args (list bytes mime)})) :get (fn (cid) (transport {:op "blob/get" :args (list cid)})) :has? (fn (cid) (transport {:op "blob/has?" :args (list cid)}))}))
|
||||
|
||||
; production blob store — transport is the kernel's perform
|
||||
(define
|
||||
persist/blob-store-backend
|
||||
(fn () (persist/blob-io (fn (req) (perform req)))))
|
||||
|
||||
; store bytes via the blob backend; return ONLY the ref (cid + metadata) — this
|
||||
; is what the caller persists in the log/kv. The bytes never enter persist.
|
||||
(define
|
||||
persist/blob-store
|
||||
(fn
|
||||
(blob bytes mime)
|
||||
(let
|
||||
((cid ((get blob :put) bytes mime)))
|
||||
(persist/blob-ref cid (len bytes) mime))))
|
||||
|
||||
(define
|
||||
persist/blob-fetch
|
||||
(fn (blob ref) ((get blob :get) (persist/blob-cid ref))))
|
||||
(define
|
||||
persist/blob-exists?
|
||||
(fn (blob ref) ((get blob :has?) (persist/blob-cid ref))))
|
||||
|
||||
; mock content-addressed store (stands in for artdag/IPFS). CID is a
|
||||
; deterministic content address: identical bytes dedupe to one CID. A real
|
||||
; store computes a SHA3/IPFS CID host-side; the prefix keeps the mock readable.
|
||||
(define persist/blob-cid-of (fn (bytes) (str "cid:" bytes)))
|
||||
|
||||
(define
|
||||
persist/blob-serve
|
||||
(fn
|
||||
(store req)
|
||||
(let
|
||||
((op (get req :op)) (args (get req :args)))
|
||||
(cond
|
||||
((equal? op "blob/put")
|
||||
(let
|
||||
((cid (persist/blob-cid-of (first args))))
|
||||
(begin (persist/backend-kv-put store cid (first args)) cid)))
|
||||
((equal? op "blob/get") (persist/backend-kv-get store (first args)))
|
||||
((equal? op "blob/has?")
|
||||
(persist/backend-kv-has? store (first args)))
|
||||
(else (error (str "persist/blob-serve: unknown op " op)))))))
|
||||
|
||||
(define
|
||||
persist/blob-mock-transport
|
||||
(fn (store) (fn (req) (persist/blob-serve store req))))
|
||||
(define
|
||||
persist/mock-blob
|
||||
(fn (store) (persist/blob-io (persist/blob-mock-transport store))))
|
||||
@@ -1,35 +0,0 @@
|
||||
; persist/catalog — enumerate the streams a backend holds. The catalog is the
|
||||
; set of streams ever appended to (from the seq high-water marks), so a stream
|
||||
; whose log has been fully compacted still appears. $-prefixed streams are
|
||||
; reserved for internal indexes (e.g. the $global commit index) and are hidden
|
||||
; from the public catalog; use streams-all to see them. For admin, global ops,
|
||||
; and cross-stream tooling. Requires: lib/persist/backend.sx, lib/persist/log.sx.
|
||||
|
||||
(define persist/reserved-stream? (fn (s) (starts-with? s "$")))
|
||||
|
||||
; every stream including reserved internal indexes
|
||||
(define persist/streams-all (fn (b) (persist/backend-streams b)))
|
||||
|
||||
; public streams (reserved internal indexes hidden)
|
||||
(define
|
||||
persist/streams
|
||||
(fn
|
||||
(b)
|
||||
(filter
|
||||
(fn (s) (not (persist/reserved-stream? s)))
|
||||
(persist/streams-all b))))
|
||||
|
||||
(define persist/stream-count (fn (b) (len (persist/streams b))))
|
||||
(define
|
||||
persist/stream-exists?
|
||||
(fn (b stream) (contains? (persist/streams b) stream)))
|
||||
|
||||
; total logical events across all public streams (sum of high-water marks)
|
||||
(define
|
||||
persist/total-events
|
||||
(fn
|
||||
(b)
|
||||
(reduce
|
||||
(fn (acc s) (+ acc (persist/last-seq b s)))
|
||||
0
|
||||
(persist/streams b))))
|
||||
@@ -1,43 +0,0 @@
|
||||
; persist/compaction — once a snapshot subsumes a log prefix, those events are
|
||||
; dead weight: replay starts from the snapshot, so events with seq <= the
|
||||
; snapshot's seq are never folded again. Compaction checkpoints then truncates
|
||||
; that prefix. The seq counter is monotonic (backend high-water mark) so future
|
||||
; appends keep climbing — the surviving tail keeps its original seqs and replay
|
||||
; from the snapshot still equals a full replay of the pre-compaction log.
|
||||
; Policy is explicit: compact when the uncompacted tail reaches `every` events.
|
||||
; Requires: lib/persist/snapshot.sx, lib/persist/log.sx.
|
||||
|
||||
; events accumulated since the last snapshot for name
|
||||
(define
|
||||
persist/uncompacted
|
||||
(fn
|
||||
(b stream name seed)
|
||||
(-
|
||||
(persist/last-seq b stream)
|
||||
(persist/project-seq (persist/snapshot-load b name seed)))))
|
||||
|
||||
; policy: should we compact yet? tail since snapshot >= every
|
||||
(define
|
||||
persist/should-compact?
|
||||
(fn
|
||||
(b stream name every seed)
|
||||
(>= (persist/uncompacted b stream name seed) every)))
|
||||
|
||||
; checkpoint then drop the snapshotted prefix; returns the new snapshot state
|
||||
(define
|
||||
persist/compact
|
||||
(fn
|
||||
(b stream name step seed)
|
||||
(let
|
||||
((state (persist/checkpoint b stream name step seed)))
|
||||
(begin (persist/truncate b stream (persist/project-seq state)) state))))
|
||||
|
||||
; compact only if the policy fires; always returns the current snapshot state
|
||||
(define
|
||||
persist/maybe-compact
|
||||
(fn
|
||||
(b stream name step seed every)
|
||||
(if
|
||||
(persist/should-compact? b stream name every seed)
|
||||
(persist/compact b stream name step seed)
|
||||
(persist/snapshot-load b name seed))))
|
||||
@@ -1,24 +0,0 @@
|
||||
; persist/concurrency — optimistic concurrency for the log facet. The caller
|
||||
; passes the seq it believes is current (the last-seq it last observed). If the
|
||||
; stream has advanced since, the append is refused and a conflict VALUE is
|
||||
; returned — never a crash, never a silent overwrite. The caller re-reads the
|
||||
; tail and retries. This is the substrate-level answer to "two writers, one
|
||||
; stream": the loser gets a result it can act on.
|
||||
; Requires: lib/persist/log.sx.
|
||||
|
||||
(define
|
||||
persist/append-expect
|
||||
(fn
|
||||
(b stream expected type at data)
|
||||
(let
|
||||
((actual (persist/last-seq b stream)))
|
||||
(if
|
||||
(= actual expected)
|
||||
(persist/append b stream type at data)
|
||||
{:actual actual :expected expected :conflict true}))))
|
||||
|
||||
(define
|
||||
persist/conflict?
|
||||
(fn (r) (if (has-key? r :conflict) (get r :conflict) false)))
|
||||
(define persist/conflict-expected (fn (r) (get r :expected)))
|
||||
(define persist/conflict-actual (fn (r) (get r :actual)))
|
||||
@@ -1,128 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# lib/persist/conformance.sh — run persist test suites, emit scoreboard.json + scoreboard.md.
|
||||
|
||||
set -uo pipefail
|
||||
cd "$(git rev-parse --show-toplevel)"
|
||||
|
||||
SX_SERVER="${SX_SERVER:-/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe}"
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
SX_SERVER="hosts/ocaml/_build/default/bin/sx_server.exe"
|
||||
fi
|
||||
if [ ! -x "$SX_SERVER" ]; then
|
||||
echo "ERROR: sx_server.exe not found." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
SUITES=(event log kv project subscribe concurrency snapshot compaction durable blob view cas catalog query batch upcast idempotency global example-acl recovery)
|
||||
|
||||
OUT_JSON="lib/persist/scoreboard.json"
|
||||
OUT_MD="lib/persist/scoreboard.md"
|
||||
|
||||
run_suite() {
|
||||
local suite=$1
|
||||
local file="lib/persist/tests/${suite}.sx"
|
||||
local TMP
|
||||
TMP=$(mktemp)
|
||||
cat > "$TMP" << EPOCHS
|
||||
(epoch 1)
|
||||
(load "spec/stdlib.sx")
|
||||
(load "lib/r7rs.sx")
|
||||
(load "lib/persist/event.sx")
|
||||
(load "lib/persist/backend.sx")
|
||||
(load "lib/persist/log.sx")
|
||||
(load "lib/persist/kv.sx")
|
||||
(load "lib/persist/project.sx")
|
||||
(load "lib/persist/concurrency.sx")
|
||||
(load "lib/persist/snapshot.sx")
|
||||
(load "lib/persist/compaction.sx")
|
||||
(load "lib/persist/durable.sx")
|
||||
(load "lib/persist/blob.sx")
|
||||
(load "lib/persist/view.sx")
|
||||
(load "lib/persist/catalog.sx")
|
||||
(load "lib/persist/query.sx")
|
||||
(load "lib/persist/batch.sx")
|
||||
(load "lib/persist/upcast.sx")
|
||||
(load "lib/persist/idempotency.sx")
|
||||
(load "lib/persist/global.sx")
|
||||
(load "lib/persist/examples/acl.sx")
|
||||
(load "lib/persist/subscribe.sx")
|
||||
(load "lib/persist/api.sx")
|
||||
(epoch 2)
|
||||
(eval "(define persist-test-pass 0)")
|
||||
(eval "(define persist-test-fail 0)")
|
||||
(eval "(define persist-test (fn (name got expected) (if (equal? got expected) (set! persist-test-pass (+ persist-test-pass 1)) (set! persist-test-fail (+ persist-test-fail 1)))))")
|
||||
(epoch 3)
|
||||
(load "${file}")
|
||||
(epoch 4)
|
||||
(eval "(list persist-test-pass persist-test-fail)")
|
||||
EPOCHS
|
||||
|
||||
local OUTPUT
|
||||
OUTPUT=$(timeout 300 "$SX_SERVER" < "$TMP" 2>/dev/null)
|
||||
rm -f "$TMP"
|
||||
|
||||
local LINE
|
||||
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 4 / {getline; print; exit}')
|
||||
if [ -z "$LINE" ]; then
|
||||
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 4 \([0-9]+ [0-9]+\)\)' | tail -1 \
|
||||
| sed -E 's/^\(ok 4 //; s/\)$//')
|
||||
fi
|
||||
|
||||
local P F
|
||||
P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/')
|
||||
F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/')
|
||||
P=${P:-0}
|
||||
F=${F:-0}
|
||||
echo "${P} ${F}"
|
||||
}
|
||||
|
||||
declare -A SUITE_PASS
|
||||
declare -A SUITE_FAIL
|
||||
TOTAL_PASS=0
|
||||
TOTAL_FAIL=0
|
||||
|
||||
echo "Running persist conformance suite..." >&2
|
||||
for s in "${SUITES[@]}"; do
|
||||
read -r p f < <(run_suite "$s")
|
||||
SUITE_PASS[$s]=$p
|
||||
SUITE_FAIL[$s]=$f
|
||||
TOTAL_PASS=$((TOTAL_PASS + p))
|
||||
TOTAL_FAIL=$((TOTAL_FAIL + f))
|
||||
printf " %-12s %d/%d\n" "$s" "$p" "$((p+f))" >&2
|
||||
done
|
||||
|
||||
# scoreboard.json
|
||||
{
|
||||
printf '{\n'
|
||||
printf ' "suites": {\n'
|
||||
first=1
|
||||
for s in "${SUITES[@]}"; do
|
||||
if [ $first -eq 0 ]; then printf ',\n'; fi
|
||||
printf ' "%s": {"pass": %d, "fail": %d}' "$s" "${SUITE_PASS[$s]}" "${SUITE_FAIL[$s]}"
|
||||
first=0
|
||||
done
|
||||
printf '\n },\n'
|
||||
printf ' "total_pass": %d,\n' "$TOTAL_PASS"
|
||||
printf ' "total_fail": %d,\n' "$TOTAL_FAIL"
|
||||
printf ' "total": %d\n' "$((TOTAL_PASS + TOTAL_FAIL))"
|
||||
printf '}\n'
|
||||
} > "$OUT_JSON"
|
||||
|
||||
# scoreboard.md
|
||||
{
|
||||
printf '# persist Conformance Scoreboard\n\n'
|
||||
printf '_Generated by `lib/persist/conformance.sh`_\n\n'
|
||||
printf '| Suite | Pass | Fail | Total |\n'
|
||||
printf '|-------|-----:|-----:|------:|\n'
|
||||
for s in "${SUITES[@]}"; do
|
||||
p=${SUITE_PASS[$s]}
|
||||
f=${SUITE_FAIL[$s]}
|
||||
printf '| %s | %d | %d | %d |\n' "$s" "$p" "$f" "$((p+f))"
|
||||
done
|
||||
printf '| **Total** | **%d** | **%d** | **%d** |\n' "$TOTAL_PASS" "$TOTAL_FAIL" "$((TOTAL_PASS + TOTAL_FAIL))"
|
||||
} > "$OUT_MD"
|
||||
|
||||
echo "Wrote $OUT_JSON and $OUT_MD" >&2
|
||||
echo "Total: $TOTAL_PASS pass, $TOTAL_FAIL fail" >&2
|
||||
|
||||
[ "$TOTAL_FAIL" -eq 0 ]
|
||||
@@ -1,71 +0,0 @@
|
||||
; persist/durable — a backend whose every op crosses the kernel's IO-suspension
|
||||
; boundary. Each op performs an IO request {:op "persist/..." :args (...)};
|
||||
; under the real kernel `perform` suspends the CEK machine and the host (file,
|
||||
; pg, ipfs-ref) services the request and resumes with the result — so the facet
|
||||
; code above (log/kv/project/snapshot/compaction) never changes. The TRANSPORT
|
||||
; is injectable: production passes the kernel's perform; tests pass a mock
|
||||
; servicer over an in-memory disk. Same request shapes either way, so the whole
|
||||
; existing facet stack runs unchanged on the mock-durable backend.
|
||||
; Requires: lib/persist/backend.sx.
|
||||
|
||||
; request encoders — the exact payloads the durable backend performs
|
||||
(define persist/req-append (fn (stream event) {:op "persist/append" :args (list stream event)}))
|
||||
(define persist/req-read (fn (stream) {:op "persist/read" :args (list stream)}))
|
||||
(define persist/req-last-seq (fn (stream) {:op "persist/last-seq" :args (list stream)}))
|
||||
(define persist/req-streams (fn () {:op "persist/streams" :args (list)}))
|
||||
(define persist/req-truncate (fn (stream n) {:op "persist/truncate" :args (list stream n)}))
|
||||
(define persist/req-kv-get (fn (key) {:op "persist/kv-get" :args (list key)}))
|
||||
(define persist/req-kv-put (fn (key val) {:op "persist/kv-put" :args (list key val)}))
|
||||
(define persist/req-kv-delete (fn (key) {:op "persist/kv-delete" :args (list key)}))
|
||||
(define persist/req-kv-has? (fn (key) {:op "persist/kv-has?" :args (list key)}))
|
||||
(define persist/req-kv-keys (fn () {:op "persist/kv-keys" :args (list)}))
|
||||
|
||||
; a backend parameterized over a transport (req -> response)
|
||||
(define persist/io-backend (fn (transport) {:truncate-through (fn (stream n) (transport (persist/req-truncate stream n))) :kv-keys (fn () (transport (persist/req-kv-keys))) :read (fn (stream) (transport (persist/req-read stream))) :kv-has? (fn (key) (transport (persist/req-kv-has? key))) :last-seq (fn (stream) (transport (persist/req-last-seq stream))) :streams (fn () (transport (persist/req-streams))) :append (fn (stream event) (transport (persist/req-append stream event))) :kv-delete (fn (key) (transport (persist/req-kv-delete key))) :kv-put (fn (key val) (transport (persist/req-kv-put key val))) :kv-get (fn (key) (transport (persist/req-kv-get key)))}))
|
||||
|
||||
; production backend — transport is the kernel's perform (suspends; host resumes)
|
||||
(define
|
||||
persist/durable-backend
|
||||
(fn () (persist/io-backend (fn (req) (perform req)))))
|
||||
|
||||
; reference host: service one request against a disk (any backend protocol impl).
|
||||
; This is what a real host plugs into the kernel's IO resolver, and the mock-IO
|
||||
; harness for tests: it never touches a real disk, just an in-memory backend.
|
||||
(define
|
||||
persist/serve
|
||||
(fn
|
||||
(disk req)
|
||||
(let
|
||||
((op (get req :op)) (args (get req :args)))
|
||||
(cond
|
||||
((equal? op "persist/append")
|
||||
(persist/backend-append disk (first args) (nth args 1)))
|
||||
((equal? op "persist/read")
|
||||
(persist/backend-read disk (first args)))
|
||||
((equal? op "persist/last-seq")
|
||||
(persist/backend-last-seq disk (first args)))
|
||||
((equal? op "persist/streams") (persist/backend-streams disk))
|
||||
((equal? op "persist/truncate")
|
||||
(persist/backend-truncate disk (first args) (nth args 1)))
|
||||
((equal? op "persist/kv-get")
|
||||
(persist/backend-kv-get disk (first args)))
|
||||
((equal? op "persist/kv-put")
|
||||
(persist/backend-kv-put disk (first args) (nth args 1)))
|
||||
((equal? op "persist/kv-delete")
|
||||
(persist/backend-kv-delete disk (first args)))
|
||||
((equal? op "persist/kv-has?")
|
||||
(persist/backend-kv-has? disk (first args)))
|
||||
((equal? op "persist/kv-keys") (persist/backend-kv-keys disk))
|
||||
(else (error (str "persist/serve: unknown op " op)))))))
|
||||
|
||||
; mock transport: a perform-replacement that services against a disk in-process
|
||||
(define
|
||||
persist/mock-transport
|
||||
(fn (disk) (fn (req) (persist/serve disk req))))
|
||||
|
||||
; a durable backend wired to a mock disk — exercises the full io-backend path
|
||||
; (request-encode -> serve -> disk) with no suspension, so the existing facet
|
||||
; suite runs against it unchanged.
|
||||
(define
|
||||
persist/mock-durable
|
||||
(fn (disk) (persist/io-backend (persist/mock-transport disk))))
|
||||
@@ -1,13 +0,0 @@
|
||||
; persist/event — an event is the unit of the log facet:
|
||||
; {:stream :seq :type :at :data}
|
||||
; stream = which append-only stream, seq = 1-based position within it,
|
||||
; type = event kind, at = caller-supplied timestamp (never a clock here:
|
||||
; replay must stay pure), data = payload dict.
|
||||
|
||||
(define persist/event (fn (stream seq type at data) {:data data :type type :at at :stream stream :seq seq}))
|
||||
|
||||
(define persist/event-stream (fn (e) (get e :stream)))
|
||||
(define persist/event-seq (fn (e) (get e :seq)))
|
||||
(define persist/event-type (fn (e) (get e :type)))
|
||||
(define persist/event-at (fn (e) (get e :at)))
|
||||
(define persist/event-data (fn (e) (get e :data)))
|
||||
@@ -1,79 +0,0 @@
|
||||
; persist/examples/acl — a WORKED MIGRATION REFERENCE. A subsystem (acl grants:
|
||||
; who may access what) currently hand-rolls an in-memory mutable map that loses
|
||||
; every grant on restart and keeps no audit trail. This shows the same subsystem
|
||||
; rebuilt on persist. It is the template other subsystem loops copy; it does NOT
|
||||
; touch the real lib/acl (out of this loop's scope).
|
||||
;
|
||||
; BEFORE — hand-rolled, ephemeral, no history, no concurrency safety:
|
||||
; (define acl-grants {}) ; resource -> principal list (mutable)
|
||||
; (define acl-grant! (fn (r p) (set! acl-grants (assoc acl-grants r (cons p (get acl-grants r))))))
|
||||
; (define acl-revoke! (fn (r p) (set! acl-grants (assoc acl-grants r (remove p ...)))))
|
||||
; (define acl-can? (fn (r p) (contains? (get acl-grants r) p)))
|
||||
; ;; vanishes on restart; "when/why was X granted?" is unanswerable.
|
||||
;
|
||||
; AFTER — on persist. Grants/revokes are EVENTS (history matters), the current
|
||||
; grant set is a PROJECTION, checks read a materialized VIEW, and the audit trail
|
||||
; is a time-windowed query. Every fn takes a backend `b`, so the same code runs
|
||||
; on the in-memory backend today and the durable backend unchanged.
|
||||
; Requires: lib/persist/log.sx, lib/persist/project.sx, lib/persist/view.sx,
|
||||
; lib/persist/query.sx.
|
||||
|
||||
(define acl/stream (fn (resource) (str "acl/" resource)))
|
||||
|
||||
; write side — grant/revoke append events (the history is the source of truth)
|
||||
(define
|
||||
acl/grant
|
||||
(fn
|
||||
(b resource principal at)
|
||||
(persist/append b (acl/stream resource) "granted" at {:principal principal})))
|
||||
(define
|
||||
acl/revoke
|
||||
(fn
|
||||
(b resource principal at)
|
||||
(persist/append b (acl/stream resource) "revoked" at {:principal principal})))
|
||||
|
||||
; fold step: grant adds a principal (once), revoke removes it
|
||||
(define
|
||||
acl/step
|
||||
(fn
|
||||
(set e)
|
||||
(let
|
||||
((p (get (persist/event-data e) :principal)))
|
||||
(if
|
||||
(equal? (persist/event-type e) "granted")
|
||||
(if (contains? set p) set (append set p))
|
||||
(filter (fn (x) (not (equal? x p))) set)))))
|
||||
|
||||
; read side — current grant set + membership check (replays the log)
|
||||
(define
|
||||
acl/grants
|
||||
(fn
|
||||
(b resource)
|
||||
(persist/project-fold b (acl/stream resource) acl/step (list))))
|
||||
(define
|
||||
acl/can?
|
||||
(fn (b resource principal) (contains? (acl/grants b resource) principal)))
|
||||
|
||||
; materialized view — attach to a hub for O(1) checks that stay current on write
|
||||
(define
|
||||
acl/view
|
||||
(fn
|
||||
(resource)
|
||||
(persist/view
|
||||
(str "acl-current/" resource)
|
||||
(acl/stream resource)
|
||||
acl/step
|
||||
(list))))
|
||||
(define
|
||||
acl/can-fast?
|
||||
(fn
|
||||
(b resource principal)
|
||||
(contains? (persist/view-peek b (acl/view resource)) principal)))
|
||||
|
||||
; audit — grants/revokes for a resource in a time window (the new capability the
|
||||
; hand-rolled version could never answer)
|
||||
(define
|
||||
acl/audit-window
|
||||
(fn
|
||||
(b resource from to)
|
||||
(persist/read-window b (acl/stream resource) from to)))
|
||||
@@ -1,55 +0,0 @@
|
||||
; persist/global — a global commit ordering across streams. Per-stream seqs only
|
||||
; order within a stream; a unified timeline (e.g. feed's home feed, a global
|
||||
; audit trail) needs a single order across streams. `persist/gappend` appends to
|
||||
; the target stream and then records a pointer in a reserved $global index whose
|
||||
; own seq IS the global commit position. Reading the index in order and
|
||||
; resolving each pointer yields every event in commit order. This is opt-in:
|
||||
; streams that don't need global ordering use plain persist/append and never
|
||||
; touch $global. Determinism: the order is the $global append order, replayed
|
||||
; identically. Requires: lib/persist/log.sx, lib/persist/catalog.sx.
|
||||
|
||||
(define persist/global-stream "$global")
|
||||
|
||||
; append with a global commit position. Returns the stored stream event; the
|
||||
; event's global position is the seq of its pointer in $global.
|
||||
(define
|
||||
persist/gappend
|
||||
(fn
|
||||
(b stream type at data)
|
||||
(let
|
||||
((ev (persist/append b stream type at data)))
|
||||
(begin (persist/append b persist/global-stream "ref" at {:stream stream :seq (persist/event-seq ev)}) ev))))
|
||||
|
||||
; the global index: pointer events in commit order (each pointer's seq = gpos)
|
||||
(define persist/global-log (fn (b) (persist/read b persist/global-stream)))
|
||||
|
||||
; the current global commit position (count of globally-ordered appends)
|
||||
(define
|
||||
persist/global-pos
|
||||
(fn (b) (persist/last-seq b persist/global-stream)))
|
||||
|
||||
; resolve a pointer event to the actual stream event it references
|
||||
(define
|
||||
persist/resolve-ref
|
||||
(fn
|
||||
(b ptr)
|
||||
(let
|
||||
((d (persist/event-data ptr)))
|
||||
(first (persist/read-from b (get d :stream) (get d :seq))))))
|
||||
|
||||
; every globally-ordered event, in commit order
|
||||
(define
|
||||
persist/read-global
|
||||
(fn
|
||||
(b)
|
||||
(map (fn (ptr) (persist/resolve-ref b ptr)) (persist/global-log b))))
|
||||
|
||||
; pointer events at or after a global position (incremental global consumers)
|
||||
(define
|
||||
persist/global-from
|
||||
(fn (b gpos) (persist/read-from b persist/global-stream gpos)))
|
||||
|
||||
; fold over all events in global commit order
|
||||
(define
|
||||
persist/project-global
|
||||
(fn (b step seed) (reduce step seed (persist/read-global b))))
|
||||
@@ -1,28 +0,0 @@
|
||||
; persist/idempotency — exactly-once append under retries. A command retried
|
||||
; after a network blip must not append its event twice. The caller supplies an
|
||||
; idempotency key; the first append for that (stream, key) stores the event and
|
||||
; remembers the key in the kv facet; a repeat returns the SAME event without
|
||||
; appending. Because the marker lives in kv, idempotency holds across a restart
|
||||
; too. Keyed per stream. Requires: lib/persist/log.sx, lib/persist/kv.sx.
|
||||
|
||||
(define persist/idem-key (fn (stream key) (str "idem/" stream "/" key)))
|
||||
|
||||
; true if an append-once has already been recorded for (stream, key)
|
||||
(define
|
||||
persist/seen?
|
||||
(fn (b stream key) (persist/kv-has? b (persist/idem-key stream key))))
|
||||
|
||||
; append at most once per (stream, key). Returns the stored event either way —
|
||||
; freshly appended on first use, the remembered one on a repeat.
|
||||
(define
|
||||
persist/append-once
|
||||
(fn
|
||||
(b stream key type at data)
|
||||
(let
|
||||
((k (persist/idem-key stream key)))
|
||||
(if
|
||||
(persist/kv-has? b k)
|
||||
(persist/kv-get b k)
|
||||
(let
|
||||
((ev (persist/append b stream type at data)))
|
||||
(begin (persist/kv-put b k ev) ev))))))
|
||||
@@ -1,44 +0,0 @@
|
||||
; persist/kv — the kv facet: current-state values, no history. For things
|
||||
; whose history does NOT matter (stock counts, config, profiles, session
|
||||
; blobs) and where projections materialize their read models.
|
||||
; Requires: lib/persist/backend.sx.
|
||||
|
||||
(define persist/kv-get (fn (b key) (persist/backend-kv-get b key)))
|
||||
(define
|
||||
persist/kv-put
|
||||
(fn (b key val) (begin (persist/backend-kv-put b key val) val)))
|
||||
(define persist/kv-delete (fn (b key) (persist/backend-kv-delete b key)))
|
||||
(define persist/kv-has? (fn (b key) (persist/backend-kv-has? b key)))
|
||||
(define persist/kv-keys (fn (b) (persist/backend-kv-keys b)))
|
||||
|
||||
; get with a default when the key is absent
|
||||
(define
|
||||
persist/kv-get-or
|
||||
(fn
|
||||
(b key dflt)
|
||||
(if (persist/kv-has? b key) (persist/kv-get b key) dflt)))
|
||||
|
||||
; read-modify-write: apply f to the current value (or dflt if absent), store result
|
||||
(define
|
||||
persist/kv-update
|
||||
(fn
|
||||
(b key dflt f)
|
||||
(persist/kv-put b key (f (persist/kv-get-or b key dflt)))))
|
||||
|
||||
; compare-and-swap: set key to new ONLY if its current value equals expected.
|
||||
; Returns new on success, or a conflict value {:conflict true :expected :actual}
|
||||
; the caller can re-read and retry on. The kv analogue of log append-expect.
|
||||
(define
|
||||
persist/kv-cas
|
||||
(fn
|
||||
(b key expected new)
|
||||
(let
|
||||
((actual (persist/kv-get b key)))
|
||||
(if (equal? actual expected) (persist/kv-put b key new) {:actual actual :expected expected :conflict true}))))
|
||||
|
||||
; create-only: put a value only if the key is absent; conflict if it exists
|
||||
(define
|
||||
persist/kv-put-new
|
||||
(fn
|
||||
(b key val)
|
||||
(if (persist/kv-has? b key) {:actual (persist/kv-get b key) :conflict true :reason "exists"} (persist/kv-put b key val))))
|
||||
@@ -1,43 +0,0 @@
|
||||
; persist/log — the log facet: append-only event streams. seq is assigned from
|
||||
; a monotonic per-stream high-water mark (1-based) held by the backend, so it
|
||||
; keeps climbing even after the log prefix is compacted away. Reads return the
|
||||
; events currently stored, oldest-first.
|
||||
; Requires: lib/persist/event.sx, lib/persist/backend.sx.
|
||||
|
||||
; logical last seq assigned in a stream (0 if none) — survives compaction
|
||||
(define
|
||||
persist/last-seq
|
||||
(fn (b stream) (persist/backend-last-seq b stream)))
|
||||
|
||||
; number of events physically stored in a stream (shrinks on compaction)
|
||||
(define
|
||||
persist/count
|
||||
(fn (b stream) (len (persist/backend-read b stream))))
|
||||
|
||||
; append an event, auto-assigning the next seq. Returns the stored event.
|
||||
(define
|
||||
persist/append
|
||||
(fn
|
||||
(b stream type at data)
|
||||
(let
|
||||
((seq (+ 1 (persist/last-seq b stream))))
|
||||
(let
|
||||
((ev (persist/event stream seq type at data)))
|
||||
(begin (persist/backend-append b stream ev) ev)))))
|
||||
|
||||
; read all events currently stored in a stream, oldest-first
|
||||
(define persist/read (fn (b stream) (persist/backend-read b stream)))
|
||||
|
||||
; read events with seq >= from
|
||||
(define
|
||||
persist/read-from
|
||||
(fn
|
||||
(b stream from)
|
||||
(filter
|
||||
(fn (e) (>= (persist/event-seq e) from))
|
||||
(persist/read b stream))))
|
||||
|
||||
; drop events with seq <= n (compaction); the seq counter is untouched
|
||||
(define
|
||||
persist/truncate
|
||||
(fn (b stream n) (persist/backend-truncate b stream n)))
|
||||
@@ -1,30 +0,0 @@
|
||||
; persist/project — a projection folds a stream's events into a read model.
|
||||
; A projection state is {:value v :seq s} where s is the last seq folded in,
|
||||
; so a projection can resume incrementally from where it left off (replay only
|
||||
; the tail). step : (value event) -> value. Determinism: step must be pure —
|
||||
; time lives on the event (event-at), never a clock here.
|
||||
; Requires: lib/persist/event.sx, lib/persist/log.sx.
|
||||
|
||||
; fold the tail (events with seq > prior's seq) onto a prior projection state
|
||||
(define
|
||||
persist/project-resume
|
||||
(fn
|
||||
(b stream step prior)
|
||||
(let
|
||||
((tail (persist/read-from b stream (+ 1 (get prior :seq)))))
|
||||
(reduce (fn (acc e) {:value (step (get acc :value) e) :seq (persist/event-seq e)}) prior tail))))
|
||||
|
||||
; project the whole stream from seed
|
||||
(define
|
||||
persist/project
|
||||
(fn (b stream step seed) (persist/project-resume b stream step {:value seed :seq 0})))
|
||||
|
||||
(define persist/project-value (fn (p) (get p :value)))
|
||||
(define persist/project-seq (fn (p) (get p :seq)))
|
||||
|
||||
; convenience: project and return just the value
|
||||
(define
|
||||
persist/project-fold
|
||||
(fn
|
||||
(b stream step seed)
|
||||
(persist/project-value (persist/project b stream step seed))))
|
||||
@@ -1,54 +0,0 @@
|
||||
; persist/query — read-side helpers over a stream: slice by seq range, filter by
|
||||
; timestamp / type / predicate. Pure reads composed from persist/read, no
|
||||
; backend changes. The log is bad at ad-hoc relational queries (project into a
|
||||
; kv read model for those) but these cover the common log scans: an audit window
|
||||
; by time, a type filter, a since-cursor for incremental consumers.
|
||||
; Requires: lib/persist/log.sx.
|
||||
|
||||
; events with seq in [from, to] inclusive
|
||||
(define
|
||||
persist/read-between
|
||||
(fn
|
||||
(b stream from to)
|
||||
(filter
|
||||
(fn
|
||||
(e)
|
||||
(and (>= (persist/event-seq e) from) (<= (persist/event-seq e) to)))
|
||||
(persist/read b stream))))
|
||||
|
||||
; events at or after a timestamp (events carry :at; never a clock here)
|
||||
(define
|
||||
persist/read-since
|
||||
(fn
|
||||
(b stream at)
|
||||
(filter (fn (e) (>= (persist/event-at e) at)) (persist/read b stream))))
|
||||
|
||||
; events whose :at is in [from, to] inclusive — an audit window
|
||||
(define
|
||||
persist/read-window
|
||||
(fn
|
||||
(b stream from to)
|
||||
(filter
|
||||
(fn
|
||||
(e)
|
||||
(and (>= (persist/event-at e) from) (<= (persist/event-at e) to)))
|
||||
(persist/read b stream))))
|
||||
|
||||
; events matching a predicate (e -> truthy)
|
||||
(define
|
||||
persist/read-where
|
||||
(fn (b stream pred) (filter pred (persist/read b stream))))
|
||||
|
||||
; events of a given type
|
||||
(define
|
||||
persist/read-by-type
|
||||
(fn
|
||||
(b stream type)
|
||||
(filter
|
||||
(fn (e) (equal? (persist/event-type e) type))
|
||||
(persist/read b stream))))
|
||||
|
||||
; count events matching a predicate
|
||||
(define
|
||||
persist/count-where
|
||||
(fn (b stream pred) (len (persist/read-where b stream pred))))
|
||||
@@ -1,27 +0,0 @@
|
||||
{
|
||||
"suites": {
|
||||
"event": {"pass": 6, "fail": 0},
|
||||
"log": {"pass": 9, "fail": 0},
|
||||
"kv": {"pass": 13, "fail": 0},
|
||||
"project": {"pass": 9, "fail": 0},
|
||||
"subscribe": {"pass": 9, "fail": 0},
|
||||
"concurrency": {"pass": 8, "fail": 0},
|
||||
"snapshot": {"pass": 11, "fail": 0},
|
||||
"compaction": {"pass": 11, "fail": 0},
|
||||
"durable": {"pass": 15, "fail": 0},
|
||||
"blob": {"pass": 14, "fail": 0},
|
||||
"view": {"pass": 11, "fail": 0},
|
||||
"cas": {"pass": 11, "fail": 0},
|
||||
"catalog": {"pass": 10, "fail": 0},
|
||||
"query": {"pass": 9, "fail": 0},
|
||||
"batch": {"pass": 10, "fail": 0},
|
||||
"upcast": {"pass": 9, "fail": 0},
|
||||
"idempotency": {"pass": 9, "fail": 0},
|
||||
"global": {"pass": 11, "fail": 0},
|
||||
"example-acl": {"pass": 10, "fail": 0},
|
||||
"recovery": {"pass": 6, "fail": 0}
|
||||
},
|
||||
"total_pass": 201,
|
||||
"total_fail": 0,
|
||||
"total": 201
|
||||
}
|
||||
@@ -1,27 +0,0 @@
|
||||
# persist Conformance Scoreboard
|
||||
|
||||
_Generated by `lib/persist/conformance.sh`_
|
||||
|
||||
| Suite | Pass | Fail | Total |
|
||||
|-------|-----:|-----:|------:|
|
||||
| event | 6 | 0 | 6 |
|
||||
| log | 9 | 0 | 9 |
|
||||
| kv | 13 | 0 | 13 |
|
||||
| project | 9 | 0 | 9 |
|
||||
| subscribe | 9 | 0 | 9 |
|
||||
| concurrency | 8 | 0 | 8 |
|
||||
| snapshot | 11 | 0 | 11 |
|
||||
| compaction | 11 | 0 | 11 |
|
||||
| durable | 15 | 0 | 15 |
|
||||
| blob | 14 | 0 | 14 |
|
||||
| view | 11 | 0 | 11 |
|
||||
| cas | 11 | 0 | 11 |
|
||||
| catalog | 10 | 0 | 10 |
|
||||
| query | 9 | 0 | 9 |
|
||||
| batch | 10 | 0 | 10 |
|
||||
| upcast | 9 | 0 | 9 |
|
||||
| idempotency | 9 | 0 | 9 |
|
||||
| global | 11 | 0 | 11 |
|
||||
| example-acl | 10 | 0 | 10 |
|
||||
| recovery | 6 | 0 | 6 |
|
||||
| **Total** | **201** | **0** | **201** |
|
||||
@@ -1,40 +0,0 @@
|
||||
; persist/snapshot — checkpoint a projection so a read model rebuilds as
|
||||
; snapshot + tail instead of a full replay. A snapshot is just a projection
|
||||
; state {:value :seq} stored in the kv facet under a namespaced key. The
|
||||
; headline property (tested both ways): snapshot + tail == full replay. Replay
|
||||
; is pure — it depends only on the stored snapshot and the log tail, never a
|
||||
; clock. Requires: lib/persist/project.sx, lib/persist/kv.sx.
|
||||
|
||||
(define persist/snapshot-key (fn (name) (str "snapshot/" name)))
|
||||
|
||||
; load the stored snapshot for name, or a fresh {:value seed :seq 0} if none
|
||||
(define
|
||||
persist/snapshot-load
|
||||
(fn
|
||||
(b name seed)
|
||||
(persist/kv-get-or b (persist/snapshot-key name) {:value seed :seq 0})))
|
||||
|
||||
; store a projection state as the snapshot for name; returns the state
|
||||
(define
|
||||
persist/snapshot-save
|
||||
(fn (b name state) (persist/kv-put b (persist/snapshot-key name) state)))
|
||||
|
||||
(define
|
||||
persist/snapshot-exists?
|
||||
(fn (b name) (persist/kv-has? b (persist/snapshot-key name))))
|
||||
|
||||
; replay = snapshot + tail: load the snapshot then fold events after it
|
||||
(define
|
||||
persist/replay
|
||||
(fn
|
||||
(b stream name step seed)
|
||||
(persist/project-resume b stream step (persist/snapshot-load b name seed))))
|
||||
|
||||
; replay then persist the new snapshot; returns the updated state
|
||||
(define
|
||||
persist/checkpoint
|
||||
(fn
|
||||
(b stream name step seed)
|
||||
(let
|
||||
((state (persist/replay b stream name step seed)))
|
||||
(begin (persist/snapshot-save b name state) state))))
|
||||
@@ -1,21 +0,0 @@
|
||||
; persist/subscribe — a subscription hub wraps a backend with per-stream
|
||||
; callbacks fired after each append. The canonical use: a callback re-runs a
|
||||
; projection (or bumps a kv counter) so read models update incrementally on
|
||||
; write instead of being recomputed on read.
|
||||
; callback signature: (backend stream event) -> ignored
|
||||
; Publish goes through the hub; direct persist/append on the backend bypasses
|
||||
; subscribers by design (bulk loads, replay).
|
||||
; Requires: lib/persist/log.sx.
|
||||
|
||||
(define persist/hub (fn (b) (let ((subs {})) {:subscriber-count (fn (stream) (let ((cs (get subs stream))) (if cs (len cs) 0))) :publish (fn (stream type at data) (let ((ev (persist/append b stream type at data))) (begin (for-each (fn (cb) (cb b stream ev)) (let ((cs (get subs stream))) (if cs cs (list)))) ev))) :subscribe (fn (stream cb) (let ((cur (get subs stream))) (set! subs (assoc subs stream (append (if cur cur (list)) cb))))) :backend b})))
|
||||
|
||||
(define persist/hub-backend (fn (h) (get h :backend)))
|
||||
(define
|
||||
persist/subscribe
|
||||
(fn (h stream cb) ((get h :subscribe) stream cb)))
|
||||
(define
|
||||
persist/publish
|
||||
(fn (h stream type at data) ((get h :publish) stream type at data)))
|
||||
(define
|
||||
persist/subscriber-count
|
||||
(fn (h stream) ((get h :subscriber-count) stream)))
|
||||
@@ -1,122 +0,0 @@
|
||||
; Extension — atomic batch append: contiguous seqs, transactional all-or-nothing.
|
||||
|
||||
(persist-test
|
||||
"batch assigns contiguous seqs"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((evs (persist/append-batch b "s" (list (list "a" 0 {}) (list "b" 0 {}) (list "c" 0 {})))))
|
||||
(list
|
||||
(persist/event-seq (first evs))
|
||||
(persist/event-seq (nth evs 2)))))
|
||||
(list 1 3))
|
||||
(persist-test
|
||||
"batch returns events in order"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((evs (persist/append-batch b "s" (list (list "a" 0 {}) (list "b" 0 {})))))
|
||||
(list
|
||||
(persist/event-type (first evs))
|
||||
(persist/event-type (nth evs 1)))))
|
||||
(list "a" "b"))
|
||||
(persist-test
|
||||
"batch grows the stream by its size"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append-batch
|
||||
b
|
||||
"s"
|
||||
(list
|
||||
(list "a" 0 {})
|
||||
(list "b" 0 {})
|
||||
(list "c" 0 {})))
|
||||
(persist/count b "s")))
|
||||
3)
|
||||
(persist-test
|
||||
"batch continues an existing stream"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(let
|
||||
((evs (persist/append-batch b "s" (list (list "a" 0 {}) (list "b" 0 {})))))
|
||||
(persist/event-seq (first evs)))))
|
||||
2)
|
||||
(persist-test
|
||||
"empty batch is a no-op"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin (persist/append-batch b "s" (list)) (persist/count b "s")))
|
||||
0)
|
||||
(persist-test
|
||||
"batch-expect with correct seq commits all"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append-batch-expect
|
||||
b
|
||||
"s"
|
||||
0
|
||||
(list
|
||||
(list "a" 0 {})
|
||||
(list "b" 0 {})))
|
||||
(persist/count b "s")))
|
||||
2)
|
||||
(persist-test
|
||||
"batch-expect with stale seq writes nothing"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append-batch-expect
|
||||
b
|
||||
"s"
|
||||
0
|
||||
(list
|
||||
(list "a" 0 {})
|
||||
(list "b" 0 {})))
|
||||
(persist/count b "s")))
|
||||
1)
|
||||
(persist-test
|
||||
"batch-expect stale returns a conflict"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/conflict?
|
||||
(persist/append-batch-expect
|
||||
b
|
||||
"s"
|
||||
0
|
||||
(list (list "a" 0 {}))))))
|
||||
true)
|
||||
(persist-test
|
||||
"batch data is preserved"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append-batch
|
||||
b
|
||||
"order"
|
||||
(list
|
||||
(list "placed" 0 {:id 1})
|
||||
(list "line" 0 {:sku "x"})))
|
||||
(get
|
||||
(persist/event-data (nth (persist/read b "order") 1))
|
||||
:sku)))
|
||||
"x")
|
||||
(persist-test
|
||||
"batch works on the durable backend"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/append-batch
|
||||
db
|
||||
"s"
|
||||
(list
|
||||
(list "a" 0 {})
|
||||
(list "b" 0 {})))
|
||||
(persist/last-seq db "s")))
|
||||
2)
|
||||
@@ -1,112 +0,0 @@
|
||||
; Phase 4 — blob backend: store the ref, never the bytes. Bytes live in a
|
||||
; separate content-addressed store (mock here).
|
||||
|
||||
(persist-test
|
||||
"blob-ref carries cid"
|
||||
(persist/blob-cid (persist/blob-ref "c1" 10 "image/png"))
|
||||
"c1")
|
||||
(persist-test
|
||||
"blob-ref carries size"
|
||||
(persist/blob-size (persist/blob-ref "c1" 10 "image/png"))
|
||||
10)
|
||||
(persist-test
|
||||
"blob-ref carries mime"
|
||||
(persist/blob-mime (persist/blob-ref "c1" 10 "image/png"))
|
||||
"image/png")
|
||||
(persist-test
|
||||
"blob-ref? true for a ref"
|
||||
(persist/blob-ref? (persist/blob-ref "c1" 1 "x"))
|
||||
true)
|
||||
(persist-test
|
||||
"blob-ref? false for a plain dict"
|
||||
(persist/blob-ref? {:n 1})
|
||||
false)
|
||||
|
||||
(persist-test
|
||||
"store returns a ref, not the bytes"
|
||||
(let
|
||||
((blob (persist/mock-blob (persist/mem-backend))))
|
||||
(persist/blob-ref? (persist/blob-store blob "PNGDATA" "image/png")))
|
||||
true)
|
||||
(persist-test
|
||||
"store records the byte length as size"
|
||||
(let
|
||||
((blob (persist/mock-blob (persist/mem-backend))))
|
||||
(persist/blob-size (persist/blob-store blob "12345" "text/plain")))
|
||||
5)
|
||||
(persist-test
|
||||
"fetch round-trips the bytes via the ref"
|
||||
(let
|
||||
((blob (persist/mock-blob (persist/mem-backend))))
|
||||
(let
|
||||
((ref (persist/blob-store blob "PAYLOAD" "text/plain")))
|
||||
(persist/blob-fetch blob ref)))
|
||||
"PAYLOAD")
|
||||
(persist-test
|
||||
"exists? true after store"
|
||||
(let
|
||||
((blob (persist/mock-blob (persist/mem-backend))))
|
||||
(let
|
||||
((ref (persist/blob-store blob "X" "text/plain")))
|
||||
(persist/blob-exists? blob ref)))
|
||||
true)
|
||||
(persist-test
|
||||
"content addressing: same bytes dedupe to same cid"
|
||||
(let
|
||||
((blob (persist/mock-blob (persist/mem-backend))))
|
||||
(equal?
|
||||
(persist/blob-cid (persist/blob-store blob "SAME" "text/plain"))
|
||||
(persist/blob-cid (persist/blob-store blob "SAME" "text/plain"))))
|
||||
true)
|
||||
(persist-test
|
||||
"different bytes get different cids"
|
||||
(let
|
||||
((blob (persist/mock-blob (persist/mem-backend))))
|
||||
(equal?
|
||||
(persist/blob-cid (persist/blob-store blob "A" "text/plain"))
|
||||
(persist/blob-cid (persist/blob-store blob "B" "text/plain"))))
|
||||
false)
|
||||
|
||||
; ---------- the invariant: persist holds the ref, never the bytes ----------
|
||||
(persist-test
|
||||
"a blob ref stored in kv is a ref"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend)))
|
||||
(blob (persist/mock-blob (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/kv-put
|
||||
db
|
||||
"avatar"
|
||||
(persist/blob-store blob "BIGIMAGE" "image/png"))
|
||||
(persist/blob-ref? (persist/kv-get db "avatar"))))
|
||||
true)
|
||||
(persist-test
|
||||
"the kv value does not contain the bytes"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend)))
|
||||
(blob (persist/mock-blob (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/kv-put
|
||||
db
|
||||
"avatar"
|
||||
(persist/blob-store blob "BIGIMAGE" "image/png"))
|
||||
(has-key? (persist/kv-get db "avatar") :bytes)))
|
||||
false)
|
||||
(persist-test
|
||||
"a blob ref stored in the log is a ref, bytes fetched separately"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend)))
|
||||
(store (persist/mem-backend)))
|
||||
(let
|
||||
((blob (persist/mock-blob store)))
|
||||
(begin
|
||||
(persist/append
|
||||
db
|
||||
"uploads"
|
||||
"added"
|
||||
0
|
||||
(persist/blob-store blob "FILEBYTES" "application/pdf"))
|
||||
(let
|
||||
((ref (persist/event-data (first (persist/read db "uploads")))))
|
||||
(list (persist/blob-ref? ref) (persist/blob-fetch blob ref))))))
|
||||
(list true "FILEBYTES"))
|
||||
@@ -1,96 +0,0 @@
|
||||
; Extension — kv compare-and-swap: atomic current-state updates. Uses
|
||||
; persist/conflict? from concurrency.sx.
|
||||
|
||||
(persist-test
|
||||
"cas on absent key with nil expected succeeds"
|
||||
(let ((b (persist/open))) (persist/kv-cas b "k" nil 1))
|
||||
1)
|
||||
(persist-test
|
||||
"cas with matching expected succeeds"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "k" 5)
|
||||
(persist/kv-cas b "k" 5 6)
|
||||
(persist/kv-get b "k")))
|
||||
6)
|
||||
(persist-test
|
||||
"cas with stale expected returns a conflict"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "k" 5)
|
||||
(persist/conflict? (persist/kv-cas b "k" 4 6))))
|
||||
true)
|
||||
(persist-test
|
||||
"a conflicting cas does not write"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "k" 5)
|
||||
(persist/kv-cas b "k" 4 6)
|
||||
(persist/kv-get b "k")))
|
||||
5)
|
||||
(persist-test
|
||||
"cas conflict carries expected and actual"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "k" 5)
|
||||
(let
|
||||
((r (persist/kv-cas b "k" 4 6)))
|
||||
(list (persist/conflict-expected r) (persist/conflict-actual r)))))
|
||||
(list 4 5))
|
||||
(persist-test
|
||||
"two cas racers: first wins, second conflicts"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "stock" 10)
|
||||
(persist/kv-cas b "stock" 10 9)
|
||||
(persist/conflict? (persist/kv-cas b "stock" 10 9))))
|
||||
true)
|
||||
(persist-test
|
||||
"retry after cas conflict succeeds"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "stock" 10)
|
||||
(persist/kv-cas b "stock" 10 9)
|
||||
(let
|
||||
((r (persist/kv-cas b "stock" 10 9)))
|
||||
(if
|
||||
(persist/conflict? r)
|
||||
(persist/kv-cas b "stock" (persist/conflict-actual r) 8)
|
||||
r))))
|
||||
8)
|
||||
(persist-test
|
||||
"put-new on absent key succeeds"
|
||||
(let ((b (persist/open))) (persist/kv-put-new b "k" 1))
|
||||
1)
|
||||
(persist-test
|
||||
"put-new on existing key conflicts"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "k" 1)
|
||||
(persist/conflict? (persist/kv-put-new b "k" 2))))
|
||||
true)
|
||||
(persist-test
|
||||
"put-new does not overwrite"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "k" 1)
|
||||
(persist/kv-put-new b "k" 2)
|
||||
(persist/kv-get b "k")))
|
||||
1)
|
||||
(persist-test
|
||||
"cas works on the durable backend"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/kv-put db "k" 1)
|
||||
(persist/kv-cas db "k" 1 2)
|
||||
(persist/kv-get db "k")))
|
||||
2)
|
||||
@@ -1,86 +0,0 @@
|
||||
; Extension — stream catalog: enumerate streams, count, existence, totals.
|
||||
|
||||
(persist-test
|
||||
"empty backend has no streams"
|
||||
(persist/stream-count (persist/open))
|
||||
0)
|
||||
(persist-test
|
||||
"stream-exists? false when absent"
|
||||
(persist/stream-exists? (persist/open) "orders")
|
||||
false)
|
||||
(persist-test
|
||||
"append registers a stream"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "orders" "x" 0 {})
|
||||
(persist/stream-exists? b "orders")))
|
||||
true)
|
||||
(persist-test
|
||||
"stream-count counts distinct streams"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "a" "x" 0 {})
|
||||
(persist/append b "b" "x" 0 {})
|
||||
(persist/append b "a" "x" 0 {})
|
||||
(persist/stream-count b)))
|
||||
2)
|
||||
(persist-test
|
||||
"compacted-away stream still lists"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "a" "x" 0 {})
|
||||
(persist/checkpoint b "a" "snap" (fn (acc e) acc) 0)
|
||||
(persist/truncate b "a" 1)
|
||||
(list (persist/count b "a") (persist/stream-exists? b "a"))))
|
||||
(list 0 true))
|
||||
(persist-test
|
||||
"kv-only backend lists no streams"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin (persist/kv-put b "k" 1) (persist/stream-count b)))
|
||||
0)
|
||||
(persist-test
|
||||
"total-events sums high-water marks"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "a" "x" 0 {})
|
||||
(persist/append b "a" "x" 0 {})
|
||||
(persist/append b "b" "x" 0 {})
|
||||
(persist/total-events b)))
|
||||
3)
|
||||
(persist-test
|
||||
"total-events counts compacted events too"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "a" "x" 0 {})
|
||||
(persist/append b "a" "x" 0 {})
|
||||
(persist/checkpoint b "a" "snap" (fn (acc e) acc) 0)
|
||||
(persist/truncate b "a" 2)
|
||||
(persist/total-events b)))
|
||||
2)
|
||||
(persist-test
|
||||
"catalog works on the durable backend"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/append db "a" "x" 0 {})
|
||||
(persist/append db "b" "x" 0 {})
|
||||
(persist/stream-count db)))
|
||||
2)
|
||||
(persist-test
|
||||
"catalog survives restart"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/append db "a" "x" 0 {})
|
||||
(persist/append db "b" "x" 0 {})))
|
||||
(persist/stream-count (persist/mock-durable disk))))
|
||||
2)
|
||||
@@ -1,124 +0,0 @@
|
||||
; Phase 3 — compaction: drop the snapshotted prefix; replay determinism holds.
|
||||
|
||||
(define comp-count (fn (acc e) (+ acc 1)))
|
||||
|
||||
(persist-test
|
||||
"uncompacted counts events since snapshot"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/uncompacted b "s" "snap" 0)))
|
||||
2)
|
||||
(persist-test
|
||||
"should-compact? false below threshold"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/should-compact? b "s" "snap" 3 0)))
|
||||
false)
|
||||
(persist-test
|
||||
"should-compact? true at threshold"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/should-compact? b "s" "snap" 3 0)))
|
||||
true)
|
||||
(persist-test
|
||||
"compact truncates the snapshotted prefix"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/compact b "s" "snap" comp-count 0)
|
||||
(persist/count b "s")))
|
||||
0)
|
||||
(persist-test
|
||||
"compact preserves logical last-seq"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/compact b "s" "snap" comp-count 0)
|
||||
(persist/last-seq b "s")))
|
||||
2)
|
||||
(persist-test
|
||||
"append after compaction continues the seq"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/compact b "s" "snap" comp-count 0)
|
||||
(persist/event-seq (persist/append b "s" "x" 0 {}))))
|
||||
3)
|
||||
(persist-test
|
||||
"replay after compaction == full count before compaction"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/compact b "s" "snap" comp-count 0)
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/project-value
|
||||
(persist/replay b "s" "snap" comp-count 0))))
|
||||
5)
|
||||
(persist-test
|
||||
"determinism: post-compaction replay value equals uncompacted full replay"
|
||||
(let
|
||||
((b (persist/open)) (c (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append c "s" "x" 0 {})
|
||||
(persist/append c "s" "x" 0 {})
|
||||
(persist/append c "s" "x" 0 {})
|
||||
(persist/compact b "s" "snap" comp-count 0)
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append c "s" "x" 0 {})
|
||||
(equal?
|
||||
(persist/project-value
|
||||
(persist/replay b "s" "snap" comp-count 0))
|
||||
(persist/project-fold c "s" comp-count 0))))
|
||||
true)
|
||||
(persist-test
|
||||
"maybe-compact below threshold does not truncate"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/maybe-compact b "s" "snap" comp-count 0 5)
|
||||
(persist/count b "s")))
|
||||
1)
|
||||
(persist-test
|
||||
"maybe-compact at threshold truncates"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/maybe-compact b "s" "snap" comp-count 0 2)
|
||||
(persist/count b "s")))
|
||||
0)
|
||||
(persist-test
|
||||
"compact is idempotent on an empty tail"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/compact b "s" "snap" comp-count 0)
|
||||
(persist/project-value
|
||||
(persist/compact b "s" "snap" comp-count 0))))
|
||||
1)
|
||||
@@ -1,96 +0,0 @@
|
||||
; Phase 2 — optimistic concurrency: conflict is a real result, not a crash.
|
||||
|
||||
(persist-test
|
||||
"append-expect 0 on empty stream succeeds"
|
||||
(persist/event-seq
|
||||
(persist/append-expect
|
||||
(persist/open)
|
||||
"s"
|
||||
0
|
||||
"x"
|
||||
0
|
||||
{}))
|
||||
1)
|
||||
(persist-test
|
||||
"append-expect with correct seq succeeds"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/event-seq
|
||||
(persist/append-expect b "s" 1 "x" 0 {}))))
|
||||
2)
|
||||
(persist-test
|
||||
"append-expect with stale seq returns a conflict"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/conflict?
|
||||
(persist/append-expect b "s" 1 "x" 0 {}))))
|
||||
true)
|
||||
(persist-test
|
||||
"a successful append is not a conflict"
|
||||
(persist/conflict?
|
||||
(persist/append-expect
|
||||
(persist/open)
|
||||
"s"
|
||||
0
|
||||
"x"
|
||||
0
|
||||
{}))
|
||||
false)
|
||||
(persist-test
|
||||
"conflict carries expected and actual"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(let
|
||||
((r (persist/append-expect b "s" 0 "x" 0 {})))
|
||||
(list (persist/conflict-expected r) (persist/conflict-actual r)))))
|
||||
(list 0 2))
|
||||
(persist-test
|
||||
"a conflicting append does not write"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append-expect b "s" 0 "x" 0 {})
|
||||
(persist/count b "s")))
|
||||
1)
|
||||
(persist-test
|
||||
"two writers: first wins, second conflicts"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((seen (persist/last-seq b "s")))
|
||||
(begin
|
||||
(persist/append-expect b "s" seen "x" 0 {:who "A"})
|
||||
(persist/conflict?
|
||||
(persist/append-expect b "s" seen "x" 0 {:who "B"})))))
|
||||
true)
|
||||
(persist-test
|
||||
"retry after conflict succeeds"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((seen (persist/last-seq b "s")))
|
||||
(begin
|
||||
(persist/append-expect b "s" seen "x" 0 {:who "A"})
|
||||
(let
|
||||
((r (persist/append-expect b "s" seen "x" 0 {:who "B"})))
|
||||
(if
|
||||
(persist/conflict? r)
|
||||
(persist/event-seq
|
||||
(persist/append-expect
|
||||
b
|
||||
"s"
|
||||
(persist/conflict-actual r)
|
||||
"x"
|
||||
0
|
||||
{:who "B"}))
|
||||
(persist/event-seq r))))))
|
||||
2)
|
||||
@@ -1,163 +0,0 @@
|
||||
; Phase 4 — durable backend over the IO-suspension boundary, tested with a mock
|
||||
; transport (the mock-IO harness for the durable protocol). The whole facet
|
||||
; stack must run unchanged on mock-durable, and a "crash/restart" (drop the
|
||||
; backend, keep the disk) must recover state by replay.
|
||||
|
||||
(define dur-count (fn (acc e) (+ acc 1)))
|
||||
|
||||
; ---------- request encoders ----------
|
||||
(persist-test
|
||||
"req-append encodes op + args"
|
||||
(persist/req-append "s" {:k 1})
|
||||
{:op "persist/append" :args (list "s" {:k 1})})
|
||||
(persist-test
|
||||
"req-kv-put encodes op + args"
|
||||
(persist/req-kv-put "k" 7)
|
||||
{:op "persist/kv-put" :args (list "k" 7)})
|
||||
|
||||
; ---------- serve round-trips against a disk ----------
|
||||
(persist-test
|
||||
"serve append then serve read"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(persist/serve
|
||||
disk
|
||||
(persist/req-append
|
||||
"s"
|
||||
(persist/event "s" 1 "x" 0 {:n 1})))
|
||||
(get
|
||||
(persist/event-data
|
||||
(first (persist/serve disk (persist/req-read "s"))))
|
||||
:n)))
|
||||
1)
|
||||
(persist-test
|
||||
"serve kv-put then kv-get"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(persist/serve disk (persist/req-kv-put "k" 42))
|
||||
(persist/serve disk (persist/req-kv-get "k"))))
|
||||
42)
|
||||
(persist-test
|
||||
"serve unknown op is a clear error"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(guard (e (true "errored")) (persist/serve disk {:op "persist/bogus" :args (list)})))
|
||||
"errored")
|
||||
|
||||
; ---------- full facet stack on mock-durable ----------
|
||||
(persist-test
|
||||
"log facet works on mock-durable"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/count db "s")))
|
||||
2)
|
||||
(persist-test
|
||||
"seq assignment works on mock-durable"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/event-seq (persist/append db "s" "x" 0 {}))))
|
||||
2)
|
||||
(persist-test
|
||||
"kv facet works on mock-durable"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin (persist/kv-put db "k" 5) (persist/kv-get db "k")))
|
||||
5)
|
||||
(persist-test
|
||||
"projection works on mock-durable"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/project-fold db "s" dur-count 0)))
|
||||
3)
|
||||
(persist-test
|
||||
"snapshot + replay work on mock-durable"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/checkpoint db "s" "snap" dur-count 0)
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/project-value
|
||||
(persist/replay db "s" "snap" dur-count 0))))
|
||||
3)
|
||||
(persist-test
|
||||
"compaction works on mock-durable"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/compact db "s" "snap" dur-count 0)
|
||||
(list (persist/count db "s") (persist/last-seq db "s"))))
|
||||
(list 0 2))
|
||||
|
||||
; ---------- crash / restart replay ----------
|
||||
(persist-test
|
||||
"restart recovers log state from the disk"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/append db "s" "x" 0 {})))
|
||||
(let
|
||||
((db2 (persist/mock-durable disk)))
|
||||
(persist/project-fold db2 "s" dur-count 0))))
|
||||
2)
|
||||
(persist-test
|
||||
"restart continues the seq counter"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/append db "s" "x" 0 {})))
|
||||
(let
|
||||
((db2 (persist/mock-durable disk)))
|
||||
(persist/event-seq (persist/append db2 "s" "x" 0 {})))))
|
||||
3)
|
||||
(persist-test
|
||||
"restart recovers a kv value"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(persist/kv-put db "cfg" "on"))
|
||||
(let ((db2 (persist/mock-durable disk))) (persist/kv-get db2 "cfg"))))
|
||||
"on")
|
||||
(persist-test
|
||||
"restart from snapshot equals full replay"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/append db "s" "x" 0 {})
|
||||
(persist/checkpoint db "s" "snap" dur-count 0)
|
||||
(persist/append db "s" "x" 0 {})))
|
||||
(let
|
||||
((db2 (persist/mock-durable disk)))
|
||||
(equal?
|
||||
(persist/project-value
|
||||
(persist/replay db2 "s" "snap" dur-count 0))
|
||||
(persist/project-fold db2 "s" dur-count 0)))))
|
||||
true)
|
||||
@@ -1,30 +0,0 @@
|
||||
; Phase 1 — event record accessors. Uses the persist-test harness
|
||||
; (persist-test name got expected) provided by conformance.sh.
|
||||
|
||||
(persist-test
|
||||
"event-stream"
|
||||
(persist/event-stream
|
||||
(persist/event "s" 1 "t" 0 {}))
|
||||
"s")
|
||||
(persist-test
|
||||
"event-seq"
|
||||
(persist/event-seq (persist/event "s" 3 "t" 0 {}))
|
||||
3)
|
||||
(persist-test
|
||||
"event-type"
|
||||
(persist/event-type
|
||||
(persist/event "s" 1 "create" 0 {}))
|
||||
"create")
|
||||
(persist-test
|
||||
"event-at"
|
||||
(persist/event-at (persist/event "s" 1 "t" 42 {}))
|
||||
42)
|
||||
(persist-test
|
||||
"event-data"
|
||||
(persist/event-data
|
||||
(persist/event "s" 1 "t" 0 {:x 9}))
|
||||
{:x 9})
|
||||
(persist-test
|
||||
"event is a dict with all fields"
|
||||
(len (keys (persist/event "s" 1 "t" 0 {})))
|
||||
5)
|
||||
@@ -1,104 +0,0 @@
|
||||
; Reference migration — acl grants on persist. Proves the AFTER behaviour,
|
||||
; including the capabilities the hand-rolled BEFORE version could not provide
|
||||
; (durability across restart + an audit trail).
|
||||
|
||||
(persist-test
|
||||
"grant then can?"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(acl/grant b "doc-1" "alice" 0)
|
||||
(acl/can? b "doc-1" "alice")))
|
||||
true)
|
||||
(persist-test
|
||||
"no grant means no access"
|
||||
(acl/can? (persist/open) "doc-1" "alice")
|
||||
false)
|
||||
(persist-test
|
||||
"revoke removes access"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(acl/grant b "doc-1" "alice" 0)
|
||||
(acl/revoke b "doc-1" "alice" 1)
|
||||
(acl/can? b "doc-1" "alice")))
|
||||
false)
|
||||
(persist-test
|
||||
"multiple principals tracked independently"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(acl/grant b "doc-1" "alice" 0)
|
||||
(acl/grant b "doc-1" "bob" 1)
|
||||
(acl/revoke b "doc-1" "alice" 2)
|
||||
(list (acl/can? b "doc-1" "alice") (acl/can? b "doc-1" "bob"))))
|
||||
(list false true))
|
||||
(persist-test
|
||||
"granting twice is idempotent in the set"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(acl/grant b "doc-1" "alice" 0)
|
||||
(acl/grant b "doc-1" "alice" 1)
|
||||
(len (acl/grants b "doc-1"))))
|
||||
1)
|
||||
(persist-test
|
||||
"grants on different resources are isolated"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(acl/grant b "doc-1" "alice" 0)
|
||||
(acl/grant b "doc-2" "bob" 0)
|
||||
(list (acl/can? b "doc-1" "bob") (acl/can? b "doc-2" "bob"))))
|
||||
(list false true))
|
||||
(persist-test
|
||||
"audit window answers when-was-it-granted (new capability)"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(acl/grant b "doc-1" "alice" 100)
|
||||
(acl/revoke b "doc-1" "alice" 200)
|
||||
(acl/grant b "doc-1" "bob" 300)
|
||||
(len (acl/audit-window b "doc-1" 150 300))))
|
||||
2)
|
||||
(persist-test
|
||||
"materialized view stays current on publish"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/view-attach (persist/hub b) (acl/view "doc-1"))))
|
||||
(begin
|
||||
(persist/publish
|
||||
h
|
||||
(acl/stream "doc-1")
|
||||
"granted"
|
||||
0
|
||||
{:principal "alice"})
|
||||
(acl/can-fast? b "doc-1" "alice"))))
|
||||
true)
|
||||
(persist-test
|
||||
"grants survive restart on the durable backend (the headline win)"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(acl/grant db "doc-1" "alice" 0)
|
||||
(acl/grant db "doc-1" "bob" 1)))
|
||||
(let
|
||||
((db2 (persist/mock-durable disk)))
|
||||
(list (acl/can? db2 "doc-1" "alice") (acl/can? db2 "doc-1" "bob")))))
|
||||
(list true true))
|
||||
(persist-test
|
||||
"revoke before restart is still revoked after"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(acl/grant db "doc-1" "alice" 0)
|
||||
(acl/revoke db "doc-1" "alice" 1)))
|
||||
(acl/can? (persist/mock-durable disk) "doc-1" "alice")))
|
||||
false)
|
||||
@@ -1,123 +0,0 @@
|
||||
; Extension — global commit ordering across streams.
|
||||
|
||||
(persist-test
|
||||
"gappend returns the stream event with its local seq"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(persist/event-seq
|
||||
(persist/gappend b "orders" "placed" 0 {})))
|
||||
1)
|
||||
(persist-test
|
||||
"global-pos advances per gappend regardless of stream"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/gappend b "orders" "placed" 0 {})
|
||||
(persist/gappend b "users" "joined" 0 {})
|
||||
(persist/gappend b "orders" "placed" 0 {})
|
||||
(persist/global-pos b)))
|
||||
3)
|
||||
(persist-test
|
||||
"read-global returns events in commit order across streams"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/gappend b "orders" "placed" 0 {:n 1})
|
||||
(persist/gappend b "users" "joined" 0 {:n 2})
|
||||
(persist/gappend b "orders" "placed" 0 {:n 3})
|
||||
(let
|
||||
((g (persist/read-global b)))
|
||||
(list
|
||||
(get (persist/event-data (nth g 0)) :n)
|
||||
(get (persist/event-data (nth g 1)) :n)
|
||||
(get (persist/event-data (nth g 2)) :n)))))
|
||||
(list 1 2 3))
|
||||
(persist-test
|
||||
"read-global resolves to the right streams"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/gappend b "orders" "placed" 0 {})
|
||||
(persist/gappend b "users" "joined" 0 {})
|
||||
(let
|
||||
((g (persist/read-global b)))
|
||||
(list
|
||||
(persist/event-stream (nth g 0))
|
||||
(persist/event-stream (nth g 1))))))
|
||||
(list "orders" "users"))
|
||||
(persist-test
|
||||
"project-global folds across all streams in order"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/gappend b "a" "x" 0 {:v 10})
|
||||
(persist/gappend b "b" "x" 0 {:v 20})
|
||||
(persist/gappend b "a" "x" 0 {:v 30})
|
||||
(persist/project-global
|
||||
b
|
||||
(fn (acc e) (+ acc (get (persist/event-data e) :v)))
|
||||
0)))
|
||||
60)
|
||||
(persist-test
|
||||
"global index is hidden from the public catalog"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/gappend b "orders" "placed" 0 {})
|
||||
(persist/gappend b "users" "joined" 0 {})
|
||||
(list (persist/stream-count b) (persist/stream-exists? b "$global"))))
|
||||
(list 2 false))
|
||||
(persist-test
|
||||
"streams-all reveals the reserved index"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/gappend b "orders" "placed" 0 {})
|
||||
(contains? (persist/streams-all b) "$global")))
|
||||
true)
|
||||
(persist-test
|
||||
"global-from gives pointers at or after a position"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/gappend b "a" "x" 0 {})
|
||||
(persist/gappend b "a" "x" 0 {})
|
||||
(persist/gappend b "a" "x" 0 {})
|
||||
(len (persist/global-from b 2))))
|
||||
2)
|
||||
(persist-test
|
||||
"plain append does not touch the global index"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "orders" "placed" 0 {})
|
||||
(persist/gappend b "orders" "placed" 0 {})
|
||||
(persist/global-pos b)))
|
||||
1)
|
||||
(persist-test
|
||||
"global ordering works on the durable backend"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/gappend db "a" "x" 0 {:v 1})
|
||||
(persist/gappend db "b" "x" 0 {:v 2})
|
||||
(persist/project-global
|
||||
db
|
||||
(fn (acc e) (+ acc (get (persist/event-data e) :v)))
|
||||
0)))
|
||||
3)
|
||||
(persist-test
|
||||
"global order survives restart (determinism)"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/gappend db "a" "x" 0 {:v 1})
|
||||
(persist/gappend db "b" "x" 0 {:v 2})))
|
||||
(persist/project-global
|
||||
(persist/mock-durable disk)
|
||||
(fn (acc e) (+ acc (get (persist/event-data e) :v)))
|
||||
0)))
|
||||
3)
|
||||
@@ -1,92 +0,0 @@
|
||||
; Extension — exactly-once append under retries.
|
||||
|
||||
(persist-test
|
||||
"seen? false before first append"
|
||||
(persist/seen? (persist/open) "orders" "cmd-1")
|
||||
false)
|
||||
(persist-test
|
||||
"append-once appends on first use"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append-once b "orders" "cmd-1" "placed" 0 {})
|
||||
(persist/count b "orders")))
|
||||
1)
|
||||
(persist-test
|
||||
"seen? true after first append"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append-once b "orders" "cmd-1" "placed" 0 {})
|
||||
(persist/seen? b "orders" "cmd-1")))
|
||||
true)
|
||||
(persist-test
|
||||
"repeat with same key does not append again"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append-once b "orders" "cmd-1" "placed" 0 {})
|
||||
(persist/append-once b "orders" "cmd-1" "placed" 0 {})
|
||||
(persist/append-once b "orders" "cmd-1" "placed" 0 {})
|
||||
(persist/count b "orders")))
|
||||
1)
|
||||
(persist-test
|
||||
"repeat returns the same event (same seq)"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((e1 (persist/append-once b "orders" "cmd-1" "placed" 0 {})))
|
||||
(persist/event-seq
|
||||
(persist/append-once b "orders" "cmd-1" "placed" 0 {}))))
|
||||
1)
|
||||
(persist-test
|
||||
"different keys append separately"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append-once b "orders" "cmd-1" "placed" 0 {})
|
||||
(persist/append-once b "orders" "cmd-2" "placed" 0 {})
|
||||
(persist/count b "orders")))
|
||||
2)
|
||||
(persist-test
|
||||
"idempotency is per-stream"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append-once b "a" "cmd-1" "x" 0 {})
|
||||
(persist/append-once b "b" "cmd-1" "x" 0 {})
|
||||
(list (persist/count b "a") (persist/count b "b"))))
|
||||
(list 1 1))
|
||||
(persist-test
|
||||
"stored data is preserved on first append"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(get
|
||||
(persist/event-data
|
||||
(persist/append-once b "s" "k" "x" 0 {:n 9}))
|
||||
:n))
|
||||
9)
|
||||
(persist-test
|
||||
"idempotency survives restart on the durable backend"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(persist/append-once
|
||||
(persist/mock-durable disk)
|
||||
"orders"
|
||||
"cmd-1"
|
||||
"placed"
|
||||
0
|
||||
{})
|
||||
(let
|
||||
((db2 (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/append-once
|
||||
db2
|
||||
"orders"
|
||||
"cmd-1"
|
||||
"placed"
|
||||
0
|
||||
{})
|
||||
(persist/count db2 "orders")))))
|
||||
1)
|
||||
@@ -1,86 +0,0 @@
|
||||
; Phase 1 — kv facet: get/put/delete/has?/keys, get-or, update.
|
||||
|
||||
(persist-test "absent key reads nil" (persist/kv-get (persist/open) "x") nil)
|
||||
(persist-test
|
||||
"has? false when absent"
|
||||
(persist/kv-has? (persist/open) "x")
|
||||
false)
|
||||
(persist-test
|
||||
"put then get"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin (persist/kv-put b "x" 7) (persist/kv-get b "x")))
|
||||
7)
|
||||
(persist-test
|
||||
"put returns value"
|
||||
(let ((b (persist/open))) (persist/kv-put b "x" 9))
|
||||
9)
|
||||
(persist-test
|
||||
"has? true after put"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin (persist/kv-put b "x" 1) (persist/kv-has? b "x")))
|
||||
true)
|
||||
(persist-test
|
||||
"put overwrites"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "x" 1)
|
||||
(persist/kv-put b "x" 2)
|
||||
(persist/kv-get b "x")))
|
||||
2)
|
||||
(persist-test
|
||||
"delete removes key"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "x" 1)
|
||||
(persist/kv-delete b "x")
|
||||
(persist/kv-has? b "x")))
|
||||
false)
|
||||
(persist-test
|
||||
"delete then get is nil"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "x" 1)
|
||||
(persist/kv-delete b "x")
|
||||
(persist/kv-get b "x")))
|
||||
nil)
|
||||
(persist-test
|
||||
"keys lists stored keys"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "a" 1)
|
||||
(persist/kv-put b "b" 2)
|
||||
(len (persist/kv-keys b))))
|
||||
2)
|
||||
(persist-test
|
||||
"get-or returns default when absent"
|
||||
(persist/kv-get-or (persist/open) "x" 99)
|
||||
99)
|
||||
(persist-test
|
||||
"get-or returns value when present"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-put b "x" 5)
|
||||
(persist/kv-get-or b "x" 99)))
|
||||
5)
|
||||
(persist-test
|
||||
"kv-update applies fn over default"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/kv-update b "n" 0 (fn (v) (+ v 1)))
|
||||
(persist/kv-update b "n" 0 (fn (v) (+ v 1)))
|
||||
(persist/kv-get b "n")))
|
||||
2)
|
||||
(persist-test
|
||||
"kv facet does not touch log"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin (persist/kv-put b "x" 1) (persist/count b "x")))
|
||||
0)
|
||||
@@ -1,81 +0,0 @@
|
||||
; Phase 1 — log facet: append/read/read-from, sequential seq, stream isolation.
|
||||
; Note: map returns an array-backed list not equal? to a (list ...) literal,
|
||||
; so assertions build their compared list with list/nth, not map.
|
||||
|
||||
(persist-test
|
||||
"empty stream reads empty"
|
||||
(len (persist/read (persist/open) "orders"))
|
||||
0)
|
||||
(persist-test
|
||||
"last-seq empty is 0"
|
||||
(persist/last-seq (persist/open) "orders")
|
||||
0)
|
||||
(persist-test
|
||||
"append returns event with seq 1"
|
||||
(persist/event-seq
|
||||
(persist/append (persist/open) "orders" "placed" 0 {:id 1}))
|
||||
1)
|
||||
(persist-test
|
||||
"append assigns sequential seqs"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "orders" "placed" 0 {})
|
||||
(persist/append b "orders" "placed" 1 {})
|
||||
(persist/event-seq
|
||||
(persist/append b "orders" "placed" 2 {}))))
|
||||
3)
|
||||
(persist-test
|
||||
"read returns events oldest-first"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "a" 0 {:n 1})
|
||||
(persist/append b "s" "b" 0 {:n 2})
|
||||
(let
|
||||
((es (persist/read b "s")))
|
||||
(list
|
||||
(get (persist/event-data (nth es 0)) :n)
|
||||
(get (persist/event-data (nth es 1)) :n)))))
|
||||
(list 1 2))
|
||||
(persist-test
|
||||
"count tracks appends"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "a" 0 {})
|
||||
(persist/append b "s" "a" 0 {})
|
||||
(persist/count b "s")))
|
||||
2)
|
||||
(persist-test
|
||||
"streams are isolated"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s1" "a" 0 {})
|
||||
(persist/append b "s2" "a" 0 {})
|
||||
(persist/append b "s2" "a" 0 {})
|
||||
(list (persist/count b "s1") (persist/count b "s2"))))
|
||||
(list 1 2))
|
||||
(persist-test
|
||||
"read-from filters by seq"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "a" 0 {})
|
||||
(persist/append b "s" "a" 0 {})
|
||||
(persist/append b "s" "a" 0 {})
|
||||
(let
|
||||
((es (persist/read-from b "s" 2)))
|
||||
(list
|
||||
(persist/event-seq (nth es 0))
|
||||
(persist/event-seq (nth es 1))))))
|
||||
(list 2 3))
|
||||
(persist-test
|
||||
"read-from past end is empty"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "a" 0 {})
|
||||
(len (persist/read-from b "s" 5))))
|
||||
0)
|
||||
@@ -1,115 +0,0 @@
|
||||
; Phase 2 — projections: fold a stream into a read model, resume incrementally.
|
||||
|
||||
(persist-test
|
||||
"project empty stream returns seed value"
|
||||
(persist/project-fold
|
||||
(persist/open)
|
||||
"s"
|
||||
(fn (acc e) (+ acc 1))
|
||||
0)
|
||||
0)
|
||||
(persist-test
|
||||
"project empty stream seq is 0"
|
||||
(persist/project-seq
|
||||
(persist/project (persist/open) "s" (fn (a e) a) 0))
|
||||
0)
|
||||
(persist-test
|
||||
"project counts events"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/project-fold
|
||||
b
|
||||
"s"
|
||||
(fn (acc e) (+ acc 1))
|
||||
0)))
|
||||
3)
|
||||
(persist-test
|
||||
"project sums event data"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "ledger" "credit" 0 {:amt 10})
|
||||
(persist/append b "ledger" "credit" 1 {:amt 5})
|
||||
(persist/append b "ledger" "debit" 2 {:amt 3})
|
||||
(persist/project-fold
|
||||
b
|
||||
"ledger"
|
||||
(fn
|
||||
(bal e)
|
||||
(if
|
||||
(equal? (persist/event-type e) "credit")
|
||||
(+ bal (get (persist/event-data e) :amt))
|
||||
(- bal (get (persist/event-data e) :amt))))
|
||||
0)))
|
||||
12)
|
||||
(persist-test
|
||||
"project tracks last seq"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/project-seq (persist/project b "s" (fn (a e) a) 0))))
|
||||
2)
|
||||
(persist-test
|
||||
"resume folds only the tail"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(let
|
||||
((p1 (persist/project b "s" (fn (acc e) (+ acc 1)) 0)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/project-value
|
||||
(persist/project-resume
|
||||
b
|
||||
"s"
|
||||
(fn (acc e) (+ acc 1))
|
||||
p1))))))
|
||||
3)
|
||||
(persist-test
|
||||
"resume with no new events is a no-op"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(let
|
||||
((p1 (persist/project b "s" (fn (acc e) (+ acc 1)) 0)))
|
||||
(persist/project-value
|
||||
(persist/project-resume b "s" (fn (acc e) (+ acc 1)) p1)))))
|
||||
1)
|
||||
(persist-test
|
||||
"resume advances seq"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(let
|
||||
((p1 (persist/project b "s" (fn (a e) a) 0)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/project-seq
|
||||
(persist/project-resume b "s" (fn (a e) a) p1))))))
|
||||
3)
|
||||
(persist-test
|
||||
"full project equals seed-resume from zero"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(equal?
|
||||
(persist/project b "s" (fn (acc e) (+ acc 1)) 0)
|
||||
(persist/project-resume
|
||||
b
|
||||
"s"
|
||||
(fn (acc e) (+ acc 1))
|
||||
{:value 0 :seq 0}))))
|
||||
true)
|
||||
@@ -1,101 +0,0 @@
|
||||
; Extension — read-side query helpers. Assertions count / index, not map vs list.
|
||||
|
||||
(define q-seqs (fn (es) (map persist/event-seq es)))
|
||||
|
||||
(persist-test
|
||||
"read-between slices a seq range"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(let
|
||||
((es (persist/read-between b "s" 2 3)))
|
||||
(list
|
||||
(len es)
|
||||
(persist/event-seq (first es))
|
||||
(persist/event-seq (nth es 1))))))
|
||||
(list 2 2 3))
|
||||
(persist-test
|
||||
"read-between is inclusive of endpoints"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(len (persist/read-between b "s" 1 3))))
|
||||
3)
|
||||
(persist-test
|
||||
"read-since filters by timestamp"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 100 {})
|
||||
(persist/append b "s" "x" 200 {})
|
||||
(persist/append b "s" "x" 300 {})
|
||||
(len (persist/read-since b "s" 200))))
|
||||
2)
|
||||
(persist-test
|
||||
"read-window is an inclusive time range"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 100 {})
|
||||
(persist/append b "s" "x" 200 {})
|
||||
(persist/append b "s" "x" 300 {})
|
||||
(persist/append b "s" "x" 400 {})
|
||||
(len (persist/read-window b "s" 200 300))))
|
||||
2)
|
||||
(persist-test
|
||||
"read-by-type filters by event type"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "created" 0 {})
|
||||
(persist/append b "s" "updated" 0 {})
|
||||
(persist/append b "s" "created" 0 {})
|
||||
(len (persist/read-by-type b "s" "created"))))
|
||||
2)
|
||||
(persist-test
|
||||
"read-where filters by predicate over data"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {:amt 5})
|
||||
(persist/append b "s" "x" 0 {:amt 15})
|
||||
(persist/append b "s" "x" 0 {:amt 25})
|
||||
(len
|
||||
(persist/read-where
|
||||
b
|
||||
"s"
|
||||
(fn (e) (> (get (persist/event-data e) :amt) 10))))))
|
||||
2)
|
||||
(persist-test
|
||||
"count-where counts matches"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "a" 0 {})
|
||||
(persist/append b "s" "b" 0 {})
|
||||
(persist/append b "s" "a" 0 {})
|
||||
(persist/count-where
|
||||
b
|
||||
"s"
|
||||
(fn (e) (equal? (persist/event-type e) "a")))))
|
||||
2)
|
||||
(persist-test
|
||||
"queries return empty on empty stream"
|
||||
(len (persist/read-since (persist/open) "s" 0))
|
||||
0)
|
||||
(persist-test
|
||||
"queries work on the durable backend"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(begin
|
||||
(persist/append db "s" "x" 100 {})
|
||||
(persist/append db "s" "x" 200 {})
|
||||
(len (persist/read-since db "s" 150))))
|
||||
1)
|
||||
@@ -1,126 +0,0 @@
|
||||
; Phase 4 — crash/restart integration. A whole subsystem (an order ledger:
|
||||
; event log + a kv read model kept by a subscription + a periodic snapshot + an
|
||||
; invoice blob ref) on the durable backend must survive a restart. "Crash" =
|
||||
; drop every in-process object (backend, hub, projections); "restart" = rebuild
|
||||
; them over the SAME disk + blob store. Nothing but the disk and content store
|
||||
; carries across, exactly as a real process restart.
|
||||
|
||||
(define rec-count (fn (acc e) (+ acc 1)))
|
||||
|
||||
(persist-test
|
||||
"log survives restart and seq continues"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/append db "orders" "placed" 0 {:id "a"})
|
||||
(persist/append db "orders" "placed" 1 {:id "b"})))
|
||||
(let
|
||||
((db2 (persist/mock-durable disk)))
|
||||
(list
|
||||
(persist/project-fold db2 "orders" rec-count 0)
|
||||
(persist/event-seq
|
||||
(persist/append db2 "orders" "placed" 2 {:id "c"}))))))
|
||||
(list 2 3))
|
||||
(persist-test
|
||||
"subscription-driven kv read model survives restart"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((h (persist/hub (persist/mock-durable disk))))
|
||||
(begin
|
||||
(persist/subscribe
|
||||
h
|
||||
"orders"
|
||||
(fn
|
||||
(bk s e)
|
||||
(persist/kv-update
|
||||
bk
|
||||
"order-count"
|
||||
0
|
||||
(fn (n) (+ n 1)))))
|
||||
(persist/publish h "orders" "placed" 0 {})
|
||||
(persist/publish h "orders" "placed" 1 {})))
|
||||
(let
|
||||
((db2 (persist/mock-durable disk)))
|
||||
(persist/kv-get db2 "order-count"))))
|
||||
2)
|
||||
(persist-test
|
||||
"snapshot taken before crash drives replay after restart"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/append db "orders" "placed" 0 {})
|
||||
(persist/append db "orders" "placed" 1 {})
|
||||
(persist/checkpoint db "orders" "count" rec-count 0)
|
||||
(persist/append db "orders" "placed" 2 {})))
|
||||
(let
|
||||
((db2 (persist/mock-durable disk)))
|
||||
(equal?
|
||||
(persist/project-value
|
||||
(persist/replay db2 "orders" "count" rec-count 0))
|
||||
(persist/project-fold db2 "orders" rec-count 0)))))
|
||||
true)
|
||||
(persist-test
|
||||
"compacted log still replays correctly after restart"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/append db "orders" "placed" 0 {})
|
||||
(persist/append db "orders" "placed" 1 {})
|
||||
(persist/append db "orders" "placed" 2 {})
|
||||
(persist/compact db "orders" "count" rec-count 0)
|
||||
(persist/append db "orders" "placed" 3 {})))
|
||||
(let
|
||||
((db2 (persist/mock-durable disk)))
|
||||
(persist/project-value
|
||||
(persist/replay db2 "orders" "count" rec-count 0)))))
|
||||
4)
|
||||
(persist-test
|
||||
"invoice blob ref survives restart, bytes fetched from content store"
|
||||
(let
|
||||
((disk (persist/mem-backend)) (store (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)) (blob (persist/mock-blob store)))
|
||||
(persist/kv-put
|
||||
db
|
||||
"invoice"
|
||||
(persist/blob-store blob "INVOICEPDF" "application/pdf")))
|
||||
(let
|
||||
((db2 (persist/mock-durable disk))
|
||||
(blob2 (persist/mock-blob store)))
|
||||
(persist/blob-fetch blob2 (persist/kv-get db2 "invoice")))))
|
||||
"INVOICEPDF")
|
||||
(persist-test
|
||||
"two independent restarts converge to the same state (determinism)"
|
||||
(let
|
||||
((disk (persist/mem-backend)))
|
||||
(begin
|
||||
(let
|
||||
((db (persist/mock-durable disk)))
|
||||
(begin
|
||||
(persist/append db "orders" "placed" 0 {})
|
||||
(persist/append db "orders" "placed" 1 {})
|
||||
(persist/append db "orders" "placed" 2 {})))
|
||||
(equal?
|
||||
(persist/project-fold
|
||||
(persist/mock-durable disk)
|
||||
"orders"
|
||||
rec-count
|
||||
0)
|
||||
(persist/project-fold
|
||||
(persist/mock-durable disk)
|
||||
"orders"
|
||||
rec-count
|
||||
0))))
|
||||
true)
|
||||
@@ -1,114 +0,0 @@
|
||||
; Phase 3 — snapshots + replay. Headline: snapshot + tail == full replay.
|
||||
|
||||
(define snap-count (fn (acc e) (+ acc 1)))
|
||||
|
||||
(persist-test
|
||||
"no snapshot loads fresh seed state"
|
||||
(persist/snapshot-load (persist/open) "feed" 0)
|
||||
{:value 0 :seq 0})
|
||||
(persist-test
|
||||
"snapshot-exists? false initially"
|
||||
(persist/snapshot-exists? (persist/open) "feed")
|
||||
false)
|
||||
(persist-test
|
||||
"checkpoint stores a snapshot"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/checkpoint b "s" "snap" snap-count 0)
|
||||
(persist/snapshot-exists? b "snap")))
|
||||
true)
|
||||
(persist-test
|
||||
"checkpoint value equals full projection"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/project-value
|
||||
(persist/checkpoint b "s" "snap" snap-count 0))))
|
||||
3)
|
||||
(persist-test
|
||||
"checkpoint records the last seq"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/project-seq
|
||||
(persist/checkpoint b "s" "snap" snap-count 0))))
|
||||
2)
|
||||
(persist-test
|
||||
"replay after checkpoint only folds the tail"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/checkpoint b "s" "snap" snap-count 0)
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/project-value
|
||||
(persist/replay b "s" "snap" snap-count 0))))
|
||||
3)
|
||||
(persist-test
|
||||
"snapshot + tail == full replay (value)"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/checkpoint b "s" "snap" snap-count 0)
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(equal?
|
||||
(persist/project-value
|
||||
(persist/replay b "s" "snap" snap-count 0))
|
||||
(persist/project-fold b "s" snap-count 0))))
|
||||
true)
|
||||
(persist-test
|
||||
"snapshot + tail == full replay (whole state)"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/checkpoint b "s" "snap" snap-count 0)
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(equal?
|
||||
(persist/replay b "s" "snap" snap-count 0)
|
||||
(persist/project b "s" snap-count 0))))
|
||||
true)
|
||||
(persist-test
|
||||
"replay determinism: two replays from same snapshot agree"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/checkpoint b "s" "snap" snap-count 0)
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(equal?
|
||||
(persist/replay b "s" "snap" snap-count 0)
|
||||
(persist/replay b "s" "snap" snap-count 0))))
|
||||
true)
|
||||
(persist-test
|
||||
"re-checkpoint advances the snapshot"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/checkpoint b "s" "snap" snap-count 0)
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/checkpoint b "s" "snap" snap-count 0)
|
||||
(persist/project-seq (persist/snapshot-load b "snap" 0))))
|
||||
2)
|
||||
(persist-test
|
||||
"snapshots are keyed independently"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(persist/checkpoint b "s" "a" snap-count 0)
|
||||
(persist/snapshot-exists? b "b")))
|
||||
false)
|
||||
@@ -1,130 +0,0 @@
|
||||
; Phase 2 — subscription hub: callbacks fire on publish, drive read models.
|
||||
|
||||
(persist-test
|
||||
"no subscribers initially"
|
||||
(persist/subscriber-count (persist/hub (persist/open)) "s")
|
||||
0)
|
||||
(persist-test
|
||||
"subscribe registers a callback"
|
||||
(let
|
||||
((h (persist/hub (persist/open))))
|
||||
(begin
|
||||
(persist/subscribe h "s" (fn (b s e) nil))
|
||||
(persist/subscriber-count h "s")))
|
||||
1)
|
||||
(persist-test
|
||||
"publish appends to the log"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/hub b)))
|
||||
(begin
|
||||
(persist/publish h "s" "x" 0 {})
|
||||
(persist/publish h "s" "x" 0 {})
|
||||
(persist/count b "s"))))
|
||||
2)
|
||||
(persist-test
|
||||
"publish returns the stored event"
|
||||
(let
|
||||
((h (persist/hub (persist/open))))
|
||||
(persist/event-seq (persist/publish h "s" "x" 0 {:id 1})))
|
||||
1)
|
||||
(persist-test
|
||||
"callback fires on publish — drives a kv read model"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/hub b)))
|
||||
(begin
|
||||
(persist/subscribe
|
||||
h
|
||||
"s"
|
||||
(fn
|
||||
(bk s e)
|
||||
(persist/kv-update
|
||||
bk
|
||||
"count"
|
||||
0
|
||||
(fn (n) (+ n 1)))))
|
||||
(persist/publish h "s" "x" 0 {})
|
||||
(persist/publish h "s" "x" 0 {})
|
||||
(persist/publish h "s" "x" 0 {})
|
||||
(persist/kv-get b "count"))))
|
||||
3)
|
||||
(persist-test
|
||||
"callback receives the event"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/hub b)))
|
||||
(begin
|
||||
(persist/subscribe
|
||||
h
|
||||
"s"
|
||||
(fn (bk s e) (persist/kv-put bk "last" (persist/event-type e))))
|
||||
(persist/publish h "s" "created" 0 {})
|
||||
(persist/kv-get b "last"))))
|
||||
"created")
|
||||
(persist-test
|
||||
"subscriptions are per-stream"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/hub b)))
|
||||
(begin
|
||||
(persist/subscribe
|
||||
h
|
||||
"s1"
|
||||
(fn
|
||||
(bk s e)
|
||||
(persist/kv-update bk "n" 0 (fn (n) (+ n 1)))))
|
||||
(persist/publish h "s2" "x" 0 {})
|
||||
(persist/kv-get-or b "n" 0))))
|
||||
0)
|
||||
(persist-test
|
||||
"multiple subscribers all fire"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/hub b)))
|
||||
(begin
|
||||
(persist/subscribe
|
||||
h
|
||||
"s"
|
||||
(fn
|
||||
(bk s e)
|
||||
(persist/kv-update bk "a" 0 (fn (n) (+ n 1)))))
|
||||
(persist/subscribe
|
||||
h
|
||||
"s"
|
||||
(fn
|
||||
(bk s e)
|
||||
(persist/kv-update bk "b" 0 (fn (n) (+ n 10)))))
|
||||
(persist/publish h "s" "x" 0 {})
|
||||
(list (persist/kv-get b "a") (persist/kv-get b "b")))))
|
||||
(list 1 10))
|
||||
(persist-test
|
||||
"incremental read model via resume in callback"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/hub b)))
|
||||
(begin
|
||||
(persist/kv-put b "proj" {:value 0 :seq 0})
|
||||
(persist/subscribe
|
||||
h
|
||||
"s"
|
||||
(fn
|
||||
(bk s e)
|
||||
(persist/kv-put
|
||||
bk
|
||||
"proj"
|
||||
(persist/project-resume
|
||||
bk
|
||||
s
|
||||
(fn (acc ev) (+ acc 1))
|
||||
(persist/kv-get bk "proj")))))
|
||||
(persist/publish h "s" "x" 0 {})
|
||||
(persist/publish h "s" "x" 0 {})
|
||||
(persist/project-value (persist/kv-get b "proj")))))
|
||||
2)
|
||||
@@ -1,115 +0,0 @@
|
||||
; Extension — event schema evolution via upcasters.
|
||||
|
||||
; v1 "placed" events had {:total N}; v2 wants {:amount N :currency "GBP"}.
|
||||
(define up-placed (fn (e) (persist/upcast-data e {:amount (get (persist/event-data e) :total) :currency "GBP"})))
|
||||
|
||||
(persist-test
|
||||
"unregistered type passes through unchanged"
|
||||
(let
|
||||
((reg (persist/upcasters)))
|
||||
(persist/event-data
|
||||
(persist/upcast
|
||||
reg
|
||||
(persist/event "s" 1 "other" 0 {:x 1}))))
|
||||
{:x 1})
|
||||
(persist-test
|
||||
"registered upcaster lifts an old event"
|
||||
(let
|
||||
((reg (persist/register-upcaster (persist/upcasters) "placed" up-placed)))
|
||||
(get
|
||||
(persist/event-data
|
||||
(persist/upcast
|
||||
reg
|
||||
(persist/event "s" 1 "placed" 0 {:total 50})))
|
||||
:amount))
|
||||
50)
|
||||
(persist-test
|
||||
"upcaster adds the new field"
|
||||
(let
|
||||
((reg (persist/register-upcaster (persist/upcasters) "placed" up-placed)))
|
||||
(get
|
||||
(persist/event-data
|
||||
(persist/upcast
|
||||
reg
|
||||
(persist/event "s" 1 "placed" 0 {:total 50})))
|
||||
:currency))
|
||||
"GBP")
|
||||
(persist-test
|
||||
"upcast preserves stream/seq/type/at"
|
||||
(let
|
||||
((reg (persist/register-upcaster (persist/upcasters) "placed" up-placed)))
|
||||
(let
|
||||
((e (persist/upcast reg (persist/event "orders" 7 "placed" 99 {:total 1}))))
|
||||
(list
|
||||
(persist/event-seq e)
|
||||
(persist/event-at e)
|
||||
(persist/event-type e))))
|
||||
(list 7 99 "placed"))
|
||||
(persist-test
|
||||
"registry is immutable — register returns a new dict"
|
||||
(let
|
||||
((r0 (persist/upcasters)))
|
||||
(begin
|
||||
(persist/register-upcaster r0 "placed" up-placed)
|
||||
(has-key? r0 "placed")))
|
||||
false)
|
||||
(persist-test
|
||||
"read-upcast lifts every event in a stream"
|
||||
(let
|
||||
((b (persist/open))
|
||||
(reg
|
||||
(persist/register-upcaster (persist/upcasters) "placed" up-placed)))
|
||||
(begin
|
||||
(persist/append b "orders" "placed" 0 {:total 10})
|
||||
(persist/append b "orders" "placed" 0 {:total 20})
|
||||
(let
|
||||
((es (persist/read-upcast b "orders" reg)))
|
||||
(list
|
||||
(get (persist/event-data (nth es 0)) :amount)
|
||||
(get (persist/event-data (nth es 1)) :amount)))))
|
||||
(list 10 20))
|
||||
(persist-test
|
||||
"project-upcast folds over the current shape"
|
||||
(let
|
||||
((b (persist/open))
|
||||
(reg
|
||||
(persist/register-upcaster (persist/upcasters) "placed" up-placed)))
|
||||
(begin
|
||||
(persist/append b "orders" "placed" 0 {:total 10})
|
||||
(persist/append b "orders" "placed" 0 {:total 20})
|
||||
(persist/project-upcast
|
||||
b
|
||||
"orders"
|
||||
reg
|
||||
(fn (acc e) (+ acc (get (persist/event-data e) :amount)))
|
||||
0)))
|
||||
30)
|
||||
(persist-test
|
||||
"mixed old and new events fold uniformly"
|
||||
(let
|
||||
((b (persist/open))
|
||||
(reg
|
||||
(persist/register-upcaster (persist/upcasters) "placed" up-placed)))
|
||||
(begin
|
||||
(persist/append b "orders" "placed" 0 {:total 5})
|
||||
(persist/append b "orders" "placed" 0 {:total 7 :amount 7})
|
||||
(persist/project-upcast
|
||||
b
|
||||
"orders"
|
||||
reg
|
||||
(fn (acc e) (+ acc (get (persist/event-data e) :amount)))
|
||||
0)))
|
||||
12)
|
||||
(persist-test
|
||||
"upcast works on the durable backend"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend)))
|
||||
(reg
|
||||
(persist/register-upcaster (persist/upcasters) "placed" up-placed)))
|
||||
(begin
|
||||
(persist/append db "orders" "placed" 0 {:total 42})
|
||||
(get
|
||||
(persist/event-data
|
||||
(nth (persist/read-upcast db "orders" reg) 0))
|
||||
:amount)))
|
||||
42)
|
||||
@@ -1,105 +0,0 @@
|
||||
; Extension — materialized views: stay current on write, read O(1) via peek.
|
||||
|
||||
(define vw-count (fn (acc e) (+ acc 1)))
|
||||
(define vw (persist/view "order-count" "orders" vw-count 0))
|
||||
|
||||
(persist-test "view-name" (persist/view-name vw) "order-count")
|
||||
(persist-test "view-stream" (persist/view-stream vw) "orders")
|
||||
(persist-test
|
||||
"view-value folds the stream"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "orders" "x" 0 {})
|
||||
(persist/append b "orders" "x" 0 {})
|
||||
(persist/view-value b vw)))
|
||||
2)
|
||||
(persist-test
|
||||
"view-refresh persists a snapshot that peek then reads"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "orders" "x" 0 {})
|
||||
(persist/view-refresh b vw)
|
||||
(persist/view-peek b vw)))
|
||||
1)
|
||||
(persist-test
|
||||
"peek lags an un-refreshed tail"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "orders" "x" 0 {})
|
||||
(persist/view-refresh b vw)
|
||||
(persist/append b "orders" "x" 0 {})
|
||||
(persist/view-peek b vw)))
|
||||
1)
|
||||
(persist-test
|
||||
"view-value sees the whole stream even after a stale snapshot"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(begin
|
||||
(persist/append b "orders" "x" 0 {})
|
||||
(persist/view-refresh b vw)
|
||||
(persist/append b "orders" "x" 0 {})
|
||||
(persist/view-value b vw)))
|
||||
2)
|
||||
(persist-test
|
||||
"attached view stays current on publish — peek needs no manual refresh"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/view-attach (persist/hub b) vw)))
|
||||
(begin
|
||||
(persist/publish h "orders" "x" 0 {})
|
||||
(persist/publish h "orders" "x" 0 {})
|
||||
(persist/publish h "orders" "x" 0 {})
|
||||
(persist/view-peek b vw))))
|
||||
3)
|
||||
(persist-test
|
||||
"attached view advances the snapshot seq incrementally"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/view-attach (persist/hub b) vw)))
|
||||
(begin
|
||||
(persist/publish h "orders" "x" 0 {})
|
||||
(persist/publish h "orders" "x" 0 {})
|
||||
(persist/project-seq
|
||||
(persist/snapshot-load b "order-count" 0)))))
|
||||
2)
|
||||
(persist-test
|
||||
"attach only reacts to its own stream"
|
||||
(let
|
||||
((b (persist/open)))
|
||||
(let
|
||||
((h (persist/view-attach (persist/hub b) vw)))
|
||||
(begin
|
||||
(persist/publish h "other" "x" 0 {})
|
||||
(persist/view-peek b vw))))
|
||||
0)
|
||||
(persist-test
|
||||
"materialized view works on the durable backend"
|
||||
(let
|
||||
((db (persist/mock-durable (persist/mem-backend))))
|
||||
(let
|
||||
((h (persist/view-attach (persist/hub db) vw)))
|
||||
(begin
|
||||
(persist/publish h "orders" "x" 0 {})
|
||||
(persist/publish h "orders" "x" 0 {})
|
||||
(persist/view-peek db vw))))
|
||||
2)
|
||||
(persist-test
|
||||
"view sum over event data"
|
||||
(let
|
||||
((b (persist/open))
|
||||
(sumv
|
||||
(persist/view
|
||||
"rev"
|
||||
"sales"
|
||||
(fn (acc e) (+ acc (get (persist/event-data e) :amt)))
|
||||
0)))
|
||||
(begin
|
||||
(persist/append b "sales" "sale" 0 {:amt 10})
|
||||
(persist/append b "sales" "sale" 1 {:amt 25})
|
||||
(persist/view-value b sumv)))
|
||||
35)
|
||||
@@ -1,44 +0,0 @@
|
||||
; persist/upcast — event schema evolution. An append-only log keeps events
|
||||
; forever, so old events have old shapes. Rather than migrate stored data (you
|
||||
; can't rewrite history) or branch every projection on version, register an
|
||||
; upcaster per event type: a pure (event -> event) that lifts an old event to
|
||||
; the current shape. Reads pass through the registry so projections see ONE
|
||||
; shape. The registry is an immutable dict the consumer threads (no global
|
||||
; mutable state). Requires: lib/persist/event.sx, lib/persist/log.sx.
|
||||
|
||||
(define persist/upcasters (fn () {}))
|
||||
(define persist/register-upcaster (fn (reg type fn) (assoc reg type fn)))
|
||||
|
||||
; apply the registered upcaster for an event's type, or pass it through unchanged
|
||||
(define
|
||||
persist/upcast
|
||||
(fn
|
||||
(reg e)
|
||||
(let ((f (get reg (persist/event-type e)))) (if f (f e) e))))
|
||||
|
||||
; read a stream with every event lifted to current shape
|
||||
(define
|
||||
persist/read-upcast
|
||||
(fn
|
||||
(b stream reg)
|
||||
(map (fn (e) (persist/upcast reg e)) (persist/read b stream))))
|
||||
|
||||
; project over upcasted events — projections never see a legacy shape
|
||||
(define
|
||||
persist/project-upcast
|
||||
(fn
|
||||
(b stream reg step seed)
|
||||
(reduce step seed (persist/read-upcast b stream reg))))
|
||||
|
||||
; helper: upcast an event's :data by merging in/overriding fields, keeping the
|
||||
; record's stream/seq/type/at. Common upcaster body.
|
||||
(define
|
||||
persist/upcast-data
|
||||
(fn
|
||||
(e new-data)
|
||||
(persist/event
|
||||
(persist/event-stream e)
|
||||
(persist/event-seq e)
|
||||
(persist/event-type e)
|
||||
(persist/event-at e)
|
||||
(merge (persist/event-data e) new-data))))
|
||||
@@ -1,49 +0,0 @@
|
||||
; persist/view — a materialized view: the consumer-facing read model. It bundles
|
||||
; a stream, a fold (step + seed) and a snapshot name. Attached to a hub it
|
||||
; refreshes incrementally on every publish, so the materialized value stays
|
||||
; current on write and reads are O(1) snapshot loads (persist/view-peek) instead
|
||||
; of a full fold. This is what feed indices, mod audit rollups, search counters,
|
||||
; etc. sit on. Requires: lib/persist/snapshot.sx, lib/persist/subscribe.sx.
|
||||
|
||||
(define persist/view (fn (name stream step seed) {:name name :step step :stream stream :seed seed}))
|
||||
(define persist/view-name (fn (v) (get v :name)))
|
||||
(define persist/view-stream (fn (v) (get v :stream)))
|
||||
|
||||
; bring the view's snapshot up to date with the log tail; returns the state
|
||||
(define
|
||||
persist/view-refresh
|
||||
(fn
|
||||
(b v)
|
||||
(persist/checkpoint
|
||||
b
|
||||
(get v :stream)
|
||||
(get v :name)
|
||||
(get v :step)
|
||||
(get v :seed))))
|
||||
|
||||
; current materialized value — refreshes first, so never stale
|
||||
(define
|
||||
persist/view-value
|
||||
(fn (b v) (persist/project-value (persist/view-refresh b v))))
|
||||
|
||||
; O(1) read of the last persisted snapshot value WITHOUT folding the tail. Equal
|
||||
; to view-value when the view is attached (kept current on every publish);
|
||||
; otherwise may lag the log by the un-refreshed tail.
|
||||
(define
|
||||
persist/view-peek
|
||||
(fn
|
||||
(b v)
|
||||
(persist/project-value
|
||||
(persist/snapshot-load b (get v :name) (get v :seed)))))
|
||||
|
||||
; attach to a hub: refresh the view on every publish to its stream
|
||||
(define
|
||||
persist/view-attach
|
||||
(fn
|
||||
(h v)
|
||||
(begin
|
||||
(persist/subscribe
|
||||
h
|
||||
(persist/view-stream v)
|
||||
(fn (bk s e) (persist/view-refresh bk v)))
|
||||
h)))
|
||||
44
lib/search/api.sx
Normal file
44
lib/search/api.sx
Normal file
@@ -0,0 +1,44 @@
|
||||
;; search public API — assembles the canonical Haskell source from all layers.
|
||||
;; Tests and callers concatenate `search/src` with their own top-level bindings
|
||||
;; (e.g. "result = lookupTerm \"cat\" idx\n") and evaluate via the haskell-on-sx
|
||||
;; interpreter. Public Haskell entry points: indexDoc, lookupTerm, deleteDoc,
|
||||
;; docFreq, allTerms, tokens, positioned, evalQuery, parseQuery, searchQuery,
|
||||
;; rankTfIdf, rankBm25, topNTfIdf, topNBm25, fedIndex, aclFilter, searchTfIdfAcl,
|
||||
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf,
|
||||
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
|
||||
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
|
||||
;; nearDocs, expandTerm, synDocs, synRankTfIdf, queryTerms, searchRankTfIdf,
|
||||
;; searchRankBm25, suggestN, suggest.
|
||||
|
||||
(define
|
||||
search/src
|
||||
(str
|
||||
search/tokenize-src
|
||||
"\n"
|
||||
search/index-src
|
||||
"\n"
|
||||
search/query-src
|
||||
"\n"
|
||||
search/parse-src
|
||||
"\n"
|
||||
search/rank-src
|
||||
"\n"
|
||||
search/fed-src
|
||||
"\n"
|
||||
search/prefix-src
|
||||
"\n"
|
||||
search/page-src
|
||||
"\n"
|
||||
search/fuzzy-src
|
||||
"\n"
|
||||
search/highlight-src
|
||||
"\n"
|
||||
search/stem-src
|
||||
"\n"
|
||||
search/near-src
|
||||
"\n"
|
||||
search/syn-src
|
||||
"\n"
|
||||
search/rankq-src
|
||||
"\n"
|
||||
search/suggest-src))
|
||||
55
lib/search/conformance.conf
Normal file
55
lib/search/conformance.conf
Normal file
@@ -0,0 +1,55 @@
|
||||
# search-on-sx conformance config — sourced by lib/guest/conformance.sh.
|
||||
|
||||
LANG_NAME=search
|
||||
SCOREBOARD_DIR=lib/search
|
||||
MODE=counters
|
||||
COUNTERS_PASS=hk-test-pass
|
||||
COUNTERS_FAIL=hk-test-fail
|
||||
TIMEOUT_PER_SUITE=600
|
||||
|
||||
PRELOADS=(
|
||||
lib/haskell/tokenizer.sx
|
||||
lib/haskell/layout.sx
|
||||
lib/haskell/parser.sx
|
||||
lib/haskell/desugar.sx
|
||||
lib/haskell/runtime.sx
|
||||
lib/haskell/match.sx
|
||||
lib/haskell/eval.sx
|
||||
lib/haskell/map.sx
|
||||
lib/haskell/set.sx
|
||||
lib/haskell/testlib.sx
|
||||
lib/search/tokenize.sx
|
||||
lib/search/index.sx
|
||||
lib/search/query.sx
|
||||
lib/search/parse.sx
|
||||
lib/search/rank.sx
|
||||
lib/search/fed.sx
|
||||
lib/search/prefix.sx
|
||||
lib/search/page.sx
|
||||
lib/search/fuzzy.sx
|
||||
lib/search/highlight.sx
|
||||
lib/search/stem.sx
|
||||
lib/search/near.sx
|
||||
lib/search/syn.sx
|
||||
lib/search/rankq.sx
|
||||
lib/search/suggest.sx
|
||||
lib/search/api.sx
|
||||
lib/search/testlib.sx
|
||||
)
|
||||
|
||||
SUITES=(
|
||||
"index:lib/search/tests/index.sx"
|
||||
"boolean:lib/search/tests/boolean.sx"
|
||||
"parse:lib/search/tests/parse.sx"
|
||||
"rank:lib/search/tests/rank.sx"
|
||||
"integration:lib/search/tests/integration.sx"
|
||||
"prefix:lib/search/tests/prefix.sx"
|
||||
"page:lib/search/tests/page.sx"
|
||||
"fuzzy:lib/search/tests/fuzzy.sx"
|
||||
"highlight:lib/search/tests/highlight.sx"
|
||||
"stem:lib/search/tests/stem.sx"
|
||||
"near:lib/search/tests/near.sx"
|
||||
"syn:lib/search/tests/syn.sx"
|
||||
"rankq:lib/search/tests/rankq.sx"
|
||||
"suggest:lib/search/tests/suggest.sx"
|
||||
)
|
||||
3
lib/search/conformance.sh
Executable file
3
lib/search/conformance.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
|
||||
# Thin wrapper — see lib/guest/conformance.sh and lib/search/conformance.conf.
|
||||
exec bash "$(dirname "$0")/../guest/conformance.sh" "$(dirname "$0")/conformance.conf" "$@"
|
||||
16
lib/search/fed.sx
Normal file
16
lib/search/fed.sx
Normal file
@@ -0,0 +1,16 @@
|
||||
;; search federation + ACL — Haskell source fragment. Depends on index + rank.
|
||||
;; Federation merges per-peer INDICES (not ranked results): each peer's local
|
||||
;; DocIds are relabelled to global ids `gid peer local = peer*1000 + local`
|
||||
;; (dedupe by (peer,doc-id) is automatic via the bijection), then posting lists
|
||||
;; are unioned per term. Ranking then runs once over the merged index, which is
|
||||
;; rank-correct. ACL is a post-rank filter: an injected `permit :: DocId -> Bool`
|
||||
;; predicate (viewer baked in by the caller) — never baked into the index.
|
||||
;; fedIndex :: [(PeerId, Index)] -> Index
|
||||
;; aclFilter :: (DocId -> Bool) -> [DocId] -> [DocId]
|
||||
;; searchTfIdfAcl :: (DocId -> Bool) -> [Term] -> Index -> [DocId]
|
||||
;; topNTfIdfAcl :: Int -> (DocId -> Bool) -> [Term] -> Index -> [DocId]
|
||||
;; searchBm25Acl :: (DocId -> Bool) -> Float -> Float -> [Term] -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/fed-src
|
||||
"gid peer local = peer * 1000 + local\nfedRelabelPosting peer p = (gid peer (fst p), snd p)\nfedRelabelEntry peer e = (fst e, map (fedRelabelPosting peer) (snd e))\nfedRelabelIndex peer idx = map (fedRelabelEntry peer) idx\nfedInsP p [] = [p]\nfedInsP p (q:qs) = if fst p < fst q then p : q : qs else if fst p == fst q then p : qs else q : fedInsP p qs\nfedMergePL a b = foldr fedInsP b a\nfedInsTerm t pl [] = [(t, pl)]\nfedInsTerm t pl (x:xs) = if t < fst x then (t, pl) : x : xs else if t == fst x then (fst x, fedMergePL pl (snd x)) : xs else x : fedInsTerm t pl xs\nfedMergeEntry idx e = fedInsTerm (fst e) (snd e) idx\nfedMergeTwo a b = foldl fedMergeEntry a b\nfedAddPeer acc pair = fedMergeTwo acc (fedRelabelIndex (fst pair) (snd pair))\nfedIndex pairs = foldl fedAddPeer emptyIndex pairs\naclFilter permit docs = filter permit docs\nsearchTfIdfAcl permit ts idx = aclFilter permit (rankTfIdf ts idx)\ntopNTfIdfAcl n permit ts idx = take n (aclFilter permit (rankTfIdf ts idx))\nsearchBm25Acl permit k1 b ts idx = aclFilter permit (rankBm25 k1 b ts idx)\n")
|
||||
12
lib/search/fuzzy.sx
Normal file
12
lib/search/fuzzy.sx
Normal file
@@ -0,0 +1,12 @@
|
||||
;; search fuzzy matching — Haskell source fragment. Depends on index + rank.
|
||||
;; Levenshtein edit distance (O(m*n) row-based DP — the naive recursive version is
|
||||
;; exponential and far too slow under load) expands a query term to all indexed
|
||||
;; terms within a max distance, then unions / ranks their docs.
|
||||
;; editDist :: String -> String -> Int
|
||||
;; fuzzyTerms :: Int -> String -> Index -> [Term] (sorted)
|
||||
;; fuzzyDocs :: Int -> String -> Index -> [DocId] (sorted union)
|
||||
;; fuzzyRankTfIdf :: Int -> String -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/fuzzy-src
|
||||
"edMin3 a b c = min a (min b c)\nedCost x y = if x == y then 0 else 1\nedUpto i n = if i > n then [] else i : edUpto (i + 1) n\nedLast [x] = x\nedLast (x:xs) = edLast xs\nedNrow x [] prev left = []\nedNrow x (y:ys) prev left = let v = edMin3 (head (tail prev) + 1) (left + 1) (head prev + edCost x y) in v : edNrow x ys (tail prev) v\nedRow x ys prev = let f = head prev + 1 in f : edNrow x ys prev f\nedRows [] ys prev = prev\nedRows (x:xs) ys prev = edRows xs ys (edRow x ys prev)\neditDist xs ys = edLast (edRows xs ys (edUpto 0 (length ys)))\nqWithinDist maxd term t = editDist term t <= maxd\nfuzzyTerms maxd term idx = filter (qWithinDist maxd term) (allTerms idx)\nfuzzyDocs maxd term idx = foldl (candStep idx) [] (fuzzyTerms maxd term idx)\nfuzzyRankTfIdf maxd term idx = rankTfIdf (fuzzyTerms maxd term idx) idx\n")
|
||||
10
lib/search/highlight.sx
Normal file
10
lib/search/highlight.sx
Normal file
@@ -0,0 +1,10 @@
|
||||
;; search highlight / snippet — Haskell source fragment. Depends on tokenize.
|
||||
;; Operates on document text (not the index): marks query-matching tokens with
|
||||
;; [..] and extracts a context window around the first match. Tokens are
|
||||
;; normalized (lowercase, punctuation-stripped) by `tokens`, matching index side.
|
||||
;; highlight :: [Term] -> String -> String
|
||||
;; snippet :: Int -> [Term] -> String -> String (ctx tokens each side of 1st match)
|
||||
|
||||
(define
|
||||
search/highlight-src
|
||||
"hlMark terms t = if elem t terms then \"[\" ++ t ++ \"]\" else t\nhighlight terms text = unwords (map (hlMark terms) (tokens text))\nhlIdxFrom terms [] i = 0 - 1\nhlIdxFrom terms (t:ts) i = if elem t terms then i else hlIdxFrom terms ts (i + 1)\nhlIdx terms toks = hlIdxFrom terms toks 0\nhlMax0 x = if x < 0 then 0 else x\nsnipStart ctx i = if i < 0 then 0 else hlMax0 (i - ctx)\nsnipToks ctx terms toks = unwords (map (hlMark terms) (take (2 * ctx + 1) (drop (snipStart ctx (hlIdx terms toks)) toks)))\nsnippet ctx terms text = snipToks ctx terms (tokens text)\n")
|
||||
15
lib/search/index.sx
Normal file
15
lib/search/index.sx
Normal file
@@ -0,0 +1,15 @@
|
||||
;; search inverted index — Haskell source fragment (depends on tokenize).
|
||||
;; Index = [(Term, [(DocId, [Pos])])], sorted by Term; postings sorted by DocId.
|
||||
;; Data.Map's public API lacks toList/keys/map/filter, so a sorted assoc-list
|
||||
;; index is used — it is the conceptual `Map Term [(DocId,[Pos])]` and exposes
|
||||
;; term iteration (allTerms) and df naturally for ranking.
|
||||
;; emptyIndex :: Index
|
||||
;; indexDoc :: DocId -> String -> Index -> Index (re-index replaces)
|
||||
;; lookupTerm :: Term -> Index -> [(DocId, [Pos])]
|
||||
;; deleteDoc :: DocId -> Index -> Index
|
||||
;; docFreq :: Term -> Index -> Int
|
||||
;; allTerms :: Index -> [Term]
|
||||
|
||||
(define
|
||||
search/index-src
|
||||
"emptyIndex = []\ngroupBump [] t p = [(t, [p])]\ngroupBump (g:gs) t p = if fst g == t then (t, snd g ++ [p]) : gs else g : groupBump gs t p\ngroupStep acc tp = groupBump acc (fst tp) (snd tp)\ngroupTok pairs = foldl groupStep [] pairs\ninsPosting d ps [] = [(d, ps)]\ninsPosting d ps (q:qs) = if d < fst q then (d, ps) : q : qs else if d == fst q then (d, ps) : qs else q : insPosting d ps qs\ninsTerm t d ps [] = [(t, [(d, ps)])]\ninsTerm t d ps (e:es) = if t < fst e then (t, [(d, ps)]) : e : es else if t == fst e then (fst e, insPosting d ps (snd e)) : es else e : insTerm t d ps es\nindexStep d ix tp = insTerm (fst tp) d (snd tp) ix\nindexDoc d text idx = foldl (indexStep d) idx (groupTok (positioned text))\nlookupTerm t idx = case lookup t idx of { Nothing -> []; Just pl -> pl }\ndocFreq t idx = length (lookupTerm t idx)\nallTerms idx = map fst idx\npostingKeep d q = fst q /= d\ndropTermDoc d e = (fst e, filter (postingKeep d) (snd e))\nplKeep e = not (null (snd e))\ndeleteDoc d idx = filter plKeep (map (dropTermDoc d) idx)\n")
|
||||
8
lib/search/near.sx
Normal file
8
lib/search/near.sx
Normal file
@@ -0,0 +1,8 @@
|
||||
;; search proximity (NEAR) — Haskell source fragment. Depends on query (posIn,
|
||||
;; docsWith, sortedInter). Finds docs where two terms occur within k positions of
|
||||
;; each other (unordered), using the positional postings.
|
||||
;; nearDocs :: Int -> Term -> Term -> Index -> [DocId] (sorted)
|
||||
|
||||
(define
|
||||
search/near-src
|
||||
"nrAbsDiff a b = if a > b then a - b else b - a\nnrCloseTo k x [] = False\nnrCloseTo k x (y:ys) = if nrAbsDiff x y <= k then True else nrCloseTo k x ys\nnrAnyClose k [] ys = False\nnrAnyClose k (x:xs) ys = if nrCloseTo k x ys then True else nrAnyClose k xs ys\nnearInDoc k t1 t2 d idx = nrAnyClose k (posIn t1 d idx) (posIn t2 d idx)\nnearHere k t1 t2 idx d = nearInDoc k t1 t2 d idx\nnearDocs k t1 t2 idx = filter (nearHere k t1 t2 idx) (sortedInter (docsWith t1 idx) (docsWith t2 idx))\n")
|
||||
11
lib/search/page.sx
Normal file
11
lib/search/page.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
;; search pagination — Haskell source fragment. Depends on rank.
|
||||
;; Windows a ranked result list by offset/limit (offset >= length -> empty;
|
||||
;; limit clamps to what remains).
|
||||
;; paginate :: Int -> Int -> [DocId] -> [DocId] (offset, limit)
|
||||
;; pageTfIdf :: Int -> Int -> [Term] -> Index -> [DocId]
|
||||
;; pageBm25 :: Int -> Int -> Float -> Float -> [Term] -> Index -> [DocId]
|
||||
;; resultCount :: [Term] -> Index -> Int
|
||||
|
||||
(define
|
||||
search/page-src
|
||||
"paginate off lim docs = take lim (drop off docs)\npageTfIdf off lim ts idx = paginate off lim (rankTfIdf ts idx)\npageBm25 off lim k1 b ts idx = paginate off lim (rankBm25 k1 b ts idx)\nresultCount ts idx = length (rankTfIdf ts idx)\n")
|
||||
18
lib/search/parse.sx
Normal file
18
lib/search/parse.sx
Normal file
@@ -0,0 +1,18 @@
|
||||
;; search query parser — Haskell source fragment. Depends on tokenize + query.
|
||||
;; Grammar (precedence OR < AND < NOT):
|
||||
;; expr = orExpr
|
||||
;; orExpr = andExpr (OR andExpr)*
|
||||
;; andExpr= notExpr ((AND | <implicit>) notExpr)* -- adjacency means AND
|
||||
;; notExpr= NOT notExpr | atom
|
||||
;; atom = '(' expr ')' | '"' word+ '"' | word
|
||||
;; Keywords AND/OR/NOT are case-insensitive; bare words are normalized via tokens.
|
||||
;; Gotchas: delimiters matched by ord (escaped char literals like '\"' break the
|
||||
;; haskell-on-sx tokenizer); an [] *pattern* inside a `case` alt also breaks the
|
||||
;; parser, so qNormTerm/qDropRP/showQ are written as multi-clause functions.
|
||||
;; parseQuery :: String -> Query
|
||||
;; searchQuery :: String -> Index -> [DocId]
|
||||
;; showQ :: Query -> String -- canonical render for tests/debug
|
||||
|
||||
(define
|
||||
search/parse-src
|
||||
"data QTok = TAnd | TOr | TNot | TLP | TRP | TWord String | TPhrase [String]\nqIsSpace c = ord c == 32\nqIsLP c = ord c == 40\nqIsRP c = ord c == 41\nqIsQuote c = ord c == 34\nqDelim c = qIsSpace c || qIsLP c || qIsRP c || qIsQuote c\nqReadWord [] = ([], [])\nqReadWord (c:cs) = if qDelim c then ([], c:cs) else let (w, rest) = qReadWord cs in (c:w, rest)\nqReadPhrase [] = ([], [])\nqReadPhrase (c:cs) = if qIsQuote c then ([], cs) else let (w, rest) = qReadPhrase cs in (c:w, rest)\ntoUpperCh c = chr (toUpper (ord c))\nqUpper w = joinChars (map toUpperCh w)\nqFirstTok [] = \"\"\nqFirstTok (x:xs) = x\nqNormTerm w = qFirstTok (tokens w)\nqClassify w = if qUpper w == \"AND\" then TAnd else if qUpper w == \"OR\" then TOr else if qUpper w == \"NOT\" then TNot else TWord (qNormTerm w)\nqPhraseTok cs = let (p, rest) = qReadPhrase cs in TPhrase (tokens p) : qtokens rest\nqWordTok cs = let (w, rest) = qReadWord cs in qClassify w : qtokens rest\nqtokens [] = []\nqtokens (c:cs) = if qIsSpace c then qtokens cs else if qIsLP c then TLP : qtokens cs else if qIsRP c then TRP : qtokens cs else if qIsQuote c then qPhraseTok cs else qWordTok (c:cs)\nqDropRP (q, (TRP:rest)) = (q, rest)\nqDropRP (q, ts) = (q, ts)\nparseAtom [] = (Term \"\", [])\nparseAtom (TLP:ts) = qDropRP (parseExpr ts)\nparseAtom (TPhrase ps : ts) = (Phrase ps, ts)\nparseAtom (TWord w : ts) = (Term w, ts)\nparseAtom ts = (Term \"\", ts)\nqWrapNot (q, ts) = (Not q, ts)\nparseNot (TNot:ts) = qWrapNot (parseNot ts)\nparseNot ts = parseAtom ts\nqStartsAtom (TWord w : ts) = True\nqStartsAtom (TPhrase p : ts) = True\nqStartsAtom (TLP : ts) = True\nqStartsAtom (TNot : ts) = True\nqStartsAtom ts = False\nqAndStep left ts = let (r, rest) = parseNot ts in parseAndR (And left r) rest\nparseAndR left (TAnd:ts) = qAndStep left ts\nparseAndR left ts = if qStartsAtom ts then qAndStep left ts else (left, ts)\nparseAnd ts = let (l, rest) = parseNot ts in parseAndR l rest\nparseOrR left (TOr:ts) = let (r, rest) = parseAnd ts in parseOrR (Or left r) rest\nparseOrR left ts = (left, ts)\nparseExpr ts = let (l, rest) = parseAnd ts in parseOrR l rest\nparseQuery s = fst (parseExpr (qtokens s))\nsearchQuery s idx = evalQuery idx (parseQuery s)\njoinSp [] = \"\"\njoinSp [x] = x\njoinSp (x:xs) = x ++ \"-\" ++ joinSp xs\nshowQ (Term t) = \"T:\" ++ t\nshowQ (And a b) = \"(\" ++ showQ a ++ \" & \" ++ showQ b ++ \")\"\nshowQ (Or a b) = \"(\" ++ showQ a ++ \" | \" ++ showQ b ++ \")\"\nshowQ (Not a) = \"!\" ++ showQ a\nshowQ (Phrase ts) = \"P:\" ++ joinSp ts\n")
|
||||
10
lib/search/prefix.sx
Normal file
10
lib/search/prefix.sx
Normal file
@@ -0,0 +1,10 @@
|
||||
;; search prefix / wildcard queries — Haskell source fragment. Depends on index +
|
||||
;; rank (reuses candStep / rankTfIdf). A prefix matches every indexed term that
|
||||
;; starts with it; the matching terms are unioned (OR) into a docid set.
|
||||
;; prefixTerms :: String -> Index -> [Term] (sorted, from allTerms)
|
||||
;; prefixDocs :: String -> Index -> [DocId] (sorted union)
|
||||
;; prefixRankTfIdf :: String -> Index -> [DocId] (ranked by the matched terms)
|
||||
|
||||
(define
|
||||
search/prefix-src
|
||||
"prefixTerms pre idx = filter (isPrefixOf pre) (allTerms idx)\nprefixDocs pre idx = foldl (candStep idx) [] (prefixTerms pre idx)\nprefixRankTfIdf pre idx = rankTfIdf (prefixTerms pre idx) idx\n")
|
||||
11
lib/search/query.sx
Normal file
11
lib/search/query.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
;; search query AST + boolean/phrase evaluation — Haskell source fragment.
|
||||
;; Depends on tokenize + index.
|
||||
;; data Query = Term String | And Query Query | Or Query Query
|
||||
;; | Not Query | Phrase [String]
|
||||
;; evalQuery :: Index -> Query -> [DocId] (sorted, unique)
|
||||
;; Boolean ops are linear merges over docid-sorted posting lists; Not uses
|
||||
;; allDocs as the universe; Phrase checks positional adjacency.
|
||||
|
||||
(define
|
||||
search/query-src
|
||||
"data Query = Term String | And Query Query | Or Query Query | Not Query | Phrase [String]\ndocsWith t idx = map fst (lookupTerm t idx)\nsortedUnion [] ys = ys\nsortedUnion xs [] = xs\nsortedUnion (x:xs) (y:ys) = if x < y then x : sortedUnion xs (y:ys) else if x > y then y : sortedUnion (x:xs) ys else x : sortedUnion xs ys\nsortedInter [] ys = []\nsortedInter xs [] = []\nsortedInter (x:xs) (y:ys) = if x < y then sortedInter xs (y:ys) else if x > y then sortedInter (x:xs) ys else x : sortedInter xs ys\nsortedDiff [] ys = []\nsortedDiff xs [] = xs\nsortedDiff (x:xs) (y:ys) = if x < y then x : sortedDiff xs (y:ys) else if x > y then sortedDiff (x:xs) ys else sortedDiff xs ys\nmergeDocs acc e = sortedUnion acc (map fst (snd e))\nallDocs idx = foldl mergeDocs [] idx\nposIn t d idx = case lookup d (lookupTerm t idx) of { Nothing -> []; Just ps -> ps }\nelemSorted x [] = False\nelemSorted x (y:ys) = if x == y then True else if x < y then False else elemSorted x ys\nphraseAtAll [] d idx p i = True\nphraseAtAll (t:ts) d idx p i = if elemSorted (p + i) (posIn t d idx) then phraseAtAll ts d idx p (i + 1) else False\nphraseStartsAt ts d idx p = phraseAtAll ts d idx p 0\nphraseInDoc [] d idx = True\nphraseInDoc (t0:rest) d idx = any (phraseStartsAt (t0:rest) d idx) (posIn t0 d idx)\nphraseHere ts idx d = phraseInDoc ts d idx\ninterStep idx acc tt = sortedInter acc (docsWith tt idx)\nphraseCands [] idx = allDocs idx\nphraseCands (t:ts) idx = foldl (interStep idx) (docsWith t idx) ts\nphraseDocs ts idx = filter (phraseHere ts idx) (phraseCands ts idx)\nevalQuery idx q = case q of { Term t -> docsWith t idx ; And a b -> sortedInter (evalQuery idx a) (evalQuery idx b) ; Or a b -> sortedUnion (evalQuery idx a) (evalQuery idx b) ; Not a -> sortedDiff (allDocs idx) (evalQuery idx a) ; Phrase ts -> phraseDocs ts idx }\n")
|
||||
14
lib/search/rank.sx
Normal file
14
lib/search/rank.sx
Normal file
@@ -0,0 +1,14 @@
|
||||
;; search ranking — Haskell source fragment. Depends on tokenize + index + query.
|
||||
;; Ranked retrieval over the candidate set (docs containing any query term).
|
||||
;; Scores are floats; ties broken by DocId ascending (deterministic).
|
||||
;; numDocs :: Index -> Int
|
||||
;; docFreq :: Term -> Index -> Int (from index)
|
||||
;; docLen :: DocId -> Index -> Int
|
||||
;; rankTfIdf :: [Term] -> Index -> [DocId]
|
||||
;; topNTfIdf :: Int -> [Term] -> Index -> [DocId]
|
||||
;; rankBm25 :: Float -> Float -> [Term] -> Index -> [DocId] (k1, b)
|
||||
;; topNBm25 :: Int -> Float -> Float -> [Term] -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/rank-src
|
||||
"numDocs idx = length (allDocs idx)\ntfIn t d idx = length (posIn t d idx)\nqIdf n df = if df == 0 then 0 else log (n / df)\nidf t idx = qIdf (numDocs idx) (docFreq t idx)\ntermScoreTf idx d t = tfIn t d idx * idf t idx\ntfidfDoc ts idx d = sum (map (termScoreTf idx d) ts)\ncandStep idx acc t = sortedUnion acc (docsWith t idx)\ncandDocs ts idx = foldl (candStep idx) [] ts\ncmpScore p1 p2 = if fst p1 > fst p2 then LT else if fst p1 < fst p2 then GT else compare (snd p1) (snd p2)\nmkPair f ts idx d = (f ts idx d, d)\nrankWith f ts idx = map snd (sortBy cmpScore (map (mkPair f ts idx) (candDocs ts idx)))\nrankTfIdf ts idx = rankWith tfidfDoc ts idx\ntopNTfIdf n ts idx = take n (rankTfIdf ts idx)\ntfAt d idx t = tfIn t d idx\ndocLen d idx = sum (map (tfAt d idx) (allTerms idx))\nlenAt idx d = docLen d idx\navgDocLen idx = sum (map (lenAt idx) (allDocs idx)) / numDocs idx\nbm25idf t idx = log ((numDocs idx - docFreq t idx + 0.5) / (docFreq t idx + 0.5) + 1)\nbm25Term k1 b avgdl idx d t = bm25idf t idx * (tfIn t d idx * (k1 + 1)) / (tfIn t d idx + k1 * (1 - b + b * docLen d idx / avgdl))\nbm25Doc k1 b ts idx d = sum (map (bm25Term k1 b (avgDocLen idx) idx d) ts)\nrankBm25 k1 b ts idx = rankWith (bm25Doc k1 b) ts idx\ntopNBm25 n k1 b ts idx = take n (rankBm25 k1 b ts idx)\n")
|
||||
11
lib/search/rankq.sx
Normal file
11
lib/search/rankq.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
;; search boolean-filtered ranked search — Haskell source fragment.
|
||||
;; Depends on parse (parseQuery/Query), query (evalQuery), rank (tfidfDoc/bm25Doc/
|
||||
;; cmpScore). Filters by the boolean query, then ranks the surviving docs by
|
||||
;; relevance over the query's leaf terms — the real-world filter-then-rank pattern.
|
||||
;; queryTerms :: Query -> [Term]
|
||||
;; searchRankTfIdf :: String -> Index -> [DocId]
|
||||
;; searchRankBm25 :: Float -> Float -> String -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/rankq-src
|
||||
"queryTerms (Term t) = [t]\nqueryTerms (And a b) = queryTerms a ++ queryTerms b\nqueryTerms (Or a b) = queryTerms a ++ queryTerms b\nqueryTerms (Not a) = queryTerms a\nqueryTerms (Phrase ts) = ts\nmkSubPair f terms idx d = (f terms idx d, d)\nrankSubsetWith f terms docs idx = map snd (sortBy cmpScore (map (mkSubPair f terms idx) docs))\nsearchRankTfIdf s idx = let q = parseQuery s in rankSubsetWith tfidfDoc (queryTerms q) (evalQuery idx q) idx\nsearchRankBm25 k1 b s idx = let q = parseQuery s in rankSubsetWith (bm25Doc k1 b) (queryTerms q) (evalQuery idx q) idx\n")
|
||||
23
lib/search/scoreboard.json
Normal file
23
lib/search/scoreboard.json
Normal file
@@ -0,0 +1,23 @@
|
||||
{
|
||||
"lang": "search",
|
||||
"total_passed": 234,
|
||||
"total_failed": 0,
|
||||
"total": 234,
|
||||
"suites": [
|
||||
{"name":"index","passed":18,"failed":0,"total":18},
|
||||
{"name":"boolean","passed":28,"failed":0,"total":28},
|
||||
{"name":"parse","passed":32,"failed":0,"total":32},
|
||||
{"name":"rank","passed":23,"failed":0,"total":23},
|
||||
{"name":"integration","passed":21,"failed":0,"total":21},
|
||||
{"name":"prefix","passed":14,"failed":0,"total":14},
|
||||
{"name":"page","passed":12,"failed":0,"total":12},
|
||||
{"name":"fuzzy","passed":18,"failed":0,"total":18},
|
||||
{"name":"highlight","passed":12,"failed":0,"total":12},
|
||||
{"name":"stem","passed":18,"failed":0,"total":18},
|
||||
{"name":"near","passed":9,"failed":0,"total":9},
|
||||
{"name":"syn","passed":9,"failed":0,"total":9},
|
||||
{"name":"rankq","passed":11,"failed":0,"total":11},
|
||||
{"name":"suggest","passed":9,"failed":0,"total":9}
|
||||
],
|
||||
"generated": "2026-06-07T00:44:05+00:00"
|
||||
}
|
||||
20
lib/search/scoreboard.md
Normal file
20
lib/search/scoreboard.md
Normal file
@@ -0,0 +1,20 @@
|
||||
# search scoreboard
|
||||
|
||||
**234 / 234 passing** (0 failure(s)).
|
||||
|
||||
| Suite | Passed | Total | Status |
|
||||
|-------|--------|-------|--------|
|
||||
| index | 18 | 18 | ok |
|
||||
| boolean | 28 | 28 | ok |
|
||||
| parse | 32 | 32 | ok |
|
||||
| rank | 23 | 23 | ok |
|
||||
| integration | 21 | 21 | ok |
|
||||
| prefix | 14 | 14 | ok |
|
||||
| page | 12 | 12 | ok |
|
||||
| fuzzy | 18 | 18 | ok |
|
||||
| highlight | 12 | 12 | ok |
|
||||
| stem | 18 | 18 | ok |
|
||||
| near | 9 | 9 | ok |
|
||||
| syn | 9 | 9 | ok |
|
||||
| rankq | 11 | 11 | ok |
|
||||
| suggest | 9 | 9 | ok |
|
||||
15
lib/search/stem.sx
Normal file
15
lib/search/stem.sx
Normal file
@@ -0,0 +1,15 @@
|
||||
;; search stemming — Haskell source fragment. Depends on tokenize + index.
|
||||
;; Lightweight, deterministic English suffix stripping (recall-improving
|
||||
;; normalizer). Rules are checked most-specific first; conservative length guards
|
||||
;; avoid mangling short words. Not a full Porter stemmer.
|
||||
;; Gotcha: take/drop over a String yield char CODES (ints), not char strings, so
|
||||
;; rebuild strings with `stStr = joinChars . map chr`. (isSuffixOf's reverse also
|
||||
;; trips `++` on the String representation, hence the manual stEnds.)
|
||||
;; stem :: String -> String
|
||||
;; stemText :: String -> String (tokenize + stem + rejoin)
|
||||
;; stemTokens :: String -> [String]
|
||||
;; indexStemmed:: DocId -> String -> Index -> Index (index the stemmed text)
|
||||
|
||||
(define
|
||||
search/stem-src
|
||||
"stStr cs = joinChars (map chr cs)\nstEnds suf w = let n = length w in let m = length suf in if m > n then False else stStr (drop (n - m) w) == suf\nstDropEnd k w = stStr (take (length w - k) w)\nstem w = if stEnds \"ies\" w && length w >= 5 then stDropEnd 3 w ++ \"y\" else if stEnds \"ss\" w then w else if stEnds \"es\" w && length w >= 5 then stDropEnd 2 w else if stEnds \"s\" w && length w >= 4 then stDropEnd 1 w else if stEnds \"ing\" w && length w >= 6 then stDropEnd 3 w else if stEnds \"ed\" w && length w >= 5 then stDropEnd 2 w else w\nstemTokens s = map stem (tokens s)\nstemText s = unwords (stemTokens s)\nindexStemmed d text idx = indexDoc d (stemText text) idx\n")
|
||||
9
lib/search/suggest.sx
Normal file
9
lib/search/suggest.sx
Normal file
@@ -0,0 +1,9 @@
|
||||
;; search did-you-mean / spelling suggestion — Haskell source fragment.
|
||||
;; Depends on fuzzy (editDist) + index (allTerms). Ranks indexed terms by edit
|
||||
;; distance to a (possibly misspelled) query term; ties broken alphabetically.
|
||||
;; suggestN :: Int -> String -> Index -> [Term]
|
||||
;; suggest :: String -> Index -> Term ("" if the index has no terms)
|
||||
|
||||
(define
|
||||
search/suggest-src
|
||||
"sgMk term t = (editDist term t, t)\nsgPairs term idx = map (sgMk term) (allTerms idx)\nsgCmp p1 p2 = if fst p1 < fst p2 then LT else if fst p1 > fst p2 then GT else compare (snd p1) (snd p2)\nsuggestN n term idx = take n (map snd (sortBy sgCmp (sgPairs term idx)))\nsgHead [] = \"\"\nsgHead (x:xs) = x\nsuggest term idx = sgHead (suggestN 1 term idx)\n")
|
||||
10
lib/search/syn.sx
Normal file
10
lib/search/syn.sx
Normal file
@@ -0,0 +1,10 @@
|
||||
;; search synonym / query expansion — Haskell source fragment. Depends on index +
|
||||
;; rank. A synonym map is an assoc list [(Term, [Term])]; a query term is expanded
|
||||
;; to itself plus its synonyms, then the expanded set is unioned / ranked.
|
||||
;; expandTerm :: [(Term,[Term])] -> Term -> [Term]
|
||||
;; synDocs :: [(Term,[Term])] -> Term -> Index -> [DocId]
|
||||
;; synRankTfIdf :: [(Term,[Term])] -> Term -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/syn-src
|
||||
"synLookup synmap t = case lookup t synmap of { Nothing -> [] ; Just ss -> ss }\nexpandTerm synmap t = t : synLookup synmap t\nsynDocs synmap t idx = foldl (candStep idx) [] (expandTerm synmap t)\nsynRankTfIdf synmap t idx = rankTfIdf (expandTerm synmap t) idx\n")
|
||||
50
lib/search/testlib.sx
Normal file
50
lib/search/testlib.sx
Normal file
@@ -0,0 +1,50 @@
|
||||
;; search test helpers — convert forced haskell values to plain SX and run
|
||||
;; programs built on top of search/src. Reuses hk-test / counters from
|
||||
;; lib/haskell/testlib.sx (preloaded by the conformance config).
|
||||
|
||||
;; Recursively turn a forced HK value into plain SX:
|
||||
;; cons-list -> SX list, Tuple -> SX list, leaves unchanged.
|
||||
(define
|
||||
search-hk->sx
|
||||
(fn
|
||||
(v)
|
||||
(cond
|
||||
((and (list? v) (not (empty? v)) (= (first v) "[]")) (list))
|
||||
((and (list? v) (not (empty? v)) (= (first v) ":"))
|
||||
(cons
|
||||
(search-hk->sx (nth v 1))
|
||||
(search-hk->sx (nth v 2))))
|
||||
((and (list? v) (not (empty? v)) (= (first v) "Tuple"))
|
||||
(map search-hk->sx (rest v)))
|
||||
(:else v))))
|
||||
|
||||
;; Evaluate `extra` (extra top-level Haskell bindings) on top of search/src
|
||||
;; and return binding `name` as plain SX.
|
||||
(define
|
||||
search-eval
|
||||
(fn
|
||||
(extra name)
|
||||
(search-hk->sx
|
||||
(hk-deep-force
|
||||
(get (hk-eval-program (hk-core (str search/src extra))) name)))))
|
||||
|
||||
(define
|
||||
search-join
|
||||
(fn
|
||||
(sep xs)
|
||||
(cond
|
||||
((empty? xs) "")
|
||||
((empty? (rest xs)) (first xs))
|
||||
(:else (str (first xs) sep (search-join sep (rest xs)))))))
|
||||
|
||||
;; Batch many haskell expressions into ONE program evaluation (amortizes the
|
||||
;; cost of parsing/binding search/src — important under heavy CPU load).
|
||||
;; `setup` is extra top-level Haskell; `exprs` is a list of expression strings
|
||||
;; whose results form a single haskell list. Returns the SX list of results.
|
||||
(define
|
||||
search-batch
|
||||
(fn
|
||||
(setup exprs)
|
||||
(search-eval
|
||||
(str setup "\nresult = [" (search-join ", " exprs) "]\n")
|
||||
"result")))
|
||||
123
lib/search/tests/boolean.sx
Normal file
123
lib/search/tests/boolean.sx
Normal file
@@ -0,0 +1,123 @@
|
||||
;; Phase 2 — query AST + boolean/phrase evaluation (hand-built Query values).
|
||||
;; Corpus:
|
||||
;; doc 1 "the quick brown dog" -> the quick brown dog
|
||||
;; doc 2 "a quick brown fox" -> a quick brown fox
|
||||
;; doc 3 "the dog barks loudly" -> the dog barks loudly
|
||||
;; All queries run in ONE program evaluation (search-batch) to stay fast.
|
||||
|
||||
(define
|
||||
search-corpus
|
||||
"idx = indexDoc 3 \"the dog barks loudly\" (indexDoc 2 \"a quick brown fox\" (indexDoc 1 \"the quick brown dog\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
bool-cases
|
||||
(list
|
||||
(list
|
||||
"term in two docs"
|
||||
"evalQuery idx (Term \"quick\")"
|
||||
(list 1 2))
|
||||
(list
|
||||
"term in two docs (the)"
|
||||
"evalQuery idx (Term \"the\")"
|
||||
(list 1 3))
|
||||
(list "term in one doc" "evalQuery idx (Term \"fox\")" (list 2))
|
||||
(list "term absent" "evalQuery idx (Term \"zzz\")" (list))
|
||||
(list
|
||||
"term case-sensitive at AST level"
|
||||
"evalQuery idx (Term \"QUICK\")"
|
||||
(list))
|
||||
(list "term on empty index" "evalQuery emptyIndex (Term \"cat\")" (list))
|
||||
(list
|
||||
"and both terms"
|
||||
"evalQuery idx (And (Term \"quick\") (Term \"brown\"))"
|
||||
(list 1 2))
|
||||
(list
|
||||
"and overlap subset"
|
||||
"evalQuery idx (And (Term \"the\") (Term \"dog\"))"
|
||||
(list 1 3))
|
||||
(list
|
||||
"and disjoint is empty"
|
||||
"evalQuery idx (And (Term \"the\") (Term \"fox\"))"
|
||||
(list))
|
||||
(list
|
||||
"and right-nested"
|
||||
"evalQuery idx (And (Term \"the\") (And (Term \"dog\") (Term \"barks\")))"
|
||||
(list 3))
|
||||
(list
|
||||
"or two singletons"
|
||||
"evalQuery idx (Or (Term \"fox\") (Term \"barks\"))"
|
||||
(list 2 3))
|
||||
(list
|
||||
"or all docs"
|
||||
"evalQuery idx (Or (Term \"quick\") (Term \"the\"))"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"or with absent term"
|
||||
"evalQuery idx (Or (Term \"fox\") (Term \"zzz\"))"
|
||||
(list 2))
|
||||
(list "not term" "evalQuery idx (Not (Term \"the\"))" (list 2))
|
||||
(list "not term 2" "evalQuery idx (Not (Term \"quick\"))" (list 3))
|
||||
(list
|
||||
"and with not"
|
||||
"evalQuery idx (And (Term \"quick\") (Not (Term \"the\")))"
|
||||
(list 2))
|
||||
(list
|
||||
"double negation"
|
||||
"evalQuery idx (Not (Not (Term \"fox\")))"
|
||||
(list 2))
|
||||
(list
|
||||
"or of and with term"
|
||||
"evalQuery idx (Or (And (Term \"the\") (Term \"dog\")) (Term \"fox\"))"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"phrase adjacent both docs"
|
||||
"evalQuery idx (Phrase [\"quick\", \"brown\"])"
|
||||
(list 1 2))
|
||||
(list
|
||||
"phrase adjacent one doc"
|
||||
"evalQuery idx (Phrase [\"brown\", \"dog\"])"
|
||||
(list 1))
|
||||
(list
|
||||
"phrase the quick"
|
||||
"evalQuery idx (Phrase [\"the\", \"quick\"])"
|
||||
(list 1))
|
||||
(list
|
||||
"phrase dog barks"
|
||||
"evalQuery idx (Phrase [\"dog\", \"barks\"])"
|
||||
(list 3))
|
||||
(list
|
||||
"phrase non-adjacent empty"
|
||||
"evalQuery idx (Phrase [\"quick\", \"dog\"])"
|
||||
(list))
|
||||
(list
|
||||
"phrase order matters"
|
||||
"evalQuery idx (Phrase [\"brown\", \"quick\"])"
|
||||
(list))
|
||||
(list
|
||||
"phrase single term"
|
||||
"evalQuery idx (Phrase [\"dog\"])"
|
||||
(list 1 3))
|
||||
(list
|
||||
"phrase three terms"
|
||||
"evalQuery idx (Phrase [\"the\", \"dog\", \"barks\"])"
|
||||
(list 3))
|
||||
(list
|
||||
"and of phrase and term"
|
||||
"evalQuery idx (And (Phrase [\"quick\", \"brown\"]) (Term \"dog\"))"
|
||||
(list 1))
|
||||
(list
|
||||
"not of phrase"
|
||||
"evalQuery idx (Not (Phrase [\"quick\", \"brown\"]))"
|
||||
(list 3))))
|
||||
|
||||
(define
|
||||
bool-results
|
||||
(search-batch search-corpus (map (fn (c) (nth c 1)) bool-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth bool-results i) (nth c 2)))
|
||||
bool-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
74
lib/search/tests/fuzzy.sx
Normal file
74
lib/search/tests/fuzzy.sx
Normal file
@@ -0,0 +1,74 @@
|
||||
;; Extension — fuzzy matching via Levenshtein edit distance.
|
||||
;; Corpus: 1 "color flavor" 2 "colour kitten" 3 "colored"
|
||||
;; allTerms: color colored colour flavor kitten
|
||||
|
||||
(define
|
||||
fuzzy-setup
|
||||
"idx = indexDoc 3 \"colored\" (indexDoc 2 \"colour kitten\" (indexDoc 1 \"color flavor\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
fuzzy-cases
|
||||
(list
|
||||
(list
|
||||
"editDist substitution"
|
||||
"[editDist \"kitten\" \"sitten\"]"
|
||||
(list 1))
|
||||
(list "editDist equal" "[editDist \"abc\" \"abc\"]" (list 0))
|
||||
(list "editDist deletion" "[editDist \"abc\" \"ab\"]" (list 1))
|
||||
(list "editDist insertion" "[editDist \"ab\" \"abc\"]" (list 1))
|
||||
(list "editDist from empty" "[editDist \"\" \"abc\"]" (list 3))
|
||||
(list "editDist both empty" "[editDist \"\" \"\"]" (list 0))
|
||||
(list
|
||||
"editDist classic"
|
||||
"[editDist \"kitten\" \"sitting\"]"
|
||||
(list 3))
|
||||
(list
|
||||
"editDist color colour"
|
||||
"[editDist \"color\" \"colour\"]"
|
||||
(list 1))
|
||||
(list
|
||||
"editDist color colored"
|
||||
"[editDist \"color\" \"colored\"]"
|
||||
(list 2))
|
||||
(list
|
||||
"fuzzy terms dist 1"
|
||||
"fuzzyTerms 1 \"color\" idx"
|
||||
(list "color" "colour"))
|
||||
(list
|
||||
"fuzzy terms dist 2"
|
||||
"fuzzyTerms 2 \"color\" idx"
|
||||
(list "color" "colored" "colour"))
|
||||
(list "fuzzy terms exact" "fuzzyTerms 0 \"color\" idx" (list "color"))
|
||||
(list
|
||||
"fuzzy terms other word"
|
||||
"fuzzyTerms 1 \"flavour\" idx"
|
||||
(list "flavor"))
|
||||
(list
|
||||
"fuzzy docs dist 1"
|
||||
"fuzzyDocs 1 \"color\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"fuzzy docs dist 2"
|
||||
"fuzzyDocs 2 \"color\" idx"
|
||||
(list 1 2 3))
|
||||
(list "fuzzy docs none" "fuzzyDocs 1 \"zzzzz\" idx" (list))
|
||||
(list
|
||||
"fuzzy rank dist 1"
|
||||
"fuzzyRankTfIdf 1 \"color\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"fuzzy rank dist 2"
|
||||
"fuzzyRankTfIdf 2 \"color\" idx"
|
||||
(list 1 2 3))))
|
||||
|
||||
(define
|
||||
fuzzy-results
|
||||
(search-batch fuzzy-setup (map (fn (c) (nth c 1)) fuzzy-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth fuzzy-results i) (nth c 2)))
|
||||
fuzzy-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
66
lib/search/tests/highlight.sx
Normal file
66
lib/search/tests/highlight.sx
Normal file
@@ -0,0 +1,66 @@
|
||||
;; Extension — highlight + snippet over document text.
|
||||
;; Text: "the quick brown fox jumps"
|
||||
|
||||
(define
|
||||
hl-cases
|
||||
(list
|
||||
(list
|
||||
"highlight two terms"
|
||||
"highlight [\"quick\", \"fox\"] \"the quick brown fox jumps\""
|
||||
"the [quick] brown [fox] jumps")
|
||||
(list
|
||||
"highlight none"
|
||||
"highlight [] \"the quick brown fox jumps\""
|
||||
"the quick brown fox jumps")
|
||||
(list
|
||||
"highlight absent term"
|
||||
"highlight [\"zzz\"] \"the quick brown fox jumps\""
|
||||
"the quick brown fox jumps")
|
||||
(list
|
||||
"highlight first token"
|
||||
"highlight [\"the\"] \"the quick brown fox jumps\""
|
||||
"[the] quick brown fox jumps")
|
||||
(list
|
||||
"highlight normalizes text"
|
||||
"highlight [\"quick\"] \"The Quick, brown!\""
|
||||
"the [quick] brown")
|
||||
(list
|
||||
"snippet around middle"
|
||||
"snippet 1 [\"brown\"] \"the quick brown fox jumps\""
|
||||
"quick [brown] fox")
|
||||
(list
|
||||
"snippet at start"
|
||||
"snippet 1 [\"the\"] \"the quick brown fox jumps\""
|
||||
"[the] quick brown")
|
||||
(list
|
||||
"snippet near end"
|
||||
"snippet 1 [\"fox\"] \"the quick brown fox jumps\""
|
||||
"brown [fox] jumps")
|
||||
(list
|
||||
"snippet ctx zero"
|
||||
"snippet 0 [\"brown\"] \"the quick brown fox jumps\""
|
||||
"[brown]")
|
||||
(list
|
||||
"snippet clamps at end"
|
||||
"snippet 2 [\"jumps\"] \"the quick brown fox jumps\""
|
||||
"brown fox [jumps]")
|
||||
(list
|
||||
"snippet no match shows head"
|
||||
"snippet 1 [\"zzz\"] \"the quick brown fox jumps\""
|
||||
"the quick brown")
|
||||
(list
|
||||
"snippet wide window"
|
||||
"snippet 5 [\"brown\"] \"the quick brown fox jumps\""
|
||||
"the quick [brown] fox jumps")))
|
||||
|
||||
(define
|
||||
hl-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) hl-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth hl-results i) (nth c 2)))
|
||||
hl-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
88
lib/search/tests/index.sx
Normal file
88
lib/search/tests/index.sx
Normal file
@@ -0,0 +1,88 @@
|
||||
;; Phase 1 — tokenize + inverted index.
|
||||
;; All cases run in ONE program evaluation (search-batch) to stay fast under load.
|
||||
;; Scalar results (docFreq) are wrapped as singleton lists so the batch is a list
|
||||
;; of lists.
|
||||
|
||||
(define
|
||||
index-cases
|
||||
(list
|
||||
(list
|
||||
"tokens basic lowercases"
|
||||
"tokens \"The Cat sat\""
|
||||
(list "the" "cat" "sat"))
|
||||
(list
|
||||
"tokens strips punctuation"
|
||||
"tokens \"Hello, World!\""
|
||||
(list "hello" "world"))
|
||||
(list "tokens collapses whitespace" "tokens \" a b \"" (list "a" "b"))
|
||||
(list "tokens empty is empty" "tokens \"\"" (list))
|
||||
(list "tokens keeps digits" "tokens \"abc123 x9\"" (list "abc123" "x9"))
|
||||
(list
|
||||
"positioned attaches ordinals"
|
||||
"positioned \"a b a\""
|
||||
(list
|
||||
(list "a" 0)
|
||||
(list "b" 1)
|
||||
(list "a" 2)))
|
||||
(list
|
||||
"index + lookup single doc"
|
||||
"lookupTerm \"cat\" (indexDoc 1 \"the cat sat\" emptyIndex)"
|
||||
(list (list 1 (list 1))))
|
||||
(list
|
||||
"lookup missing term is empty"
|
||||
"lookupTerm \"dog\" (indexDoc 1 \"the cat sat\" emptyIndex)"
|
||||
(list))
|
||||
(list
|
||||
"lookup records all positions"
|
||||
"lookupTerm \"the\" (indexDoc 1 \"the cat the dog the\" emptyIndex)"
|
||||
(list (list 1 (list 0 2 4))))
|
||||
(list
|
||||
"multi-doc posting list sorted by docid"
|
||||
"lookupTerm \"x\" (indexDoc 1 \"x y\" (indexDoc 2 \"x z\" emptyIndex))"
|
||||
(list
|
||||
(list 1 (list 0))
|
||||
(list 2 (list 0))))
|
||||
(list
|
||||
"index/query case symmetry"
|
||||
"lookupTerm \"cat\" (indexDoc 1 \"CAT Cat cat\" emptyIndex)"
|
||||
(list (list 1 (list 0 1 2))))
|
||||
(list
|
||||
"re-index replaces a doc"
|
||||
"lookupTerm \"a\" (indexDoc 1 \"a a a\" (indexDoc 1 \"a\" emptyIndex))"
|
||||
(list (list 1 (list 0 1 2))))
|
||||
(list
|
||||
"delete removes a doc"
|
||||
"lookupTerm \"cat\" (deleteDoc 1 (indexDoc 1 \"the cat\" emptyIndex))"
|
||||
(list))
|
||||
(list
|
||||
"delete leaves other docs"
|
||||
"lookupTerm \"cat\" (deleteDoc 2 (indexDoc 2 \"big cat\" (indexDoc 1 \"the cat\" emptyIndex)))"
|
||||
(list (list 1 (list 1))))
|
||||
(list
|
||||
"docFreq counts docs"
|
||||
"[docFreq \"cat\" (indexDoc 2 \"a cat\" (indexDoc 1 \"the cat\" emptyIndex))]"
|
||||
(list 2))
|
||||
(list
|
||||
"docFreq zero for missing"
|
||||
"[docFreq \"zzz\" (indexDoc 1 \"a b\" emptyIndex)]"
|
||||
(list 0))
|
||||
(list
|
||||
"allTerms sorted and unique"
|
||||
"allTerms (indexDoc 1 \"banana apple cherry apple\" emptyIndex)"
|
||||
(list "apple" "banana" "cherry"))
|
||||
(list
|
||||
"allTerms merged across docs"
|
||||
"allTerms (indexDoc 2 \"d a\" (indexDoc 1 \"c b\" emptyIndex))"
|
||||
(list "a" "b" "c" "d"))))
|
||||
|
||||
(define
|
||||
index-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) index-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth index-results i) (nth c 2)))
|
||||
index-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
102
lib/search/tests/integration.sx
Normal file
102
lib/search/tests/integration.sx
Normal file
@@ -0,0 +1,102 @@
|
||||
;; Phase 4 — federation (merge per-peer indices) + ACL post-filter.
|
||||
;; Peers (global id = peer*1000 + local):
|
||||
;; peer 1: 1 "alpha beta" 2 "alpha gamma" -> 1001 1002
|
||||
;; peer 2: 1 "alpha delta" 2 "beta gamma" -> 2001 2002
|
||||
;; ACL predicates are injected (viewer baked in by the caller), applied post-rank.
|
||||
|
||||
(define
|
||||
fed-setup
|
||||
"p1 = indexDoc 2 \"alpha gamma\" (indexDoc 1 \"alpha beta\" emptyIndex)\np2 = indexDoc 2 \"beta gamma\" (indexDoc 1 \"alpha delta\" emptyIndex)\nfed = fedIndex [(1, p1), (2, p2)]\npermitP1 g = g < 2000\npermitNone g = False\npermitList g = elem g [1002, 2001]\n")
|
||||
|
||||
(define
|
||||
fed-cases
|
||||
(list
|
||||
(list
|
||||
"fed merges all docs"
|
||||
"sort (allDocs fed)"
|
||||
(list 1001 1002 2001 2002))
|
||||
(list
|
||||
"fed docFreq across peers"
|
||||
"[docFreq \"alpha\" fed]"
|
||||
(list 3))
|
||||
(list "fed docFreq beta" "[docFreq \"beta\" fed]" (list 2))
|
||||
(list "fed numDocs" "[numDocs fed]" (list 4))
|
||||
(list
|
||||
"fed term lookup spans peers"
|
||||
"map fst (lookupTerm \"gamma\" fed)"
|
||||
(list 1002 2002))
|
||||
(list
|
||||
"fed preserves positions"
|
||||
"lookupTerm \"beta\" fed"
|
||||
(list
|
||||
(list 1001 (list 1))
|
||||
(list 2002 (list 0))))
|
||||
(list
|
||||
"fed rank alpha tie by gid"
|
||||
"rankTfIdf [\"alpha\"] fed"
|
||||
(list 1001 1002 2001))
|
||||
(list
|
||||
"fed rank beta"
|
||||
"rankTfIdf [\"beta\"] fed"
|
||||
(list 1001 2002))
|
||||
(list
|
||||
"fed boolean and"
|
||||
"searchQuery \"alpha AND beta\" fed"
|
||||
(list 1001))
|
||||
(list
|
||||
"fed boolean or"
|
||||
"searchQuery \"delta OR barks\" fed"
|
||||
(list 2001))
|
||||
(list
|
||||
"fed phrase within peer1"
|
||||
"searchQuery \"\\\"alpha beta\\\"\" fed"
|
||||
(list 1001))
|
||||
(list
|
||||
"fed phrase within peer2"
|
||||
"searchQuery \"\\\"beta gamma\\\"\" fed"
|
||||
(list 2002))
|
||||
(list
|
||||
"fed phrase peer2 alpha delta"
|
||||
"searchQuery \"\\\"alpha delta\\\"\" fed"
|
||||
(list 2001))
|
||||
(list "fed empty peer list" "allDocs (fedIndex [])" (list))
|
||||
(list
|
||||
"fed single relabelled peer"
|
||||
"rankTfIdf [\"alpha\"] (fedIndex [(5, p1)])"
|
||||
(list 5001 5002))
|
||||
(list
|
||||
"acl peer1 only"
|
||||
"aclFilter permitP1 (rankTfIdf [\"alpha\"] fed)"
|
||||
(list 1001 1002))
|
||||
(list
|
||||
"acl allowlist preserves rank order"
|
||||
"aclFilter permitList (rankTfIdf [\"alpha\"] fed)"
|
||||
(list 1002 2001))
|
||||
(list
|
||||
"acl topN after filter"
|
||||
"topNTfIdfAcl 1 permitP1 [\"alpha\"] fed"
|
||||
(list 1001))
|
||||
(list
|
||||
"acl denies all"
|
||||
"aclFilter permitNone (rankTfIdf [\"alpha\"] fed)"
|
||||
(list))
|
||||
(list
|
||||
"acl on bm25"
|
||||
"searchBm25Acl permitP1 1.5 0.75 [\"alpha\"] fed"
|
||||
(list 1001 1002))
|
||||
(list
|
||||
"acl end-to-end tfidf"
|
||||
"searchTfIdfAcl permitP1 [\"alpha\"] fed"
|
||||
(list 1001 1002))))
|
||||
|
||||
(define
|
||||
fed-results
|
||||
(search-batch fed-setup (map (fn (c) (nth c 1)) fed-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth fed-results i) (nth c 2)))
|
||||
fed-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
49
lib/search/tests/near.sx
Normal file
49
lib/search/tests/near.sx
Normal file
@@ -0,0 +1,49 @@
|
||||
;; Extension — proximity (NEAR) search: terms within k positions, unordered.
|
||||
;; Corpus:
|
||||
;; 1 "the quick brown fox" the0 quick1 brown2 fox3
|
||||
;; 2 "quick the lazy fox dog" quick0 the1 lazy2 fox3 dog4
|
||||
;; 3 "fox runs quick" fox0 runs1 quick2
|
||||
|
||||
(define
|
||||
near-setup
|
||||
"idx = indexDoc 3 \"fox runs quick\" (indexDoc 2 \"quick the lazy fox dog\" (indexDoc 1 \"the quick brown fox\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
near-cases
|
||||
(list
|
||||
(list
|
||||
"near adjacent one doc"
|
||||
"nearDocs 1 \"quick\" \"brown\" idx"
|
||||
(list 1))
|
||||
(list
|
||||
"near adjacent both docs"
|
||||
"nearDocs 1 \"quick\" \"the\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"near within 2"
|
||||
"nearDocs 2 \"quick\" \"fox\" idx"
|
||||
(list 1 3))
|
||||
(list "near too far at k1" "nearDocs 1 \"quick\" \"fox\" idx" (list))
|
||||
(list
|
||||
"near unordered symmetric"
|
||||
"nearDocs 2 \"fox\" \"quick\" idx"
|
||||
(list 1 3))
|
||||
(list "near wider window" "nearDocs 5 \"the\" \"dog\" idx" (list 2))
|
||||
(list "near absent term" "nearDocs 1 \"quick\" \"zzz\" idx" (list))
|
||||
(list "near needs both terms" "nearDocs 3 \"brown\" \"dog\" idx" (list))
|
||||
(list
|
||||
"near same docs only"
|
||||
"nearDocs 3 \"fox\" \"runs\" idx"
|
||||
(list 3))))
|
||||
|
||||
(define
|
||||
near-results
|
||||
(search-batch near-setup (map (fn (c) (nth c 1)) near-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth near-results i) (nth c 2)))
|
||||
near-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
53
lib/search/tests/page.sx
Normal file
53
lib/search/tests/page.sx
Normal file
@@ -0,0 +1,53 @@
|
||||
;; Extension — result pagination (offset / limit) over ranked results.
|
||||
;; Corpus (tf of "x" descending): 1 x4 2 x3 3 x2 4 x1 5 y(no x)
|
||||
;; rankTfIdf ["x"] -> [1,2,3,4]
|
||||
|
||||
(define
|
||||
page-setup
|
||||
"idx = indexDoc 5 \"y\" (indexDoc 4 \"x\" (indexDoc 3 \"x x\" (indexDoc 2 \"x x x\" (indexDoc 1 \"x x x x other\" emptyIndex))))\n")
|
||||
|
||||
(define
|
||||
page-cases
|
||||
(list
|
||||
(list "first page" "pageTfIdf 0 2 [\"x\"] idx" (list 1 2))
|
||||
(list
|
||||
"second page"
|
||||
"pageTfIdf 2 2 [\"x\"] idx"
|
||||
(list 3 4))
|
||||
(list
|
||||
"sliding window"
|
||||
"pageTfIdf 1 2 [\"x\"] idx"
|
||||
(list 2 3))
|
||||
(list
|
||||
"limit exceeds remaining"
|
||||
"pageTfIdf 3 10 [\"x\"] idx"
|
||||
(list 4))
|
||||
(list "offset past end" "pageTfIdf 4 2 [\"x\"] idx" (list))
|
||||
(list "limit zero" "pageTfIdf 0 0 [\"x\"] idx" (list))
|
||||
(list
|
||||
"whole result"
|
||||
"pageTfIdf 0 10 [\"x\"] idx"
|
||||
(list 1 2 3 4))
|
||||
(list
|
||||
"paginate raw list"
|
||||
"paginate 1 2 [10, 20, 30, 40]"
|
||||
(list 20 30))
|
||||
(list "paginate raw past end" "paginate 9 2 [10, 20]" (list))
|
||||
(list
|
||||
"bm25 page window size"
|
||||
"[length (pageBm25 0 2 1.5 0.75 [\"x\"] idx)]"
|
||||
(list 2))
|
||||
(list "result count" "[resultCount [\"x\"] idx]" (list 4))
|
||||
(list "result count zero" "[resultCount [\"zzz\"] idx]" (list 0))))
|
||||
|
||||
(define
|
||||
page-results
|
||||
(search-batch page-setup (map (fn (c) (nth c 1)) page-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth page-results i) (nth c 2)))
|
||||
page-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
139
lib/search/tests/parse.sx
Normal file
139
lib/search/tests/parse.sx
Normal file
@@ -0,0 +1,139 @@
|
||||
;; Phase 2 — query parser (parseQuery / searchQuery).
|
||||
;; AST cases assert showQ (parseQuery s); search cases assert searchQuery s idx
|
||||
;; against the standard corpus. Each group runs in one batched program eval.
|
||||
;; doc 1 "the quick brown dog" doc 2 "a quick brown fox" doc 3 "the dog barks loudly"
|
||||
|
||||
(define
|
||||
parse-corpus
|
||||
"idx = indexDoc 3 \"the dog barks loudly\" (indexDoc 2 \"a quick brown fox\" (indexDoc 1 \"the quick brown dog\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
ast-cases
|
||||
(list
|
||||
(list "single term" "showQ (parseQuery \"cat\")" "T:cat")
|
||||
(list "term normalized" "showQ (parseQuery \"CAT\")" "T:cat")
|
||||
(list "explicit and" "showQ (parseQuery \"cat AND dog\")" "(T:cat & T:dog)")
|
||||
(list
|
||||
"lowercase and keyword"
|
||||
"showQ (parseQuery \"cat and dog\")"
|
||||
"(T:cat & T:dog)")
|
||||
(list "implicit and" "showQ (parseQuery \"cat dog\")" "(T:cat & T:dog)")
|
||||
(list "or" "showQ (parseQuery \"cat OR dog\")" "(T:cat | T:dog)")
|
||||
(list "not" "showQ (parseQuery \"NOT cat\")" "!T:cat")
|
||||
(list
|
||||
"and binds tighter than or"
|
||||
"showQ (parseQuery \"cat AND dog OR bird\")"
|
||||
"((T:cat & T:dog) | T:bird)")
|
||||
(list
|
||||
"or then and"
|
||||
"showQ (parseQuery \"cat OR dog AND bird\")"
|
||||
"(T:cat | (T:dog & T:bird))")
|
||||
(list
|
||||
"parens override precedence"
|
||||
"showQ (parseQuery \"(cat OR dog) AND bird\")"
|
||||
"((T:cat | T:dog) & T:bird)")
|
||||
(list
|
||||
"and with not"
|
||||
"showQ (parseQuery \"cat AND NOT dog\")"
|
||||
"(T:cat & !T:dog)")
|
||||
(list
|
||||
"two-word phrase"
|
||||
"showQ (parseQuery \"\\\"quick brown\\\"\")"
|
||||
"P:quick-brown")
|
||||
(list
|
||||
"three-word phrase"
|
||||
"showQ (parseQuery \"\\\"quick brown fox\\\"\")"
|
||||
"P:quick-brown-fox")
|
||||
(list
|
||||
"and left-assoc"
|
||||
"showQ (parseQuery \"a AND b AND c\")"
|
||||
"((T:a & T:b) & T:c)")
|
||||
(list
|
||||
"or left-assoc"
|
||||
"showQ (parseQuery \"a OR b OR c\")"
|
||||
"((T:a | T:b) | T:c)")
|
||||
(list
|
||||
"punctuation stripped"
|
||||
"showQ (parseQuery \"cat, dog!\")"
|
||||
"(T:cat & T:dog)")))
|
||||
|
||||
(define
|
||||
search-cases
|
||||
(list
|
||||
(list "term" "searchQuery \"quick\" idx" (list 1 2))
|
||||
(list
|
||||
"term normalized"
|
||||
"searchQuery \"QUICK\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"explicit and"
|
||||
"searchQuery \"quick AND brown\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"implicit and"
|
||||
"searchQuery \"quick brown\" idx"
|
||||
(list 1 2))
|
||||
(list "and disjoint" "searchQuery \"the AND fox\" idx" (list))
|
||||
(list "or" "searchQuery \"fox OR barks\" idx" (list 2 3))
|
||||
(list "not" "searchQuery \"NOT the\" idx" (list 2))
|
||||
(list "and not" "searchQuery \"quick AND NOT the\" idx" (list 2))
|
||||
(list
|
||||
"precedence and-or"
|
||||
"searchQuery \"the AND dog OR fox\" idx"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"precedence or-and"
|
||||
"searchQuery \"fox OR the AND dog\" idx"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"parens"
|
||||
"searchQuery \"the AND (dog OR fox)\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"phrase"
|
||||
"searchQuery \"\\\"quick brown\\\"\" idx"
|
||||
(list 1 2))
|
||||
(list
|
||||
"phrase one doc"
|
||||
"searchQuery \"\\\"brown dog\\\"\" idx"
|
||||
(list 1))
|
||||
(list
|
||||
"phrase and term"
|
||||
"searchQuery \"\\\"quick brown\\\" AND dog\" idx"
|
||||
(list 1))
|
||||
(list
|
||||
"not phrase"
|
||||
"searchQuery \"NOT \\\"quick brown\\\"\" idx"
|
||||
(list 3))
|
||||
(list
|
||||
"implicit and terms"
|
||||
"searchQuery \"dog barks\" idx"
|
||||
(list 3))))
|
||||
|
||||
(define
|
||||
ast-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) ast-cases)))
|
||||
(define
|
||||
search-results
|
||||
(search-batch
|
||||
parse-corpus
|
||||
(map (fn (c) (nth c 1)) search-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test
|
||||
(str "ast: " (nth c 0))
|
||||
(nth ast-results i)
|
||||
(nth c 2)))
|
||||
ast-cases)
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test
|
||||
(str "search: " (nth c 0))
|
||||
(nth search-results i)
|
||||
(nth c 2)))
|
||||
search-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
63
lib/search/tests/prefix.sx
Normal file
63
lib/search/tests/prefix.sx
Normal file
@@ -0,0 +1,63 @@
|
||||
;; Extension — prefix / wildcard queries.
|
||||
;; Corpus: 1 "alpha alpine" 2 "beta apple" 3 "banana alpha"
|
||||
;; allTerms sorted: alpha alpine apple banana beta
|
||||
|
||||
(define
|
||||
prefix-setup
|
||||
"idx = indexDoc 3 \"banana alpha\" (indexDoc 2 \"beta apple\" (indexDoc 1 \"alpha alpine\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
prefix-cases
|
||||
(list
|
||||
(list
|
||||
"prefix terms two matches"
|
||||
"prefixTerms \"al\" idx"
|
||||
(list "alpha" "alpine"))
|
||||
(list
|
||||
"prefix terms narrower"
|
||||
"prefixTerms \"alp\" idx"
|
||||
(list "alpha" "alpine"))
|
||||
(list
|
||||
"prefix terms wide"
|
||||
"prefixTerms \"a\" idx"
|
||||
(list "alpha" "alpine" "apple"))
|
||||
(list "prefix terms single" "prefixTerms \"ban\" idx" (list "banana"))
|
||||
(list "prefix terms exact term" "prefixTerms \"beta\" idx" (list "beta"))
|
||||
(list "prefix terms none" "prefixTerms \"z\" idx" (list))
|
||||
(list
|
||||
"prefix docs union"
|
||||
"prefixDocs \"al\" idx"
|
||||
(list 1 3))
|
||||
(list "prefix docs single term" "prefixDocs \"ban\" idx" (list 3))
|
||||
(list
|
||||
"prefix docs wide"
|
||||
"prefixDocs \"a\" idx"
|
||||
(list 1 2 3))
|
||||
(list "prefix docs none" "prefixDocs \"z\" idx" (list))
|
||||
(list
|
||||
"prefix docs exact"
|
||||
"prefixDocs \"alpha\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"prefix rank ranks by matched terms"
|
||||
"prefixRankTfIdf \"al\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"prefix rank single doc"
|
||||
"prefixRankTfIdf \"ban\" idx"
|
||||
(list 3))
|
||||
(list "prefix rank empty" "prefixRankTfIdf \"z\" idx" (list))))
|
||||
|
||||
(define
|
||||
prefix-results
|
||||
(search-batch
|
||||
prefix-setup
|
||||
(map (fn (c) (nth c 1)) prefix-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth prefix-results i) (nth c 2)))
|
||||
prefix-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
90
lib/search/tests/rank.sx
Normal file
90
lib/search/tests/rank.sx
Normal file
@@ -0,0 +1,90 @@
|
||||
;; Phase 3 — ranking (TF-IDF, BM25, top-N). Deterministic: ties broken by DocId.
|
||||
;; Corpora:
|
||||
;; idx1: 1 "alpha alpha alpha gamma" 2 "alpha" 3 "beta"
|
||||
;; idx2: 1 "cat" 2 "cat cat dog elephant frog grape" 3 "zzz"
|
||||
;; idx3: 1 "kite" 2 "kite" (identical docs -> tiebreak)
|
||||
|
||||
(define
|
||||
rank-setup
|
||||
"idx1 = indexDoc 3 \"beta\" (indexDoc 2 \"alpha\" (indexDoc 1 \"alpha alpha alpha gamma\" emptyIndex))\nidx2 = indexDoc 3 \"zzz\" (indexDoc 2 \"cat cat dog elephant frog grape\" (indexDoc 1 \"cat\" emptyIndex))\nidx3 = indexDoc 2 \"kite\" (indexDoc 1 \"kite\" emptyIndex)\n")
|
||||
|
||||
(define
|
||||
rank-cases
|
||||
(list
|
||||
(list
|
||||
"tfidf tf ordering"
|
||||
"rankTfIdf [\"alpha\"] idx1"
|
||||
(list 1 2))
|
||||
(list
|
||||
"tfidf rare term boosts"
|
||||
"rankTfIdf [\"alpha\", \"beta\"] idx1"
|
||||
(list 1 3 2))
|
||||
(list
|
||||
"tfidf single-doc term"
|
||||
"rankTfIdf [\"gamma\"] idx1"
|
||||
(list 1))
|
||||
(list "tfidf absent term empty" "rankTfIdf [\"nope\"] idx1" (list))
|
||||
(list "tfidf empty query empty" "rankTfIdf [] idx1" (list))
|
||||
(list
|
||||
"tfidf candidate union tie by docid"
|
||||
"rankTfIdf [\"beta\", \"gamma\"] idx1"
|
||||
(list 1 3))
|
||||
(list
|
||||
"tfidf tf ordering idx2"
|
||||
"rankTfIdf [\"cat\"] idx2"
|
||||
(list 2 1))
|
||||
(list "topN tfidf 1" "topNTfIdf 1 [\"alpha\"] idx1" (list 1))
|
||||
(list
|
||||
"topN tfidf 2"
|
||||
"topNTfIdf 2 [\"alpha\", \"beta\"] idx1"
|
||||
(list 1 3))
|
||||
(list
|
||||
"topN exceeds results"
|
||||
"topNTfIdf 10 [\"gamma\"] idx1"
|
||||
(list 1))
|
||||
(list "topN zero" "topNTfIdf 0 [\"alpha\"] idx1" (list))
|
||||
(list
|
||||
"bm25 tf+length flips tfidf"
|
||||
"rankBm25 1.5 0.75 [\"cat\"] idx2"
|
||||
(list 1 2))
|
||||
(list
|
||||
"bm25 b=0 ignores length"
|
||||
"rankBm25 1.5 0.0 [\"cat\"] idx2"
|
||||
(list 2 1))
|
||||
(list
|
||||
"bm25 alpha idx1"
|
||||
"rankBm25 1.5 0.75 [\"alpha\"] idx1"
|
||||
(list 1 2))
|
||||
(list "bm25 absent empty" "rankBm25 1.5 0.75 [\"nope\"] idx1" (list))
|
||||
(list
|
||||
"bm25 single-doc term"
|
||||
"rankBm25 1.5 0.75 [\"gamma\"] idx1"
|
||||
(list 1))
|
||||
(list "bm25 topN 1" "topNBm25 1 1.5 0.75 [\"cat\"] idx2" (list 1))
|
||||
(list
|
||||
"bm25 same candidate set"
|
||||
"sort (rankBm25 1.5 0.75 [\"alpha\", \"beta\"] idx1)"
|
||||
(list 1 2 3))
|
||||
(list
|
||||
"tfidf stable tiebreak"
|
||||
"rankTfIdf [\"kite\"] idx3"
|
||||
(list 1 2))
|
||||
(list
|
||||
"bm25 stable tiebreak"
|
||||
"rankBm25 1.5 0.75 [\"kite\"] idx3"
|
||||
(list 1 2))
|
||||
(list "numDocs" "[numDocs idx1]" (list 3))
|
||||
(list "docLen counts tokens" "[docLen 1 idx1]" (list 4))
|
||||
(list "docFreq via index" "[docFreq \"alpha\" idx1]" (list 2))))
|
||||
|
||||
(define
|
||||
rank-results
|
||||
(search-batch rank-setup (map (fn (c) (nth c 1)) rank-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth rank-results i) (nth c 2)))
|
||||
rank-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
67
lib/search/tests/rankq.sx
Normal file
67
lib/search/tests/rankq.sx
Normal file
@@ -0,0 +1,67 @@
|
||||
;; Extension — boolean-filtered ranked search (filter then rank by relevance).
|
||||
;; Corpus:
|
||||
;; 1 "apple apple banana" apple2 banana1
|
||||
;; 2 "apple cherry" apple1 cherry1
|
||||
;; 3 "banana cherry" banana1 cherry1
|
||||
;; 4 "apple banana cherry" apple1 banana1 cherry1
|
||||
|
||||
(define
|
||||
rankq-setup
|
||||
"idx = indexDoc 4 \"apple banana cherry\" (indexDoc 3 \"banana cherry\" (indexDoc 2 \"apple cherry\" (indexDoc 1 \"apple apple banana\" emptyIndex)))\n")
|
||||
|
||||
(define
|
||||
rankq-cases
|
||||
(list
|
||||
(list
|
||||
"queryTerms and"
|
||||
"queryTerms (parseQuery \"apple AND banana\")"
|
||||
(list "apple" "banana"))
|
||||
(list
|
||||
"queryTerms or not"
|
||||
"queryTerms (parseQuery \"a OR NOT b\")"
|
||||
(list "a" "b"))
|
||||
(list
|
||||
"queryTerms phrase"
|
||||
"queryTerms (parseQuery \"\\\"x y\\\" OR z\")"
|
||||
(list "x" "y" "z"))
|
||||
(list
|
||||
"and filter ranked by tf"
|
||||
"searchRankTfIdf \"apple AND banana\" idx"
|
||||
(list 1 4))
|
||||
(list
|
||||
"single term ranked tie"
|
||||
"searchRankTfIdf \"cherry\" idx"
|
||||
(list 2 3 4))
|
||||
(list
|
||||
"or filter ranked"
|
||||
"searchRankTfIdf \"apple OR banana\" idx"
|
||||
(list 1 4 2 3))
|
||||
(list
|
||||
"and-not narrows then ranks"
|
||||
"searchRankTfIdf \"apple AND NOT banana\" idx"
|
||||
(list 2))
|
||||
(list
|
||||
"phrase filter ranked"
|
||||
"searchRankTfIdf \"\\\"apple banana\\\"\" idx"
|
||||
(list 1 4))
|
||||
(list "no matches" "searchRankTfIdf \"zzz\" idx" (list))
|
||||
(list
|
||||
"bm25 boolean ranked subset"
|
||||
"sort (searchRankBm25 1.5 0.75 \"apple OR banana\" idx)"
|
||||
(list 1 2 3 4))
|
||||
(list
|
||||
"bm25 and filter"
|
||||
"searchRankBm25 1.5 0.75 \"apple AND NOT banana\" idx"
|
||||
(list 2))))
|
||||
|
||||
(define
|
||||
rankq-results
|
||||
(search-batch rankq-setup (map (fn (c) (nth c 1)) rankq-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth rankq-results i) (nth c 2)))
|
||||
rankq-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
47
lib/search/tests/stem.sx
Normal file
47
lib/search/tests/stem.sx
Normal file
@@ -0,0 +1,47 @@
|
||||
;; Extension — stemming (suffix stripping). Scalar string results wrapped in [].
|
||||
|
||||
(define
|
||||
stem-cases
|
||||
(list
|
||||
(list "stem plural s" "[stem \"cats\"]" (list "cat"))
|
||||
(list "stem plural dogs" "[stem \"dogs\"]" (list "dog"))
|
||||
(list "stem keeps ss" "[stem \"pass\"]" (list "pass"))
|
||||
(list "stem short s unchanged" "[stem \"is\"]" (list "is"))
|
||||
(list "stem es boxes" "[stem \"boxes\"]" (list "box"))
|
||||
(list "stem es wishes" "[stem \"wishes\"]" (list "wish"))
|
||||
(list "stem ies cities" "[stem \"cities\"]" (list "city"))
|
||||
(list "stem ies parties" "[stem \"parties\"]" (list "party"))
|
||||
(list "stem ing jumping" "[stem \"jumping\"]" (list "jump"))
|
||||
(list "stem ing running literal" "[stem \"running\"]" (list "runn"))
|
||||
(list "stem ed jumped" "[stem \"jumped\"]" (list "jump"))
|
||||
(list "stem ed wanted" "[stem \"wanted\"]" (list "want"))
|
||||
(list "stem short ed unchanged" "[stem \"red\"]" (list "red"))
|
||||
(list "stem no suffix" "[stem \"cat\"]" (list "cat"))
|
||||
(list
|
||||
"stemText normalizes and stems"
|
||||
"[stemText \"Cats Running!\"]"
|
||||
(list "cat runn"))
|
||||
(list
|
||||
"stemTokens list"
|
||||
"stemTokens \"boxes and cats\""
|
||||
(list "box" "and" "cat"))
|
||||
(list
|
||||
"indexStemmed unifies plural"
|
||||
"map fst (lookupTerm \"cat\" (indexStemmed 2 \"a cat\" (indexStemmed 1 \"the cats\" emptyIndex)))"
|
||||
(list 1 2))
|
||||
(list
|
||||
"indexStemmed stem query"
|
||||
"map fst (lookupTerm (stem \"boxes\") (indexStemmed 1 \"many boxes\" emptyIndex))"
|
||||
(list 1))))
|
||||
|
||||
(define
|
||||
stem-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) stem-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth stem-results i) (nth c 2)))
|
||||
stem-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
42
lib/search/tests/suggest.sx
Normal file
42
lib/search/tests/suggest.sx
Normal file
@@ -0,0 +1,42 @@
|
||||
;; Extension — did-you-mean / spelling suggestion.
|
||||
;; Corpus terms (sorted): ample apple apply banana orange
|
||||
|
||||
(define
|
||||
suggest-setup
|
||||
"idx = indexDoc 1 \"apple apply ample banana orange\" emptyIndex\n")
|
||||
|
||||
(define
|
||||
suggest-cases
|
||||
(list
|
||||
(list "suggest exact term" "[suggest \"apple\" idx]" (list "apple"))
|
||||
(list
|
||||
"suggest misspelled banana"
|
||||
"[suggest \"bananna\" idx]"
|
||||
(list "banana"))
|
||||
(list
|
||||
"suggest missing letter orange"
|
||||
"[suggest \"orang\" idx]"
|
||||
(list "orange"))
|
||||
(list "suggest closest apply" "[suggest \"aply\" idx]" (list "apply"))
|
||||
(list "suggestN 1 banana" "suggestN 1 \"bananna\" idx" (list "banana"))
|
||||
(list
|
||||
"suggestN 2 ties alpha"
|
||||
"suggestN 2 \"aple\" idx"
|
||||
(list "ample" "apple"))
|
||||
(list "suggest empty term shortest" "[suggest \"\" idx]" (list "ample"))
|
||||
(list "suggest empty index" "[suggest \"apple\" emptyIndex]" (list ""))
|
||||
(list "suggestN empty index" "suggestN 1 \"apple\" emptyIndex" (list))))
|
||||
|
||||
(define
|
||||
suggest-results
|
||||
(search-batch
|
||||
suggest-setup
|
||||
(map (fn (c) (nth c 1)) suggest-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth suggest-results i) (nth c 2)))
|
||||
suggest-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
53
lib/search/tests/syn.sx
Normal file
53
lib/search/tests/syn.sx
Normal file
@@ -0,0 +1,53 @@
|
||||
;; Extension — synonym / query expansion.
|
||||
;; synmap: car -> automobile, vehicle ; big -> large
|
||||
;; Corpus: 1 "fast car" 2 "shiny automobile" 3 "big truck" 4 "large house" 5 "vehicle review"
|
||||
|
||||
(define
|
||||
syn-setup
|
||||
"synmap = [(\"car\", [\"automobile\", \"vehicle\"]), (\"big\", [\"large\"])]\nidx = indexDoc 5 \"vehicle review\" (indexDoc 4 \"large house\" (indexDoc 3 \"big truck\" (indexDoc 2 \"shiny automobile\" (indexDoc 1 \"fast car\" emptyIndex))))\n")
|
||||
|
||||
(define
|
||||
syn-cases
|
||||
(list
|
||||
(list
|
||||
"expand term with synonyms"
|
||||
"expandTerm synmap \"car\""
|
||||
(list "car" "automobile" "vehicle"))
|
||||
(list
|
||||
"expand single synonym"
|
||||
"expandTerm synmap \"big\""
|
||||
(list "big" "large"))
|
||||
(list "expand unknown term" "expandTerm synmap \"banana\"" (list "banana"))
|
||||
(list
|
||||
"syn docs union"
|
||||
"synDocs synmap \"car\" idx"
|
||||
(list 1 2 5))
|
||||
(list
|
||||
"syn docs single synonym"
|
||||
"synDocs synmap \"big\" idx"
|
||||
(list 3 4))
|
||||
(list
|
||||
"syn docs no synonyms"
|
||||
"synDocs synmap \"house\" idx"
|
||||
(list 4))
|
||||
(list "syn docs absent" "synDocs synmap \"plane\" idx" (list))
|
||||
(list
|
||||
"syn rank expanded"
|
||||
"synRankTfIdf synmap \"car\" idx"
|
||||
(list 1 2 5))
|
||||
(list
|
||||
"syn rank single"
|
||||
"synRankTfIdf synmap \"big\" idx"
|
||||
(list 3 4))))
|
||||
|
||||
(define
|
||||
syn-results
|
||||
(search-batch syn-setup (map (fn (c) (nth c 1)) syn-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth syn-results i) (nth c 2)))
|
||||
syn-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
8
lib/search/tokenize.sx
Normal file
8
lib/search/tokenize.sx
Normal file
@@ -0,0 +1,8 @@
|
||||
;; search tokenizer — Haskell source fragment.
|
||||
;; normalize (lowercase + strip punctuation), split on whitespace, attach positions.
|
||||
;; tokens :: String -> [String]
|
||||
;; positioned :: String -> [(String, Int)] -- 0-based ordinal positions
|
||||
|
||||
(define
|
||||
search/tokenize-src
|
||||
"lowerChar c = chr (toLower (ord c))\nnormChar c = if isAlphaNum c then lowerChar c else ' '\nisBlankCh c = c == ' '\ndropBlanks [] = []\ndropBlanks (c:cs) = if isBlankCh c then dropBlanks cs else c:cs\ntakeWord [] = []\ntakeWord (c:cs) = if isBlankCh c then [] else c : takeWord cs\nafterWord [] = []\nafterWord (c:cs) = if isBlankCh c then c:cs else afterWord cs\nsplitWords s = let s2 = dropBlanks s in if null s2 then [] else takeWord s2 : splitWords (afterWord s2)\nappendStr a b = a ++ b\njoinChars cs = foldr appendStr \"\" cs\ntokens s = map joinChars (splitWords (map normChar s))\nposFrom i [] = []\nposFrom i (x:xs) = (x, i) : posFrom (i + 1) xs\npositioned s = posFrom 0 (tokens s)\n")
|
||||
@@ -1,115 +0,0 @@
|
||||
# persist-on-sx loop agent (single agent, queue-driven)
|
||||
|
||||
Role: iterates `plans/persist-on-sx.md` forever. **Durable state on the SX kernel**
|
||||
— the foundation substrate every other subsystem currently fakes with an in-memory
|
||||
mutable list. Event log (append-only streams) + kv (current-state) over one
|
||||
injectable backend; pure projections; snapshots; durable IO at the kernel's
|
||||
`perform` boundary. This is **substrate-level**, not a guest language.
|
||||
|
||||
```
|
||||
description: persist-on-sx queue loop
|
||||
subagent_type: general-purpose
|
||||
run_in_background: true
|
||||
isolation: worktree
|
||||
```
|
||||
|
||||
## Prompt
|
||||
|
||||
You are the sole background agent working `plans/persist-on-sx.md`. Isolated
|
||||
worktree `/root/rose-ash-loops/persist` on branch `loops/persist`, forever, one
|
||||
commit per feature. Push to `origin/loops/persist` after every commit. Never touch
|
||||
`main` or `architecture`.
|
||||
|
||||
## Restart baseline — check before iterating
|
||||
|
||||
1. Read `plans/persist-on-sx.md` — roadmap + Progress log. Note the scope table:
|
||||
persist owns the **log** + **kv** facets; blobs are delegated (store the CID,
|
||||
not the bytes); cache is out of scope. Do not event-source everything.
|
||||
2. `ls lib/persist/` — pick up from the most advanced file.
|
||||
3. If `lib/persist/tests/*.sx` exist, run them via `bash lib/persist/conformance.sh`.
|
||||
Green before new work.
|
||||
4. If `lib/persist/scoreboard.md` exists, that's your baseline.
|
||||
5. **Learn the substrate before writing durable code.** persist sits on the kernel's
|
||||
IO-suspension surface — the third CEK phase: `perform`, `cek-step-loop`,
|
||||
`cek-resume`, `make-cek-suspended`. Study how IO is requested and resumed, and
|
||||
how `spec/harness.sx` mocks an IO platform for tests (assert-io-*). Phases 1–3
|
||||
need NO real IO — the in-memory backend is pure SX. Real durable IO (Phase 4)
|
||||
goes through `perform` and is tested against the mock-IO harness, not a real disk.
|
||||
Verify the actual exported names with sx_find_all / grep before relying on them.
|
||||
|
||||
## The queue
|
||||
|
||||
Phase order per `plans/persist-on-sx.md`:
|
||||
|
||||
- **Phase 1** — log + kv + in-memory backend (event record, injectable backend
|
||||
protocol, append/read, kv get/put/delete, api).
|
||||
- **Phase 2** — projections (`fold step seed`) + subscriptions; concurrency
|
||||
conflict as a real result.
|
||||
- **Phase 3** — snapshots + replay (checkpoint, replay = snapshot + tail,
|
||||
determinism).
|
||||
- **Phase 4** — durable backend via kernel IO (`perform`), blob-ref interface,
|
||||
crash/restart replay against the mock-IO harness.
|
||||
|
||||
Within a phase, pick the checkbox that unlocks the most tests per effort.
|
||||
|
||||
Every iteration: implement → test → commit → tick `[ ]` → Progress log → next.
|
||||
|
||||
## Ground rules (hard)
|
||||
|
||||
- **Scope:** only `lib/persist/**` and `plans/persist-on-sx.md`. Do **not** edit
|
||||
`spec/`, `hosts/`, `shared/`, or any `lib/<lang>/`. You may **import** the
|
||||
kernel's IO-suspension + platform-IO surface only. **Do NOT add host primitives.**
|
||||
If a durable IO op you need doesn't exist, it belongs in `hosts/` (out of scope) →
|
||||
Blockers entry with a minimal repro, and stop on that item.
|
||||
- **NEVER call `sx_build`.** 600s watchdog. If the sx_server binary is broken →
|
||||
Blockers entry, stop. Run tests by invoking the sx_server binary directly from a
|
||||
conformance.sh (model it on an existing one, e.g. `lib/apl/conformance.sh`),
|
||||
pointing `SX_SERVER` at `/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe`
|
||||
— fresh worktrees have no `_build/`.
|
||||
- **Determinism:** replay must be pure — same log → same state. No clocks/randomness
|
||||
inside projections; timestamps live on the event, passed in.
|
||||
- **Shared-file issues** → plan's Blockers with minimal repro; don't fix here.
|
||||
- **SX files:** `sx-tree` MCP tools ONLY. **They take `file:` not `path:`** — a
|
||||
wrong key yields `Yojson Type_error("Expected string, got null")`, which looks
|
||||
like a broken binary but is just a param mismatch. `sx_validate` after edits.
|
||||
Path-based edits (`sx_replace_node`) count comment headers in their indices and
|
||||
can clobber the wrong node — re-read after, or prefer `sx_write_file` for small
|
||||
files.
|
||||
- **Unicode in `.sx`:** raw UTF-8 only, never `\uXXXX` escapes.
|
||||
- **Commit granularity:** one feature per commit. Short factual messages
|
||||
(`persist: kv facet get/put/delete + 6 tests`). Push to `origin/loops/persist`.
|
||||
- **Plan file:** update Progress log (newest first) + tick boxes every commit.
|
||||
|
||||
## persist-specific gotchas
|
||||
|
||||
- **Two facets, not one.** Don't force current-state values (a stock count, a
|
||||
config value, a session blob) through the event log — that's the kv facet. Event
|
||||
log is for things whose *history* matters.
|
||||
- **Backend is injected.** The in-memory backend is the test default; never hardwire
|
||||
it. Every op goes through the backend protocol so file/pg/ipfs swap in unchanged.
|
||||
- **Optimistic concurrency is a real result.** A conflicting append returns a
|
||||
conflict value the caller can retry on — not a crash, not a silent overwrite.
|
||||
- **Blobs by reference only.** persist stores a content-address/CID + metadata. The
|
||||
bytes live in a content-addressed store (artdag/IPFS). Never put large payloads in
|
||||
the log.
|
||||
- **Replay determinism is the headline property.** Snapshot + tail must equal full
|
||||
replay. Test it explicitly, both directions.
|
||||
|
||||
## General gotchas (all loops)
|
||||
|
||||
- SX `do` = R7RS iteration. Use `begin` for multi-expr sequences.
|
||||
- `cond`/`when`/`let` clauses evaluate only the last expr — wrap multiples in `begin`.
|
||||
- `let` is parallel, not sequential — nest `let`s when a binding references an earlier one.
|
||||
- `env-bind!` creates a binding; `env-set!` mutates an existing one (walks scope chain).
|
||||
- `sx_validate` after every structural edit.
|
||||
- Namespace-prefix all helpers (`persist/...`) — short/host-colliding names get
|
||||
silently shadowed or hang the runtime.
|
||||
|
||||
## Style
|
||||
|
||||
- No comments in `.sx` unless non-obvious.
|
||||
- No new planning docs — update `plans/persist-on-sx.md` inline.
|
||||
- Short, factual commit messages.
|
||||
- One feature per iteration. Commit. Log. Push. Next.
|
||||
|
||||
Go. Start by reading the plan; find the first unchecked `[ ]`; implement it.
|
||||
110
plans/agent-briefings/search-loop.md
Normal file
110
plans/agent-briefings/search-loop.md
Normal file
@@ -0,0 +1,110 @@
|
||||
# search-on-sx loop agent (single agent, queue-driven)
|
||||
|
||||
Role: iterates `plans/search-on-sx.md` forever. **Full-text + structured search on
|
||||
Haskell** — tokenize, inverted index, query AST, boolean + phrase + ranked
|
||||
queries (TF-IDF / BM25), ACL-aware post-filter, federated index merge. Typed ADTs
|
||||
make query parsing clean; lazy lists make posting-list iteration efficient. Sits on
|
||||
`lib/haskell/` (1514/1514 already green); adds a search-shaped vocabulary on top.
|
||||
|
||||
```
|
||||
description: search-on-sx queue loop
|
||||
subagent_type: general-purpose
|
||||
run_in_background: true
|
||||
isolation: worktree
|
||||
```
|
||||
|
||||
## Prompt
|
||||
|
||||
You are the sole background agent working `plans/search-on-sx.md`. Isolated
|
||||
worktree `/root/rose-ash-loops/search` on branch `loops/search`, forever, one
|
||||
commit per feature. Push to `origin/loops/search` after every commit. Never touch
|
||||
`main` or `architecture`.
|
||||
|
||||
## Restart baseline — check before iterating
|
||||
|
||||
1. Read `plans/search-on-sx.md` — roadmap + Progress log.
|
||||
2. `ls lib/search/` — pick up from the most advanced file.
|
||||
3. If `lib/search/tests/*.sx` exist, run them via `bash lib/search/conformance.sh`.
|
||||
Green before new work.
|
||||
4. If `lib/search/scoreboard.md` exists, that's your baseline.
|
||||
5. Read the `lib/haskell/` public API once — that's your substrate. `lib/haskell/
|
||||
haskell.sx` exists; also study `runtime.sx`, `eval.sx`, `parser.sx`, `infer.sx`,
|
||||
`match.sx`, `map.sx`, `set.sx`, `testlib.sx`. Learn how to declare ADTs, pattern
|
||||
match, and use the `Map`/`Set` helpers before writing index code. Verify the real
|
||||
exported names with sx_find_all / grep — don't assume from the plan's sketch.
|
||||
|
||||
## The queue
|
||||
|
||||
Phase order per `plans/search-on-sx.md`:
|
||||
|
||||
- **Phase 1** — tokenize + inverted index + simple term lookup
|
||||
(`Map Term [(DocId,[Pos])]`, insert/lookup, `(search/index doc)`,
|
||||
`(search/query term)`).
|
||||
- **Phase 2** — query AST + boolean/phrase eval (Term | And | Or | Not | Phrase;
|
||||
posting-list set ops; positional phrase match).
|
||||
- **Phase 3** — ranking (TF-IDF, BM25), top-N.
|
||||
- **Phase 4** — ACL-aware post-filter + federation (merge per-peer indices).
|
||||
|
||||
Within a phase, pick the checkbox that unlocks the most tests per effort.
|
||||
|
||||
Every iteration: implement → test → commit → tick `[ ]` → Progress log → next.
|
||||
|
||||
## Ground rules (hard)
|
||||
|
||||
- **Scope:** only `lib/search/**` and `plans/search-on-sx.md`. Do **not** edit
|
||||
`spec/`, `hosts/`, `shared/`, other `lib/<lang>/` dirs, `lib/stdlib.sx`, or
|
||||
`lib/` root. May **import** from `lib/haskell/` only (its public API). Do **not**
|
||||
modify Haskell.
|
||||
- **NEVER call `sx_build`.** 600s watchdog. If the sx_server binary is broken →
|
||||
Blockers entry, stop. Run tests by invoking the sx_server binary directly from a
|
||||
conformance.sh (model it on `lib/haskell/conformance.sh`), pointing `SX_SERVER`
|
||||
at `/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe` — fresh
|
||||
worktrees have no `_build/`, so the relative path won't resolve.
|
||||
- **Shared-file issues** → plan's Blockers with minimal repro; don't fix here.
|
||||
- **SX files:** `sx-tree` MCP tools ONLY. **They take `file:` not `path:`** — a
|
||||
wrong key yields `Yojson Type_error("Expected string, got null")`, which looks
|
||||
like a broken binary but is just a param mismatch. `sx_validate` after edits.
|
||||
Path-based edits (`sx_replace_node`) count comment headers in their indices and
|
||||
can clobber the wrong node — re-read after, or prefer `sx_write_file` for small
|
||||
files.
|
||||
- **Unicode in `.sx`:** raw UTF-8 only, never `\uXXXX` escapes.
|
||||
- **Commit granularity:** one feature per commit. Short factual messages
|
||||
(`search: phrase query positional match + 7 tests`). Push to `origin/loops/search`.
|
||||
- **Plan file:** update Progress log (newest first) + tick boxes every commit.
|
||||
|
||||
## search-specific gotchas
|
||||
|
||||
- **Posting lists are the hot path.** Keep them sorted by DocId so boolean AND/OR
|
||||
are linear merges, not nested scans. Phrase match needs positions, so store
|
||||
`(DocId, [Pos])` — don't drop positions early to save space; you can't recover them.
|
||||
- **Tokenization decides recall.** Normalize consistently (lowercase, strip
|
||||
punctuation) on BOTH index and query side, or queries silently miss. Test the
|
||||
index/query symmetry explicitly.
|
||||
- **Ranking must be deterministic on ties.** TF-IDF/BM25 scores collide; always
|
||||
add a stable tiebreak (DocId ascending) or tests flake.
|
||||
- **ACL filter is per-viewer and post-ranking.** Filter the result list against the
|
||||
viewer, after scoring — never bake visibility into the index (the same index
|
||||
serves all viewers). Inject the permit predicate; don't hardwire an ACL module
|
||||
that doesn't exist yet.
|
||||
- **Federation merges indices, not results.** Merging per-peer inverted indices
|
||||
(union posting lists per term) is cleaner and rank-correct vs merging ranked
|
||||
result lists. Mock peer indices in tests.
|
||||
|
||||
## General gotchas (all loops)
|
||||
|
||||
- SX `do` = R7RS iteration. Use `begin` for multi-expr sequences.
|
||||
- `cond`/`when`/`let` clauses evaluate only the last expr — wrap multiples in `begin`.
|
||||
- `let` is parallel, not sequential — nest `let`s when a binding references an earlier one.
|
||||
- `env-bind!` creates a binding; `env-set!` mutates an existing one (walks scope chain).
|
||||
- `sx_validate` after every structural edit.
|
||||
- Namespace-prefix all guest helpers (`search/...`) — short/host-colliding names
|
||||
get silently shadowed or hang the runtime.
|
||||
|
||||
## Style
|
||||
|
||||
- No comments in `.sx` unless non-obvious.
|
||||
- No new planning docs — update `plans/search-on-sx.md` inline.
|
||||
- Short, factual commit messages.
|
||||
- One feature per iteration. Commit. Log. Push. Next.
|
||||
|
||||
Go. Start by reading the plan; find the first unchecked `[ ]`; implement it.
|
||||
@@ -1,82 +0,0 @@
|
||||
# commerce-on-sx: Catalog, cart, pricing & orders on miniKanren
|
||||
|
||||
> **DRAFT outline.** The revenue vertical. Depends on `persist-on-sx` (durable
|
||||
> orders) and `flow-on-sx` (checkout as a durable flow). Don't start before
|
||||
> persist-on-sx Phase 1 is green.
|
||||
|
||||
rose-ash's revenue engine — market (catalog), cart (checkout), orders (SumUp
|
||||
payment, reconciliation) — has no SX subsystem. The hard part of commerce isn't
|
||||
CRUD; it's **pricing**: discounts, bundles, tax, membership rates, promotions that
|
||||
stack (or don't). These are relations, and a relational engine can run them in
|
||||
multiple directions — forward ("what's the total?") and backward ("what promo code
|
||||
yields this total?", "which line item triggered the discount?").
|
||||
|
||||
That's a miniKanren fit. Pricing/promotion rules are relational; cart and order
|
||||
*lifecycle* (reserve → pay → fulfil → reconcile) is a durable `flow`; the order
|
||||
ledger is a `persist` stream. Commerce is the first real **composition** subsystem.
|
||||
|
||||
End-state: a catalog model, a relational pricing/promotion engine, a cart with
|
||||
deterministic totals, and an order lifecycle flow with payment-webhook
|
||||
reconciliation — all auditable via the event log.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/commerce/conformance.sh` → **0/0** (not yet started)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only `lib/commerce/**` and `plans/commerce-on-sx.md`. May **import**
|
||||
from `lib/minikanren/`, and (once they exist) `lib/persist/` + `lib/flow/`. Do not
|
||||
edit substrates.
|
||||
- **Architecture:** prices/promotions are miniKanren relations over catalog facts;
|
||||
a cart total is a *deterministic* query result (first solution under a fixed rule
|
||||
order). Order lifecycle is a `flow` that suspends at the payment IO boundary.
|
||||
Money is integer minor units — never floats.
|
||||
- **Determinism:** promotion stacking must have explicit, tested precedence;
|
||||
totals must be reproducible from the cart + catalog snapshot.
|
||||
- **Commits:** one feature per commit. Progress log + tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
Catalog + cart Total / order
|
||||
product(id,price,tags) {:subtotal :discounts :tax :total}
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/commerce/catalog.sx lib/commerce/price.sx
|
||||
— product / variant / stock facts — miniKanren pricing relations
|
||||
│ — promo stacking, membership rates
|
||||
▼ ▲
|
||||
lib/commerce/cart.sx lib/commerce/order.sx (flow + store)
|
||||
— line items, quantities — reserve→pay→fulfil→reconcile
|
||||
│ — SumUp webhook = flow resume
|
||||
▼ │
|
||||
lib/commerce/api.sx ── (commerce/add) (commerce/total) (commerce/checkout) ──┘
|
||||
```
|
||||
|
||||
## Phase 1 — Catalog + cart + deterministic totals
|
||||
- [ ] `catalog.sx` — product/variant/stock as facts
|
||||
- [ ] `cart.sx` — line items, add/remove/qty
|
||||
- [ ] `price.sx` — base pricing relation, subtotal; tax
|
||||
- [ ] `api.sx` + tests + scoreboard + conformance.sh
|
||||
|
||||
## Phase 2 — Promotions (relational)
|
||||
- [ ] promo rules: percentage, fixed, bundle, member rate
|
||||
- [ ] explicit stacking precedence; "best price" backward query
|
||||
- [ ] tests: stacking order, mutually-exclusive promos, member vs guest
|
||||
|
||||
## Phase 3 — Order lifecycle (flow + store)
|
||||
- [ ] order flow: reserve stock → await payment → fulfil
|
||||
- [ ] payment webhook resumes the suspended flow
|
||||
- [ ] order ledger as a `persist` stream; idempotent reconciliation
|
||||
|
||||
## Phase 4 — Reconciliation + federation
|
||||
- [ ] mismatch detection (paid≠ordered) as queries over the ledger
|
||||
- [ ] cross-instance catalog (federated marketplace) — out-of-scope stub
|
||||
- [ ] tests: webhook replay, partial refund, double-charge guard
|
||||
|
||||
## Progress log
|
||||
(loop fills this in)
|
||||
|
||||
## Blockers
|
||||
(loop fills this in)
|
||||
@@ -1,82 +0,0 @@
|
||||
# content-on-sx: Documents, blocks & collaborative editing on Smalltalk
|
||||
|
||||
> **DRAFT outline.** The CMS vertical — blog, WYSIWYG editor, Ghost sync. Depends
|
||||
> on `persist-on-sx` (document history as an event log). Ghost/CMS sync stays a thin
|
||||
> external adapter (Python/FFI) until a native replacement exists.
|
||||
|
||||
rose-ash's `blog` domain is content management: a block-based WYSIWYG editor,
|
||||
navigation, Ghost CMS sync. A document is a tree of live blocks; editing is a
|
||||
stream of operations; collaboration needs conflict-free merge. That is an object
|
||||
model — blocks are objects, edits are messages, and a document is the object graph
|
||||
responding to them. Smalltalk's "everything is an object responding to messages"
|
||||
maps directly to a block/WYSIWYG model, and a semilattice (CRDT) merge keeps
|
||||
concurrent edits conflict-free.
|
||||
|
||||
End-state: a Smalltalk-on-SX document model (typed blocks, structural ops),
|
||||
operation log + CRDT merge for collaborative editing, versioning/history via the
|
||||
event store, and a render boundary to HTML/SX. External CMS (Ghost) sync is an
|
||||
injected adapter, not core.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/content/conformance.sh` → **0/0** (not yet started)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only `lib/content/**` and `plans/content-on-sx.md`. May **import**
|
||||
from `lib/smalltalk/`, and (once it exists) `lib/persist/`. Do not edit substrates.
|
||||
- **Architecture:** a document is an ordered tree of blocks (objects); an edit is a
|
||||
message (`insert`/`update`/`move`/`delete`); concurrent edits merge via a
|
||||
commutative (CRDT/semilattice) operation so order doesn't matter. History is the
|
||||
`persist` event stream; any version is a replay.
|
||||
- **Determinism:** merge must be commutative + idempotent (test: apply ops in any
|
||||
order / twice → same document).
|
||||
- **Commits:** one feature per commit. Progress log + tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
Edit op Rendered document
|
||||
(insert block after id) ... HTML / SX tree
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/content/block.sx lib/content/render.sx
|
||||
— typed blocks as objects — block tree → HTML/SX
|
||||
— heading/text/image/embed — (reuses SX render boundary)
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/content/doc.sx lib/content/merge.sx
|
||||
— ordered block tree — CRDT/semilattice op merge
|
||||
— apply op, structural moves — concurrent-edit reconciliation
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/content/api.sx ── (content/edit) (content/render) (content/history) ──┐
|
||||
│ │
|
||||
├── op log + versions → persist │
|
||||
└── Ghost/CMS sync → injected external adapter (thin, non-core) ──┘
|
||||
```
|
||||
|
||||
## Phase 1 — Block document model
|
||||
- [ ] `block.sx` — typed block objects
|
||||
- [ ] `doc.sx` — ordered tree, apply edit op, structural moves
|
||||
- [ ] `render.sx` — block tree → HTML/SX
|
||||
- [ ] `api.sx` + tests + scoreboard + conformance.sh
|
||||
|
||||
## Phase 2 — Op log + versioning
|
||||
- [ ] edit ops as `persist` events; replay to any version
|
||||
- [ ] `(content/history doc)`, diff between versions
|
||||
|
||||
## Phase 3 — Collaborative merge (CRDT)
|
||||
- [ ] commutative/idempotent op merge
|
||||
- [ ] concurrent-edit tests (any order, double-apply → identical)
|
||||
|
||||
## Phase 4 — External sync + federation
|
||||
- [ ] Ghost/CMS sync via injected adapter (import/export)
|
||||
- [ ] federated documents (peer-authored blocks) — trust-gated stub
|
||||
- [ ] tests: round-trip import/export, conflict on concurrent external edit
|
||||
|
||||
## Progress log
|
||||
(loop fills this in)
|
||||
|
||||
## Blockers
|
||||
(loop fills this in)
|
||||
@@ -1,81 +0,0 @@
|
||||
# events-on-sx: Calendar, ticketing & notification delivery on Datalog
|
||||
|
||||
> **DRAFT outline.** The events vertical + the shared notification-delivery edge.
|
||||
> Depends on `persist-on-sx` (bookings ledger) and `flow-on-sx` (reminders, retrying
|
||||
> delivery). Pairs with `commerce-on-sx` for paid tickets.
|
||||
|
||||
rose-ash's `events` domain is calendar + ticketing: recurring events, availability,
|
||||
capacity, bookings. Scheduling is constraint reasoning — "is this slot free given
|
||||
recurrence, capacity, and the attendee's other bookings?" — which is rule
|
||||
evaluation over facts. Datalog expresses availability, recurrence expansion, and
|
||||
capacity as rules; a booking is a transaction; reminders and digests are durable
|
||||
`flow`s. Notification *delivery* (email/push) — needed here and by `feed/notify` —
|
||||
is folded in as an injected transport, extractable later.
|
||||
|
||||
End-state: a Datalog-on-SX events layer with recurrence expansion, availability +
|
||||
capacity rules, transactional booking, and a flow-driven notification dispatcher
|
||||
(reminders, digests, retries) over an injected transport.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/events/conformance.sh` → **0/0** (not yet started)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only `lib/events/**` and `plans/events-on-sx.md`. May **import** from
|
||||
`lib/datalog/`, and (once they exist) `lib/persist/` + `lib/flow/`. Do not edit
|
||||
substrates.
|
||||
- **Architecture:** events/availability/capacity are Datalog facts + rules;
|
||||
recurrence expands to occurrence facts within a window; a booking checks rules
|
||||
then appends a `persist` event (idempotent, capacity-safe). Notifications are flows
|
||||
that suspend on transport IO and retry on failure.
|
||||
- **Determinism:** recurrence expansion + availability must be reproducible for a
|
||||
fixed window + ruleset; capacity checks must be race-safe (no overbooking).
|
||||
- **Commits:** one feature per commit. Progress log + tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
Event + booking Result
|
||||
event(id,start,rrule,capacity) {:booked | :full | :conflict} + reminders
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/events/calendar.sx lib/events/availability.sx
|
||||
— event facts, recurrence (RRULE) — free/busy + capacity rules (Datalog)
|
||||
— expand occurrences in window │
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/events/booking.sx lib/events/notify.sx (flow)
|
||||
— transactional, capacity-safe — reminders / digests, retry on fail
|
||||
— bookings → persist ledger — injected transport (email/push)
|
||||
│ │
|
||||
▼ ▼
|
||||
lib/events/api.sx ── (events/schedule) (events/book) (events/agenda) ──────┘
|
||||
```
|
||||
|
||||
## Phase 1 — Calendar + recurrence
|
||||
- [ ] `calendar.sx` — event facts, RRULE expansion in a window
|
||||
- [ ] `availability.sx` — free/busy rules
|
||||
- [ ] `api.sx` + tests + scoreboard + conformance.sh
|
||||
|
||||
## Phase 2 — Ticketing + booking
|
||||
- [ ] capacity rules; transactional booking → `persist` (no overbooking)
|
||||
- [ ] paid tickets compose with `commerce` order flow
|
||||
- [ ] tests: capacity edge, double-book guard, conflict detection
|
||||
|
||||
## Phase 3 — Notification delivery (flow)
|
||||
- [ ] `notify.sx` — reminder/digest flows over injected transport
|
||||
- [ ] retry/backoff on transport failure (flow suspend/resume)
|
||||
- [ ] tests: delivery success, retry path, idempotent re-send
|
||||
- [ ] NOTE: shared with `feed/notify` — candidate for later extraction to a
|
||||
`delivery-on-sx` once a second consumer is real
|
||||
|
||||
## Phase 4 — Federation
|
||||
- [ ] cross-instance events (peer calendar) — trust-gated stub
|
||||
- [ ] tests: federated agenda merge
|
||||
|
||||
## Progress log
|
||||
(loop fills this in)
|
||||
|
||||
## Blockers
|
||||
(loop fills this in)
|
||||
@@ -1,100 +0,0 @@
|
||||
# host-on-sx: The SX web host — off Quart, onto the kernel (Dream-bound)
|
||||
|
||||
> **DRAFT outline.** The integration boundary that turns the subsystem libraries
|
||||
> into running services, and the strangler path off Python/Quart. This is the
|
||||
> dependency hub — it imports every subsystem. Decision recorded below: native
|
||||
> server + SXTP **now**, `dream-on-sx` framework layer **next**, Python only at the
|
||||
> external-integration edges.
|
||||
|
||||
The subsystems (`feed`, `search`, `acl`, `mod`, `flow`, `commerce`, `identity`,
|
||||
`content`, `events`) are libraries. Something has to receive an HTTP request, route
|
||||
it, call the right subsystem, and serialize the response. Today that's Python/Quart
|
||||
— the one large non-SX component in the stack: separate runtime, deploy, and
|
||||
failure mode. The goal is to move the web/host/domain layer onto the SX substrate
|
||||
and retire Quart, **incrementally (strangler-fig), never big-bang.**
|
||||
|
||||
This is already underway: a native OCaml HTTP server is live in prod on
|
||||
`sx.rose-ash.com` (~3ms cached, ~323 req/s, ~2MB RSS), `defhandler`/`defpage`
|
||||
exist, and a partial **SXTP** protocol is specced. That is the unblocked near-term
|
||||
host — no `ocaml-on-sx` dependency.
|
||||
|
||||
## Two layers, two timelines
|
||||
|
||||
1. **Now (unblocked): native server + SXTP adapter + SX handlers.** Route rose-ash
|
||||
endpoints onto the SX host one at a time. Each migrated endpoint is an SX
|
||||
handler dispatching to a subsystem; Quart proxies the rest until cut over.
|
||||
2. **Next: `dream-on-sx` as the framework layer.** Dream gives Quart-grade
|
||||
ergonomics — typed routing, middleware stacks, sessions, CSRF. It is gated on
|
||||
`ocaml-on-sx` Phases 1–5 + minimal stdlib. **This plan is the concrete target
|
||||
user that un-parks `dream-on-sx`** (see `plans/dream-on-sx.md`): "the subsystems
|
||||
need an HTTP front door" is the real feature pulling Dream. Until then, do not
|
||||
block migration on Dream — the native server is sufficient.
|
||||
3. **Always: Python only at the edges.** External integrations — SumUp payments,
|
||||
Ghost CMS, ActivityPub crypto, IPFS/Kubo — ride Python libraries today. They
|
||||
stay as thin injected adapters (Python/FFI) behind subsystem interfaces until
|
||||
native replacements exist. "Drop Quart" ≠ "drop every line of Python."
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/host/conformance.sh` → **0/0** (not yet started)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** `lib/host/**` and `plans/host-on-sx.md`. May **import** every subsystem
|
||||
+ the kernel's server/SXTP surface. Do **not** edit `spec/`, `hosts/`, `shared/`,
|
||||
or subsystem internals — wire to their public APIs only. Host-primitive / server
|
||||
changes belong in `hosts/` (out of scope) → Blockers.
|
||||
- **Architecture:** a route maps (method, path) → handler; a handler is an SX fn
|
||||
`request -> response` that calls subsystem APIs; middleware is composed handlers
|
||||
(auth via `identity`, permission via `acl`, mute via subsystem prefs). SXTP is the
|
||||
wire format between host and subsystem-as-service.
|
||||
- **Migration discipline:** each endpoint moved must be behavior-equivalent to its
|
||||
Quart original (golden-response test before flip). Keep a migration ledger.
|
||||
- **Commits:** one feature per commit. Progress log + tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
HTTP request HTTP response
|
||||
│ ▲
|
||||
▼ │
|
||||
native OCaml http server (prod) ──────► lib/host/router.sx
|
||||
(hosts/ — out of scope) — (method,path) → handler
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/host/middleware.sx lib/host/handler.sx
|
||||
— auth(identity) ∘ acl ∘ mute ∘ ... — request → subsystem call → response
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/host/sxtp.sx subsystem APIs (feed/search/commerce/…)
|
||||
— wire format, host↔service — called via public interfaces
|
||||
│
|
||||
└── external edges: SumUp / Ghost / AP / IPFS → injected Python/FFI adapters
|
||||
```
|
||||
|
||||
## Phase 1 — Router + handler + one real endpoint
|
||||
- [ ] `router.sx` — route table, (method,path) match
|
||||
- [ ] `handler.sx` — request/response model, subsystem dispatch
|
||||
- [ ] migrate ONE read endpoint (e.g. a feed timeline) end-to-end, golden test
|
||||
- [ ] `conformance.sh` + scoreboard
|
||||
|
||||
## Phase 2 — Middleware + SXTP
|
||||
- [ ] `middleware.sx` — composable auth/acl/mute/error layers
|
||||
- [ ] `sxtp.sx` — host↔subsystem wire format (align with existing spec)
|
||||
- [ ] migrate a write endpoint (auth + permission + action)
|
||||
|
||||
## Phase 3 — Strangler migration ledger
|
||||
- [ ] enumerate Quart endpoints; track migrated vs proxied
|
||||
- [ ] golden-response harness vs the live Quart responses
|
||||
- [ ] cut over a whole domain (smallest: `likes` or `relations`) as proof
|
||||
|
||||
## Phase 4 — Dream framework layer (gated)
|
||||
- [ ] gate: `ocaml-on-sx` Phases 1–5 + minimal stdlib green
|
||||
- [ ] adopt `dream-on-sx` routing/middleware/session ergonomics over the same handlers
|
||||
- [ ] re-home external adapters as native where replacements land
|
||||
|
||||
## Progress log
|
||||
(loop fills this in)
|
||||
|
||||
## Blockers
|
||||
(loop fills this in)
|
||||
@@ -1,84 +0,0 @@
|
||||
# identity-on-sx: OAuth2, sessions & membership on Erlang
|
||||
|
||||
> **DRAFT outline.** The identity core `acl-on-sx` assumes already exists. `acl`
|
||||
> answers "may X do Y"; identity answers "who is X, and how did they prove it."
|
||||
> Depends on `persist-on-sx` (grant/audit ledger). Pairs with `acl-on-sx`.
|
||||
|
||||
rose-ash's `account` domain is the OAuth2 authorization server every other app is
|
||||
a client of: silent SSO, per-app first-party cookies, grant verification,
|
||||
membership. Sessions and grants are **long-lived, concurrent, individually
|
||||
addressable, and expire on their own** — that is the actor model. Erlang's
|
||||
processes + mailboxes map cleanly: a session is a process, token issue/refresh/
|
||||
revoke are messages, expiry is a process timeout, and SSO is one process answering
|
||||
many apps.
|
||||
|
||||
End-state: an Erlang-on-SX layer with the OAuth2 authorization-code + silent
|
||||
(`prompt=none`) flows as message protocols, a session/grant registry, token
|
||||
lifecycle (issue/refresh/revoke/introspect), and membership state — all auditable
|
||||
through the event log, all authorization questions delegated to `acl-on-sx`.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/identity/conformance.sh` → **0/0** (not yet started)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only `lib/identity/**` and `plans/identity-on-sx.md`. May **import**
|
||||
from `lib/erlang/`, and (once they exist) `lib/persist/` + `lib/acl/`. Do not edit
|
||||
substrates.
|
||||
- **Architecture:** a session/grant is a process holding its own state; the
|
||||
registry routes messages by subject/client id. Tokens are opaque + introspected,
|
||||
not self-validating (revocation must be real). Authorization decisions are NOT
|
||||
made here — `identity` proves identity, `acl` decides permission.
|
||||
- **Security:** revocation is immediate (kill the process / tombstone the grant);
|
||||
no decision relies on a token that outlived its grant. Negative answers are
|
||||
explicit, never "absence of a yes."
|
||||
- **Commits:** one feature per commit. Progress log + tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
Auth request Token / session
|
||||
(authorize client scope subject) {:access :refresh :expires :grant}
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/identity/oauth.sx lib/identity/token.sx
|
||||
— authz-code + prompt=none flows — issue / refresh / revoke / introspect
|
||||
— as Erlang message protocols — opaque tokens, grant-backed
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/identity/session.sx lib/identity/registry.sx
|
||||
— session = process, expiry=timeout — route by subject/client; SSO fan-out
|
||||
│ │
|
||||
▼ ▼
|
||||
lib/identity/api.sx ── (identity/login) (identity/grant?) (identity/revoke) ──┐
|
||||
│ │
|
||||
└──────── grant + audit events → persist ; permission? → acl ──────────┘
|
||||
```
|
||||
|
||||
## Phase 1 — Sessions + tokens
|
||||
- [ ] `session.sx` — session process, create/lookup/expire
|
||||
- [ ] `token.sx` — issue/introspect/revoke (opaque, grant-backed)
|
||||
- [ ] `registry.sx` — route by subject/client
|
||||
- [ ] `api.sx` + tests + scoreboard + conformance.sh
|
||||
|
||||
## Phase 2 — OAuth2 flows
|
||||
- [ ] authorization-code flow as a message protocol
|
||||
- [ ] refresh + rotation; revocation cascades to issued tokens
|
||||
- [ ] tests: full code exchange, refresh, revoke-then-use (must fail)
|
||||
|
||||
## Phase 3 — Silent SSO + membership
|
||||
- [ ] `prompt=none` cross-app login (one session, many clients)
|
||||
- [ ] membership state + per-app grant projection
|
||||
- [ ] grant verification delegated cache (mirror Redis-cache pattern)
|
||||
|
||||
## Phase 4 — Audit + federation
|
||||
- [ ] every issue/refresh/revoke is a `persist` event; `(identity/audit subject)`
|
||||
- [ ] federated identity (peer-asserted subject) — advisory, trust-gated stub
|
||||
- [ ] tests: audit completeness, cross-instance subject mapping
|
||||
|
||||
## Progress log
|
||||
(loop fills this in)
|
||||
|
||||
## Blockers
|
||||
(loop fills this in)
|
||||
@@ -1,411 +0,0 @@
|
||||
# persist-on-sx: Durable state on the SX kernel
|
||||
|
||||
> **DRAFT outline.** Foundation subsystem — the durable substrate the other five
|
||||
> currently fake with in-memory mutable lists. Build this first.
|
||||
>
|
||||
> **"persist" = persistence / data store, NOT the shop.** The shop/commerce vertical
|
||||
> is `commerce-on-sx`.
|
||||
|
||||
rose-ash needs durable state: every subsystem (feed log, flow store, mod audit,
|
||||
search index, acl grants, sessions) today hand-rolls an in-memory structure that
|
||||
vanishes on restart. `persist-on-sx` is the one durable substrate they share. It
|
||||
lives directly on the SX kernel's IO-suspension primitives (`perform`/`cek-resume`
|
||||
— the third CEK phase) so a read/write `perform`s and the kernel persists at the
|
||||
boundary. Concrete storage backends are injected.
|
||||
|
||||
## Does it cover ALL persistence? No — and on purpose.
|
||||
|
||||
Event-sourcing-everything is a known trap (replay cost, event schema evolution,
|
||||
awkward ad-hoc queries, 5MB images in a log). So persist owns the **durable
|
||||
source-of-truth substrate**, exposed as **two facets over one backend protocol**,
|
||||
with two things explicitly delegated out:
|
||||
|
||||
| Shape | Owner | Notes |
|
||||
|-------|-------|-------|
|
||||
| **Event streams** (append-only, history matters) | persist — **log facet** | feed activities, mod audit, order ledger, flow state, content edits |
|
||||
| **Current-state values** (KV / document, no history) | persist — **kv facet** | profiles, stock counts, config, session blobs; also where projections materialize |
|
||||
| **Snapshots / read models** (derived, queryable) | persist — projections → kv/log | rebuildable from the log; persisted so you don't replay to answer a query |
|
||||
| **Blobs / large objects** (images, media) | **delegated** → content-addressed store (artdag/IPFS already) | persist stores the *reference/CID*, never the bytes |
|
||||
| **Cache** (ephemeral, evictable) | **out of scope** | not persistence — different lifecycle (Redis-shaped) |
|
||||
| **Ad-hoc relational query** | the subsystem, over a projected read model | the log is bad at "all orders by X in March"; project into a queryable kv/SQL backend |
|
||||
|
||||
So: persist is the **single durable substrate** for state that's either a stream of
|
||||
changes or a current value — but it does **not** force everything into an event
|
||||
log, it does **not** hold blobs (only their content-addressed refs), and it does
|
||||
**not** do caching. Those boundaries are the whole point of calling it a substrate
|
||||
rather than "the database."
|
||||
|
||||
End-state: `log` (append/read streams) + `kv` (get/put/delete by key) facets, an
|
||||
injectable backend protocol (mem → file → Postgres → IPFS-ref), pure projections
|
||||
with incremental snapshots, optimistic concurrency, and a subscription hook so
|
||||
read models (feeds, indices, audit logs) update incrementally.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/persist/conformance.sh` → **201/201** (Phases 1–4 complete + extensions + a reference migration)
|
||||
|
||||
## Ground rules
|
||||
|
||||
- **Scope:** only `lib/persist/**` and `plans/persist-on-sx.md`. May **import** the
|
||||
kernel's IO-suspension surface (`perform`, platform IO ops) — verify what's
|
||||
exported first. Do not add host primitives; a missing durable IO op is a Blockers
|
||||
entry (it belongs in `hosts/`, out of scope).
|
||||
- **Architecture:** an event is `{:stream :seq :type :at :data}`; the log is an
|
||||
ordered append-only vector; a projection is `(fold step seed events)`; a kv value
|
||||
is `(get/put/delete key)`. Both facets sit on one injected backend
|
||||
`{:append :read :kv-get :kv-put :snapshot-read :snapshot-write}`. The in-memory
|
||||
backend is the test default; real backends wire in unchanged.
|
||||
- **Determinism:** replay is pure — same log → same state, always. No clocks or
|
||||
randomness inside projections; time lives on the event.
|
||||
- **Blobs:** store the content-address/CID and metadata; never the bytes. The blob
|
||||
backend is a separate injected dependency.
|
||||
- **Commits:** one feature per commit. Progress log + tick boxes.
|
||||
|
||||
## Architecture sketch
|
||||
|
||||
```
|
||||
Command / write Read model / value
|
||||
(append stream type data) (project stream step seed)
|
||||
(kv-put key value) (kv-get key)
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/persist/event.sx lib/persist/project.sx
|
||||
— {:stream :seq :type :at :data} — fold step seed; incremental from snapshot
|
||||
│ ▲
|
||||
▼ │
|
||||
lib/persist/log.sx lib/persist/kv.sx lib/persist/snapshot.sx
|
||||
— append/read — get/put/delete — checkpoint; replay = snapshot + tail
|
||||
— optimistic seq — current-state
|
||||
│ │ ▲
|
||||
└──────────────────┴── (perform → backend) ───┘
|
||||
│
|
||||
lib/persist/backend.sx lib/persist/api.sx
|
||||
— injected protocol — (persist/append) (persist/project)
|
||||
— mem | file | pg | ipfs-ref — (persist/kv-get/put) (persist/subscribe)
|
||||
│
|
||||
└── blobs → content-addressed store (artdag/IPFS), by reference only
|
||||
```
|
||||
|
||||
## Phase 1 — Log + kv + in-memory backend
|
||||
- [x] `event.sx` — event record, stream/seq helpers
|
||||
- [x] `backend.sx` — injectable protocol + in-memory impl (log + kv)
|
||||
- [x] `log.sx` — `append` (optimistic seq), `read`, `read-from`
|
||||
- [x] `kv.sx` — `get`/`put`/`delete` current-state
|
||||
- [x] `api.sx` + tests + scoreboard + conformance.sh
|
||||
|
||||
## Phase 2 — Projections + subscriptions
|
||||
- [x] `project.sx` — `(project stream step seed)`, incremental fold
|
||||
- [x] subscription hook — projection / kv read model re-runs on append
|
||||
- [x] concurrency conflict surfaced as a real result, not a crash
|
||||
|
||||
## Phase 3 — Snapshots + replay
|
||||
- [x] `snapshot.sx` — checkpoint a projection; replay = snapshot + tail
|
||||
- [x] compaction policy; replay-determinism tests
|
||||
|
||||
## Phase 4 — Durable backends via kernel IO
|
||||
- [x] file/log backend driven through `perform` (IO-suspension boundary)
|
||||
- [x] blob backend interface (store ref/CID; bytes live in artdag/IPFS)
|
||||
- [x] crash/restart replay test (mock IO platform)
|
||||
- [x] migration notes for swapping mem → durable under a live subsystem
|
||||
|
||||
### Migration notes — mem → durable under a live subsystem
|
||||
|
||||
The facet API takes the backend as its first argument and never names a concrete
|
||||
backend, so swapping storage is a one-line change at the open site:
|
||||
|
||||
```
|
||||
(persist/open) ; in-memory (test / ephemeral)
|
||||
(persist/mock-durable (persist/mem-backend)); durable protocol, in-process disk
|
||||
(persist/durable-backend) ; production: ops cross perform → host
|
||||
```
|
||||
|
||||
Everything above the backend — `append`/`read`/`project`/`subscribe`/`snapshot`
|
||||
/`compact` — is byte-identical across all three. A subsystem migrates by:
|
||||
|
||||
1. **Pick the seam.** The subsystem holds one backend value (today an in-memory
|
||||
list). Replace its construction with `persist/open`/`durable-backend`; leave
|
||||
every call site untouched.
|
||||
2. **Backfill.** For an existing in-memory store, replay its current state into
|
||||
the durable backend once (append historical events / `kv-put` current
|
||||
values) before cutting reads over. New writes go to durable from then on.
|
||||
3. **Read models rebuild themselves.** A projection is pure `(fold step seed)`;
|
||||
after cutover, `persist/replay` (snapshot + tail) reconstructs every read
|
||||
model from the durable log — no bespoke migration of derived state.
|
||||
4. **Blobs first, by reference.** Move large payloads into the content store and
|
||||
store only `persist/blob-ref`s; the log/kv stay small, so the backfill in (2)
|
||||
never copies bytes.
|
||||
5. **Concurrency is already handled.** Two writers racing a stream get a
|
||||
`persist/conflict?` result, not corruption — the same on mem or durable, so
|
||||
no new code is needed at cutover.
|
||||
|
||||
The only behavioural difference durable introduces is that each op crosses the
|
||||
kernel IO-suspension boundary (`perform`): under the real kernel the call
|
||||
suspends and the host resumes it transparently, so the facet code is unaware.
|
||||
Tests prove this by routing the identical request shapes through `persist/serve`
|
||||
over an in-process disk (the mock-IO harness).
|
||||
|
||||
## Extensions (post-roadmap)
|
||||
- [x] `view.sx` — materialized views: bundle stream + fold + snapshot name;
|
||||
`view-attach` keeps the snapshot current on every publish so `view-peek` is an
|
||||
O(1) read. The consumer-facing read-model abstraction (feed indices, audit
|
||||
rollups, search counters).
|
||||
|
||||
- [x] `kv.sx` CAS — `persist/kv-cas` (compare-and-swap) + `persist/kv-put-new`
|
||||
(create-only): atomic current-state updates, conflict as a real value (kv
|
||||
analogue of log `append-expect`). For sessions, acl grants, stock counts.
|
||||
|
||||
- [x] `catalog.sx` — stream catalog: `persist/streams`/`stream-count`/
|
||||
`stream-exists?`/`total-events`. Backend `:streams` op (from seq high-water
|
||||
marks, so compacted streams still list), threaded through mem + durable.
|
||||
|
||||
- [x] `query.sx` — read-side scans: `read-between` (seq range), `read-since`/
|
||||
`read-window` (by `:at`), `read-by-type`, `read-where`, `count-where`. Pure
|
||||
reads for audit windows / type filters / since-cursors.
|
||||
|
||||
- [x] `batch.sx` — `persist/append-batch` commits a list of `(type at data)`
|
||||
specs as one contiguous block; `persist/append-batch-expect` is transactional
|
||||
(all-or-nothing guarded by optimistic concurrency). For an order + its line
|
||||
items as one commit.
|
||||
|
||||
- [x] `upcast.sx` — event schema evolution: register a pure `(event -> event)`
|
||||
upcaster per type; `read-upcast`/`project-upcast` lift old events to the
|
||||
current shape on read so projections see one shape. Immutable registry;
|
||||
`upcast-data` helper merges new `:data` fields. Addresses the schema-evolution
|
||||
trap without rewriting history.
|
||||
|
||||
- [x] `idempotency.sx` — exactly-once append under retries: `persist/append-once`
|
||||
keyed by a caller idempotency key (per stream), returning the same event on a
|
||||
repeat. Marker lives in kv, so idempotency holds across restart. `seen?` check.
|
||||
|
||||
- [x] `global.sx` — global commit ordering across streams (the primitive feed's
|
||||
unified timeline needs). `persist/gappend` records a pointer in a reserved
|
||||
`$global` index whose seq is the commit position; `read-global`/
|
||||
`project-global` replay every event in commit order; `global-from` for
|
||||
incremental consumers. Opt-in (plain `append` never touches it); reserved
|
||||
index hidden from the public catalog. Deterministic across restart.
|
||||
|
||||
## Consumers (post-foundation, not in scope here)
|
||||
feed/-log, flow store, mod/audit, search index, acl grants, identity sessions all
|
||||
become `persist` log or kv. Track each migration in that subsystem's plan.
|
||||
|
||||
**Reference migration:** `lib/persist/examples/acl.sx` is a worked, tested
|
||||
template — an ACL-grants store rebuilt on persist (grants/revokes as events,
|
||||
current set as a projection, O(1) checks via a materialized view, an audit-window
|
||||
query). It carries an explicit BEFORE (hand-rolled ephemeral map) → AFTER
|
||||
diff in its header and proves the headline win (grants survive restart) on the
|
||||
durable backend. Other subsystem loops copy this pattern; it does not touch the
|
||||
real `lib/acl`.
|
||||
|
||||
## Progress log
|
||||
- **Reference migration: acl grants (201/201).** `lib/persist/examples/acl.sx` —
|
||||
a worked, in-scope template migrating an ACL-grants store from a hand-rolled
|
||||
ephemeral map to persist: grants/revokes as events, current set as a
|
||||
projection, O(1) checks via a materialized view, audit via `read-window`.
|
||||
Header carries the BEFORE→AFTER diff. 10 tests, incl. grants surviving restart
|
||||
on the durable backend (the capability the BEFORE version lacked). The pattern
|
||||
other subsystem loops copy.
|
||||
- **Ext: global commit ordering (191/191).** `global.sx` — `persist/gappend`
|
||||
records a pointer in a reserved `$global` index (its seq = global commit
|
||||
position); `read-global`/`project-global` resolve pointers to events in commit
|
||||
order; `global-from` for incremental global consumers. Opt-in; `$`-streams are
|
||||
now reserved + hidden from the public catalog (`streams-all` reveals them).
|
||||
Gives feed its cross-stream timeline. 11 tests incl. durable + restart
|
||||
determinism.
|
||||
- **Ext: exactly-once append (180/180).** `idempotency.sx` —
|
||||
`persist/append-once` appends at most once per (stream, idempotency key),
|
||||
returning the same event on a repeat; the marker lives in kv so it survives
|
||||
restart (verified on durable). `persist/seen?` check. 9 tests.
|
||||
- **Ext: event schema evolution (171/171).** `upcast.sx` — per-type pure
|
||||
`(event -> event)` upcasters in an immutable registry; `read-upcast`/
|
||||
`project-upcast` lift legacy events to the current shape on read so
|
||||
projections never branch on version. `upcast-data` merges new `:data` fields
|
||||
keeping stream/seq/type/at. 9 tests incl. mixed old/new + durable.
|
||||
- **Ext: atomic batch append (162/162).** `batch.sx` — `persist/append-batch`
|
||||
commits `(type at data)` specs as one contiguous block (real cons-list, in
|
||||
order); `persist/append-batch-expect` checks the stream is still at expected
|
||||
before writing any event, so the batch is all-or-nothing under a concurrent
|
||||
writer. 10 tests incl. conflict-writes-nothing + durable.
|
||||
- **Ext: read-side query helpers (152/152).** `query.sx` — `read-between` (seq
|
||||
range), `read-since`/`read-window` (by `:at`), `read-by-type`, `read-where`,
|
||||
`count-where`. Pure scans over `persist/read`; for ad-hoc relational queries
|
||||
consumers still project into a kv read model. 9 tests incl. durable.
|
||||
- **Ext: stream catalog (143/143).** New backend op `:streams` (keys of the seq
|
||||
high-water-mark dict, threaded through mem-backend + durable serve/io-backend)
|
||||
so fully-compacted streams still enumerate. `catalog.sx`:
|
||||
`persist/streams`/`stream-count`/`stream-exists?`/`total-events`. 10 tests
|
||||
incl. durable + restart.
|
||||
- **Ext: kv compare-and-swap (133/133).** `persist/kv-cas` sets a key only if
|
||||
its current value equals expected, else returns `{:conflict :expected
|
||||
:actual}`; `persist/kv-put-new` is create-only. The kv analogue of log
|
||||
`append-expect` — atomic current-state for sessions/acl/stock. 11 tests incl.
|
||||
racer + retry + durable backend.
|
||||
- **Ext: materialized views (122/122).** `view.sx` — `persist/view` bundles
|
||||
stream + step + seed + snapshot name; `view-attach` subscribes it to a hub so
|
||||
every publish refreshes the snapshot incrementally; `view-peek` is then an
|
||||
O(1) current read (no fold), `view-value` always folds the tail so it's never
|
||||
stale. 11 tests incl. on durable backend + a sum-over-data view.
|
||||
- **Phase 4c+4d (111/111) — Phase 4 complete, roadmap done.** `recovery.sx` — a
|
||||
6-test crash/restart integration: an order ledger (event log + subscription
|
||||
kv read model + snapshot + compaction + invoice blob ref) over the durable
|
||||
backend, where "crash" drops every in-process object and "restart" rebuilds
|
||||
over the same disk + content store. Log, read model, snapshot, compacted
|
||||
replay, and blob ref all survive; seq continues; two restarts converge
|
||||
(determinism). Migration notes (mem → durable under a live subsystem) added
|
||||
inline above.
|
||||
- **Phase 4b (105/105).** `blob.sx` — large objects stay out of persist. A blob
|
||||
ref is `{:cid :size :mime}`; the blob store is a SEPARATE injected dependency
|
||||
(`persist/blob-io` over an injectable transport, perform in prod / mock
|
||||
content store in tests). `persist/blob-store` puts bytes and returns ONLY the
|
||||
ref; `persist/blob-fetch` retrieves bytes via the ref. Mock store is
|
||||
content-addressed (same bytes dedupe). 14 tests assert the invariant: a ref in
|
||||
the log/kv carries the CID, never the bytes (`has-key? :bytes` is false).
|
||||
- **Phase 4a (91/91).** `durable.sx` — a backend whose every op crosses the
|
||||
kernel IO boundary via `(perform {:op "persist/..." :args (...)})`. The
|
||||
transport is injectable: `persist/durable-backend` uses the kernel's
|
||||
`perform` (suspends; host resumes); `persist/mock-durable` uses
|
||||
`persist/serve` over an in-memory disk. `persist/serve` is the reference host
|
||||
+ the mock-IO harness. Because the request shapes are identical, the ENTIRE
|
||||
facet stack (log/kv/project/snapshot/compaction) runs unchanged on
|
||||
mock-durable — verified. Crash/restart (drop backend, keep disk) recovers log
|
||||
+ kv + snapshot by replay; seq counter continues. 15 tests. See Blockers for
|
||||
why end-to-end perform suspension isn't exercised under sx_server.exe.
|
||||
- **Phase 3b (76/76) — Phase 3 complete.** Backend refactor: `last-seq` is now
|
||||
a monotonic per-stream high-water mark (backend `seqs` dict), not physical
|
||||
length, so a compacted log keeps assigning climbing seqs. Added backend
|
||||
`:truncate-through` + `persist/truncate`. `compaction.sx` — `persist/compact`
|
||||
checkpoints then drops events with seq <= snapshot seq; `should-compact?`/
|
||||
`maybe-compact` give an explicit "compact every N tail events" policy. 11
|
||||
tests: post-compaction replay value == uncompacted full replay (determinism),
|
||||
seq continuity after truncation, idempotence. `persist/count` = physical
|
||||
stored count (shrinks on compaction) vs `persist/last-seq` = logical.
|
||||
- **Phase 3a (65/65).** `snapshot.sx` — a snapshot is a projection state
|
||||
`{:value :seq}` stored in the kv facet under `snapshot/<name>`.
|
||||
`persist/checkpoint` replays + saves; `persist/replay` = snapshot + tail.
|
||||
11 tests assert the headline both ways: snapshot+tail == full replay (value
|
||||
and whole state), plus replay determinism.
|
||||
- **Phase 2c (54/54) — Phase 2 complete.** `concurrency.sx` — optimistic
|
||||
concurrency: `persist/append-expect b stream expected ...` refuses the append
|
||||
if the stream advanced past `expected`, returning a conflict VALUE
|
||||
`{:conflict true :expected :actual}` (never a crash, never a silent
|
||||
overwrite). `persist/conflict?` + accessors; caller re-reads actual and
|
||||
retries. 8 tests incl. two-writer race + retry.
|
||||
- **Phase 2b (46/46).** `subscribe.sx` — `persist/hub` wraps a backend with
|
||||
per-stream callbacks. `persist/publish` appends then fires subscribers
|
||||
`(backend stream event)`; direct `persist/append` bypasses them by design
|
||||
(bulk load/replay). Canonical use: callback re-runs `project-resume` or bumps
|
||||
a kv counter so read models update on write. 9 tests.
|
||||
- **Phase 2a (37/37).** `project.sx` — projection state `{:value :seq}`;
|
||||
`persist/project` folds whole stream from seed, `persist/project-resume`
|
||||
folds only the tail (seq > prior seq) so read models update incrementally.
|
||||
step is pure `(value event) -> value`. 9 tests incl. resume==full-from-zero.
|
||||
- **Phase 1 complete (28/28).** `event.sx` (event record + accessors),
|
||||
`backend.sx` (injectable protocol + in-memory log/kv impl, closure state via
|
||||
set!), `log.sx` (append/read/read-from, sequential per-stream seq, stream
|
||||
isolation), `kv.sx` (get/put/delete/has?/keys/get-or/update), `api.sx`
|
||||
(`persist/open` — mem default, backend injectable). conformance.sh + three
|
||||
suites (event/log/kv). Gotcha logged in Blockers: `map` returns an
|
||||
array-backed list not `equal?` to a `(list ...)` literal — assertions build
|
||||
compared lists with list/nth.
|
||||
|
||||
## Blockers
|
||||
|
||||
### OPEN — host durable-storage adapter (the only gap to real durability)
|
||||
|
||||
**Owner:** a `hosts/` loop (NOT this one — `lib/persist/**` is the scope fence,
|
||||
and `sx_build` is forbidden here). **Without it, durable persistence silently
|
||||
drops all writes.**
|
||||
|
||||
**Symptom / minimal repro.** `persist/durable-backend` performs
|
||||
`{:op "persist/..." :args (...)}` for every storage op. Under `sx_server.exe`
|
||||
the kernel's default IO resolver answers unknown ops with `nil` — so the durable
|
||||
backend does not error, it *silently no-ops*:
|
||||
|
||||
```
|
||||
; load event/backend/log/durable, then:
|
||||
(let ((b (persist/durable-backend)))
|
||||
(begin (persist/append b "s" "x" 0 {})
|
||||
(persist/append b "s" "x" 0 {})
|
||||
(list (persist/event-seq (persist/append b "s" "x" 0 {}))
|
||||
(persist/count b "s")
|
||||
(persist/read b "s"))))
|
||||
; => (1 0 nil) ; every append gets seq 1, nothing stored, reads empty — DATA LOSS
|
||||
```
|
||||
|
||||
The in-memory backend (`persist/open`) is correct and complete; this gap is
|
||||
*only* the production transport.
|
||||
|
||||
**What to build.** A host servicer that answers the `persist/*` IO ops against a
|
||||
real store (sqlite/files/pg). It is the production twin of `persist/serve`
|
||||
(`lib/persist/durable.sx`) — same op names, same request/response shapes — so
|
||||
mirror that function and back it with durable storage instead of a mem-backend.
|
||||
|
||||
**Op contract** (request `{:op :args}` → response). `args` is a positional list;
|
||||
events are dicts `{:stream :seq :type :at :data}`:
|
||||
|
||||
| op | args | returns | semantics |
|
||||
|----|------|---------|-----------|
|
||||
| `persist/append` | `(stream event)` | (ignored) | store `event` in `stream` |
|
||||
| `persist/read` | `(stream)` | event list (oldest-first) | currently-stored events |
|
||||
| `persist/last-seq` | `(stream)` | number | **monotonic high-water mark** (see below) |
|
||||
| `persist/streams` | `()` | stream-name list | every stream ever appended to |
|
||||
| `persist/truncate` | `(stream n)` | (ignored) | drop events with `seq <= n` |
|
||||
| `persist/kv-get` | `(key)` | value or nil | |
|
||||
| `persist/kv-put` | `(key val)` | (ignored) | upsert |
|
||||
| `persist/kv-delete`| `(key)` | (ignored) | remove key |
|
||||
| `persist/kv-has?` | `(key)` | boolean | |
|
||||
| `persist/kv-keys` | `()` | key list | |
|
||||
|
||||
**Hard invariants** (the facets above rely on these; mem-backend + `persist/serve`
|
||||
are the reference):
|
||||
1. **`last-seq` is a per-stream monotonic counter, NOT the row count.** It must
|
||||
keep climbing after `truncate`, so a compacted stream never reassigns a seq.
|
||||
Store the counter separately from the rows.
|
||||
2. `append` is the only seq-assigner upstream (`log.sx` does `last-seq + 1`); the
|
||||
host must not renumber.
|
||||
3. `read` returns events in append order with `:seq` intact (post-truncate it
|
||||
returns only the surviving tail).
|
||||
4. `streams` is the set of streams that ever had an append (survives full
|
||||
compaction) — keep it keyed off the seq counters, like mem-backend's `seqs`.
|
||||
5. Values round-trip structurally: dicts/lists/numbers/strings/nil/booleans in =
|
||||
same out (event `:data`, kv values, blob refs).
|
||||
|
||||
**Blobs** are a *separate* adapter with the same pattern: ops `blob/put`
|
||||
`(bytes mime)` → cid, `blob/get` `(cid)` → bytes, `blob/has?` `(cid)` → bool
|
||||
(see `lib/persist/blob.sx` / `persist/blob-serve`). Back it with the
|
||||
content-addressed store (artdag/IPFS); persist only ever stores the returned ref.
|
||||
|
||||
**Where to register.** `hosts/ocaml/bin/sx_server.ml`:
|
||||
- the in-process resolver `Sx_types._cek_io_resolver` (~line 3864) — add a
|
||||
`"persist/..."` match arm dispatching to the new storage module (used by
|
||||
SSR/`eval_with_io`); and/or
|
||||
- the bridge path in `cek_run_with_io` (~line 528–576), which currently forwards
|
||||
unknown ops via `io_request op args` to the external bridge — a Python-bridge
|
||||
handler is the alternative home if storage lives Python-side.
|
||||
Pick one home; the op names are the contract, not the location.
|
||||
|
||||
**Acceptance test.** Swap the transport: point a `persist/io-backend` at the new
|
||||
host servicer (instead of `persist/serve` over a mem disk) and run the existing
|
||||
`durable` + `recovery` suites — they must stay green, and state must survive an
|
||||
actual process restart (kill the server, restart, replay → recovered). That is
|
||||
exactly what `lib/persist/tests/durable.sx` and `recovery.sx` already assert
|
||||
against the mock; the host adapter just makes the disk real.
|
||||
|
||||
---
|
||||
|
||||
- **Phase 4 perform-suspension not exercised end-to-end under sx_server.exe (by
|
||||
design, not a bug).** The CEK suspension primitives (`cek-step-loop`,
|
||||
`cek-resume`, `cek-suspended?`, `cek-io-request`) and a settable SX-level IO
|
||||
hook are only bound by the `run_tests` OCaml binary (out of scope: hosts/, and
|
||||
sx_build is forbidden). Under `sx_server.exe`, an unhandled `perform` resolves
|
||||
through the OCaml io-request/io-response stdin bridge (production path) — not
|
||||
callable from the pure-eval conformance harness. Resolution: the durable
|
||||
backend's transport is injectable, so the production path is one line
|
||||
`(perform req)` (kernel-handled) and ALL durable logic is tested through the
|
||||
mock transport (`persist/serve` over an in-memory disk). The single untested
|
||||
line is the kernel primitive itself. No host primitive needed; nothing to fix.
|
||||
- **Not a blocker, a testing convention:** `map` returns an array-backed list
|
||||
that is NOT `equal?` to a `(list ...)` cons-literal (two `map` results do
|
||||
compare equal to each other). When asserting list-shaped results against a
|
||||
`(list ...)` literal, build the compared value with `list`/`nth`/`cons`, not
|
||||
`map`. `into`/list-coercion needs the IO bridge and is unusable in the
|
||||
pure-eval harness.
|
||||
@@ -10,7 +10,7 @@ extension that merges per-peer indices.
|
||||
|
||||
## Status (rolling)
|
||||
|
||||
`bash lib/search/conformance.sh` → **0/0** (not yet started)
|
||||
`bash lib/search/conformance.sh` → **122/122** (Phases 1–4 complete)
|
||||
|
||||
## Ground rules
|
||||
|
||||
@@ -61,46 +61,148 @@ lib/search/index.sx lib/search/eval.sx
|
||||
|
||||
## Phase 1 — Tokenize + index
|
||||
|
||||
- [ ] `lib/search/tokenize.sx` — normalize (lowercase, strip punctuation), split on
|
||||
- [x] `lib/search/tokenize.sx` — normalize (lowercase, strip punctuation), split on
|
||||
whitespace, return positions
|
||||
- [ ] `lib/search/index.sx` — inverted index data structure (typed `Map` from
|
||||
haskell lib); `insert`, `delete`, `lookup`
|
||||
- [ ] `lib/search/api.sx` — `(search/index doc)`, `(search/lookup term)`
|
||||
- [ ] `lib/search/tests/index.sx` — 15+ cases: tokenize, insert + lookup, update,
|
||||
delete, multi-doc
|
||||
- [ ] `lib/search/scoreboard.{json,md}`
|
||||
- [ ] `lib/search/conformance.sh`
|
||||
- [x] `lib/search/index.sx` — inverted index data structure; `indexDoc`, `deleteDoc`,
|
||||
`lookupTerm`, `docFreq`, `allTerms`. (Data.Map's public API lacks
|
||||
toList/keys/map/filter, so a sorted assoc-list `[(Term,[(DocId,[Pos])])]` is used —
|
||||
the conceptual `Map Term [(DocId,[Pos])]` with free term iteration.)
|
||||
- [x] `lib/search/api.sx` — assembles `search/src` (tokenize + index); Haskell entry
|
||||
points `indexDoc` / `lookupTerm`
|
||||
- [x] `lib/search/tests/index.sx` — 18 cases: tokenize, insert + lookup, update,
|
||||
delete, multi-doc, positions, docFreq, allTerms
|
||||
- [x] `lib/search/scoreboard.{json,md}`
|
||||
- [x] `lib/search/conformance.sh`
|
||||
|
||||
## Phase 2 — Query AST + boolean evaluation
|
||||
|
||||
- [ ] Query ADT: `Term Text | And Query Query | Or Query Query | Not Query |
|
||||
Phrase [Text]`
|
||||
- [ ] `lib/search/parse.sx` — query syntax parser (boolean operators, quoted phrases)
|
||||
- [ ] `lib/search/eval.sx` — boolean eval via set ops on posting lists
|
||||
- [ ] phrase eval — adjacency check using positions
|
||||
- [ ] `lib/search/tests/boolean.sx` — 25+ cases: term, and, or, not, phrase,
|
||||
composition, parser edge cases
|
||||
- [x] Query ADT: `Term String | And Query Query | Or Query Query | Not Query |
|
||||
Phrase [String]` (in `lib/search/query.sx`)
|
||||
- [x] `lib/search/parse.sx` — query syntax parser: tokenizer + recursive-descent
|
||||
(OR < AND < NOT precedence, implicit AND on adjacency, quoted phrases, parens,
|
||||
case-insensitive keywords); `parseQuery`, `searchQuery`, `showQ`
|
||||
- [x] `lib/search/query.sx` — boolean eval via set ops on docid-sorted posting lists
|
||||
(sortedUnion/Inter/Diff, Not over allDocs universe)
|
||||
- [x] phrase eval — positional adjacency check (phraseInDoc / phraseStartsAt)
|
||||
- [x] `lib/search/tests/boolean.sx` — 28 cases: term, and, or, not, phrase,
|
||||
composition (parser edge cases move to the parse.sx suite)
|
||||
|
||||
## Phase 3 — Ranking
|
||||
|
||||
- [ ] document frequency tracking — extend index with `df` per term
|
||||
- [ ] TF-IDF scoring
|
||||
- [ ] BM25 scoring (configurable k1, b)
|
||||
- [ ] top-N retrieval (heap-based)
|
||||
- [ ] `lib/search/tests/rank.sx` — 20+ cases: TF-IDF behavior, BM25 vs TF-IDF,
|
||||
ranking stability, top-N correctness
|
||||
- [x] document frequency — `docFreq`/`idf`/`bm25idf` derived from the index
|
||||
(posting-list length); no separate df store needed
|
||||
- [x] TF-IDF scoring (`rankTfIdf`)
|
||||
- [x] BM25 scoring, configurable k1/b (`rankBm25 k1 b`)
|
||||
- [x] top-N retrieval (`topNTfIdf`/`topNBm25` — sortBy + take; stable DocId tiebreak)
|
||||
- [x] `lib/search/tests/rank.sx` — 23 cases: TF-IDF tf/idf behavior, BM25 length-norm
|
||||
+ tf-saturation flips vs TF-IDF, b-parameter effect, tiebreak stability, top-N
|
||||
|
||||
## Phase 4 — ACL filter + federation
|
||||
|
||||
- [ ] post-filter — each candidate result tested via `(acl/permit? viewer :read doc)`
|
||||
- [ ] federated query — fan out to peer instances via fed-sx, merge results
|
||||
- [ ] merge policy — interleave by rank, dedupe by `(peer, doc-id)`
|
||||
- [ ] `lib/search/tests/integration.sx` — federated search with ACL filter
|
||||
- [x] post-filter — `aclFilter`/`searchTfIdfAcl`/`topNTfIdfAcl`/`searchBm25Acl` take an
|
||||
injected `permit :: DocId -> Bool` predicate, applied post-rank (never in the index)
|
||||
- [x] federated query — `fedIndex :: [(PeerId, Index)] -> Index` merges per-peer
|
||||
inverted indices (union posting lists per term); rank/search run once over the merge
|
||||
- [x] merge policy — relabel local DocIds to global `gid = peer*1000 + local`
|
||||
(bijection ⇒ dedupe by (peer,doc-id) is automatic); ranking interleaves peers by score
|
||||
- [x] `lib/search/tests/integration.sx` — 21 cases: index merge, cross-peer df/lookup,
|
||||
position preservation, boolean/phrase over the merge, ACL filter + top-N + bm25
|
||||
|
||||
## Extensions (post-roadmap, search-shaped vocabulary)
|
||||
|
||||
- [x] prefix / wildcard queries (`prefixTerms`, `prefixDocs`, `prefixRankTfIdf`) — 14 tests
|
||||
- [x] fuzzy matching — edit distance term expansion (`editDist`, `fuzzyTerms`,
|
||||
`fuzzyDocs`, `fuzzyRankTfIdf`) — 18 tests
|
||||
- [x] result pagination (offset / limit) — `paginate`, `pageTfIdf`, `pageBm25`,
|
||||
`resultCount` — 12 tests
|
||||
- [x] snippet / highlight generation (`highlight`, `snippet`) — 12 tests
|
||||
- [x] stemming (suffix stripping) — `stem`, `stemText`, `stemTokens`, `indexStemmed`
|
||||
— 18 tests
|
||||
- [x] proximity / NEAR — `nearDocs k t1 t2` (unordered, within k positions) — 9 tests
|
||||
- [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests
|
||||
- [x] boolean-filtered ranked search — `queryTerms`, `searchRankTfIdf`,
|
||||
`searchRankBm25` (filter by boolean query, rank survivors by relevance) — 11 tests
|
||||
- [x] did-you-mean / spelling suggestion — `suggest`, `suggestN` (closest indexed
|
||||
terms by edit distance, alphabetical tiebreak) — 9 tests
|
||||
|
||||
## Progress log
|
||||
|
||||
(loop fills this in)
|
||||
- **Extension: did-you-mean / spelling suggestion (234/234 total).** `suggest`/`suggestN`
|
||||
rank indexed terms by edit distance to a (misspelled) query term, alphabetical
|
||||
tiebreak. 9 tests.
|
||||
- **Extension: boolean-filtered ranked search (225/225 total).** `searchRankTfIdf`/
|
||||
`searchRankBm25` parse a boolean query, filter docs via evalQuery, then rank the
|
||||
survivors by relevance over the query's leaf terms (`queryTerms`) — the real-world
|
||||
filter-then-rank pattern. 11 tests.
|
||||
- **Extension: synonyms/query expansion (214/214 total).** A synonym map
|
||||
`[(Term,[Term])]` expands a query term to itself + synonyms (`expandTerm`); `synDocs`
|
||||
unions, `synRankTfIdf` ranks the expanded set. 9 tests.
|
||||
- **Extension: proximity/NEAR (205/205 total).** `nearDocs k t1 t2 idx` returns docs
|
||||
where both terms occur within k positions (unordered), candidates = posting
|
||||
intersection, filtered on the positional postings. 9 tests.
|
||||
- **Extension: stemming (196/196 total).** Deterministic English suffix stripping
|
||||
(`stem`), `stemText`/`stemTokens`, `indexStemmed`. Two haskell-on-sx gotchas: take/drop
|
||||
over a String yield char CODES not char strings (rebuild via `joinChars . map chr`),
|
||||
and isSuffixOf's `reverse` trips `++` on the String repr (manual suffix compare). All
|
||||
five planned extensions now done; the loop can keep adding search vocabulary. 18 tests.
|
||||
- **Extension: highlight/snippet (178/178 total).** `highlight terms text` marks
|
||||
query-matching (normalized) tokens with [..]; `snippet ctx terms text` extracts a
|
||||
context window around the first match. 12 tests.
|
||||
- **Extension: fuzzy matching (166/166 total).** Levenshtein `editDist` as an O(m*n)
|
||||
row-based DP (the naive recursive version is exponential and times out under load),
|
||||
`fuzzyTerms`/`fuzzyDocs`/`fuzzyRankTfIdf` expand a term to indexed terms within a max
|
||||
edit distance. 18 tests.
|
||||
- **Extension: pagination (148/148 total).** `paginate off lim` windows a ranked list
|
||||
(take lim . drop off); `pageTfIdf`/`pageBm25` + `resultCount`. 12 tests. Note the
|
||||
full conformance now runs 8 suites sequentially and needs an overall timeout ~1900s
|
||||
under the heavy box load.
|
||||
- **Extension: prefix/wildcard queries (136/136 total).** `prefixTerms` matches every
|
||||
indexed term starting with a prefix (via allTerms + isPrefixOf); `prefixDocs` unions
|
||||
their docs; `prefixRankTfIdf` ranks treating the matched terms as the query. 14 tests.
|
||||
- **Phase 4 complete — federation + ACL (122/122 total). Roadmap done.** `fedIndex`
|
||||
merges per-peer inverted indices (union posting lists per term) after relabelling
|
||||
local DocIds to global `gid = peer*1000 + local` — the bijection makes (peer,doc-id)
|
||||
dedupe automatic and keeps positions, so ranking runs once over the merge and
|
||||
interleaves peers by score (rank-correct). ACL is a post-rank `filter` over an
|
||||
injected `permit :: DocId -> Bool` (viewer baked in by the caller) — never in the
|
||||
index; `searchTfIdfAcl`/`topNTfIdfAcl`/`searchBm25Acl`. 21 integration tests.
|
||||
- **Phase 3 complete — ranking (101/101 total).** TF-IDF (`rankTfIdf`) and BM25
|
||||
(`rankBm25 k1 b`) over the candidate set (docs containing any query term), scores
|
||||
as floats with deterministic DocId-ascending tiebreak; `topNTfIdf`/`topNBm25` via
|
||||
sortBy+take. df/idf derived from posting-list length (no separate df store). 23
|
||||
tests incl. a BM25-vs-TF-IDF flip (length-norm + tf-saturation) and the b-parameter
|
||||
effect. Float division/`log`/float literals all work in haskell-on-sx.
|
||||
- **Phase 2 complete — parser (78/78 total).** Query tokenizer (ord-based
|
||||
delimiters, quoted phrases) + recursive-descent parser with OR<AND<NOT precedence,
|
||||
implicit AND on adjacency, parens, case-insensitive keywords. `parseQuery`,
|
||||
`searchQuery`, `showQ` (canonical render for AST tests). 32 tests in parse.sx.
|
||||
**haskell-on-sx parser gotchas hit while writing this (see parse.sx header):**
|
||||
(1) escaped char literals like `'\"'` break the tokenizer — match delimiters by
|
||||
`ord c == 34`; (2) an `[]` *pattern* inside a `case` alt breaks the parser — use
|
||||
multi-clause functions instead; (3) `case`/constructor patterns and `let (a,b)=..`
|
||||
are fine. Embedded Haskell string literals in a `.sx` source string need single
|
||||
`\"`, not `\\\"`.
|
||||
- **Phase 2 boolean/phrase eval (46/46 total).** Query ADT
|
||||
`Term|And|Or|Not|Phrase` + `evalQuery :: Index -> Query -> [DocId]` in query.sx.
|
||||
Boolean ops are linear merges over docid-sorted posting lists; Not subtracts from
|
||||
the allDocs universe; Phrase checks positional adjacency. 28 tests in boolean.sx.
|
||||
Refactored both suites to **batch all cases into one program eval** (search-batch
|
||||
in testlib) — under the heavy CPU load on this box (~11 on 2 cores), 18–28 separate
|
||||
hk-eval-program calls timed out; one combined eval per suite is ~20× faster.
|
||||
Parser (parse.sx) is the remaining Phase 2 box.
|
||||
- **Phase 1 complete (18/18).** Tokenizer (lowercase + strip punctuation + positions),
|
||||
inverted index as sorted assoc-list `[(Term,[(DocId,[Pos])])]`, indexDoc/deleteDoc/
|
||||
lookupTerm/docFreq/allTerms. Search lib is Haskell source assembled into `search/src`
|
||||
and evaluated via the haskell-on-sx interpreter; tests reuse `hk-test` counters and a
|
||||
`search-eval` helper that forces HK values to plain SX. conformance.sh models
|
||||
lib/haskell (MODE=counters, COUNTERS_PASS/FAIL=hk-test-pass/fail).
|
||||
|
||||
## Blockers
|
||||
|
||||
(loop fills this in)
|
||||
- **None.** Note: the box is heavily CPU-oversubscribed by sibling loop agents
|
||||
(load ~11 on 2 cores); each program eval is ~10× slower than nominal, so suite
|
||||
timeout is set to 600s. Runs are correct, just slow.
|
||||
- **Data.Map public API gap (informational, not fixing):** the haskell-on-sx
|
||||
`import Data.Map` binds only empty/singleton/insert/lookup/member/size/null/delete/
|
||||
insertWith/adjust/findWithDefault — no toList/keys/elems/map/filter/unionWith. Index
|
||||
uses a pure assoc-list instead so term iteration and federation merge stay simple.
|
||||
|
||||
Reference in New Issue
Block a user