diff --git a/lib/content/conformance.sh b/lib/content/conformance.sh index 8f43b843..1a7e20e0 100755 --- a/lib/content/conformance.sh +++ b/lib/content/conformance.sh @@ -15,7 +15,7 @@ if [ ! -x "$SX_SERVER" ]; then fi fi -SUITES=(block doc render api store) +SUITES=(block doc render api store crdt) OUT_JSON="lib/content/scoreboard.json" OUT_MD="lib/content/scoreboard.md" @@ -43,6 +43,7 @@ run_suite() { (load "lib/content/render.sx") (load "lib/content/api.sx") (load "lib/content/store.sx") +(load "lib/content/crdt.sx") (epoch 2) (eval "(define content-test-pass 0)") (eval "(define content-test-fail 0)") diff --git a/lib/content/crdt.sx b/lib/content/crdt.sx new file mode 100644 index 00000000..c48cc16f --- /dev/null +++ b/lib/content/crdt.sx @@ -0,0 +1,378 @@ +;; content-on-sx — collaborative merge (state-based CvRDT). +;; +;; The merge is a join (least upper bound) on a semilattice, so it is +;; commutative, associative and idempotent BY CONSTRUCTION — applying ops in any +;; order, or merging replicas in any order / twice, converges to the same +;; document. This is NOT last-write-wins-as-cop-out: ordering uses unique dense +;; position keys (Logoot), presence uses OR-tombstones (remove-wins), and each +;; field is an LWW-Register keyed by a logical (ts, actor) clock — an explicit, +;; deterministic per-field conflict policy. +;; +;; Every op (insert/update/delete) contributes a PARTIAL element; the per-id +;; state is the join of all contributions. So update-before-insert and +;; delete-before-insert are not lost — they merge when the rest arrives. +;; +;; Shapes: +;; state = {:elements element>} +;; element = {:id :pos :type :deleted :fields register>} +;; register = {:value v :ts :actor } +;; position = list of cells; cell = (list digit actor); lexicographic order +;; +;; Requires (loaded by harness): block.sx, doc.sx. + +(define CRDT-BASE 65536) + +;; ── position order (Logoot) ── +(define + crdt-cell-cmp + (fn + (c1 c2) + (let + ((d1 (first c1)) (d2 (first c2))) + (cond + ((< d1 d2) -1) + ((> d1 d2) 1) + (else + (let + ((a1 (first (rest c1))) (a2 (first (rest c2)))) + (cond + ((< a1 a2) -1) + ((> a1 a2) 1) + (else 0)))))))) + +(define + crdt-pos-compare + (fn + (p1 p2) + (cond + ((and (= (len p1) 0) (= (len p2) 0)) 0) + ((= (len p1) 0) -1) + ((= (len p2) 0) 1) + (else + (let + ((c (crdt-cell-cmp (first p1) (first p2)))) + (if (= c 0) (crdt-pos-compare (rest p1) (rest p2)) c)))))) + +;; single-cell position constructor (handy for explicit tests) +(define crdt-pos (fn (digit actor) (list (list digit actor)))) + +;; allocate a position strictly between left and right (nil = unbounded) +(define + cr-alloc + (fn + (left right actor i acc) + (let + ((ld (if (< i (len left)) (first (nth left i)) 0)) + (rd (if (< i (len right)) (first (nth right i)) CRDT-BASE))) + (if + (> (- rd ld) 1) + (append + acc + (list + (list + (+ + ld + (+ + 1 + (floor (/ (- (- rd ld) 1) 2)))) + actor))) + (cr-alloc + left + right + actor + (+ i 1) + (append + acc + (list + (list + ld + (if (< i (len left)) (first (rest (nth left i))) actor))))))))) + +(define + crdt-pos-between + (fn + (left right actor) + (cr-alloc + (if (= left nil) (list) left) + (if (= right nil) (list) right) + actor + 0 + (list)))) + +;; ── register (LWW by logical (ts, actor)) ── +(define + crdt-reg-max + (fn + (r1 r2) + (cond + ((= r1 nil) r2) + ((= r2 nil) r1) + (else + (let + ((t1 (get r1 :ts)) (t2 (get r2 :ts))) + (cond + ((> t1 t2) r1) + ((< t1 t2) r2) + (else (if (>= (get r1 :actor) (get r2 :actor)) r1 r2)))))))) + +;; ── small set/dict helpers ── +(define + crdt-member? + (fn + (x xs) + (cond + ((= (len xs) 0) false) + ((= (first xs) x) true) + (else (crdt-member? x (rest xs)))))) + +(define + crdt-dedup-loop + (fn + (xs seen) + (if + (= (len xs) 0) + (reverse seen) + (if + (crdt-member? (first xs) seen) + (crdt-dedup-loop (rest xs) seen) + (crdt-dedup-loop (rest xs) (cons (first xs) seen)))))) + +(define crdt-dedup (fn (xs) (crdt-dedup-loop xs (list)))) + +(define + crdt-union-keys + (fn (d1 d2) (crdt-dedup (append (keys d1) (keys d2))))) + +;; ── element join ── +(define + crdt-merge-pos + (fn + (p1 p2) + (cond + ((= p1 nil) p2) + ((= p2 nil) p1) + ((<= (crdt-pos-compare p1 p2) 0) p1) + (else p2)))) + +(define crdt-merge-type (fn (t1 t2) (if (= t1 nil) t2 t1))) + +(define + crdt-merge-fields-loop + (fn + (names f1 f2 acc) + (if + (= (len names) 0) + acc + (let + ((nm (first names))) + (crdt-merge-fields-loop + (rest names) + f1 + f2 + (assoc acc nm (crdt-reg-max (get f1 nm) (get f2 nm)))))))) + +(define + crdt-merge-fields + (fn + (f1 f2) + (crdt-merge-fields-loop (crdt-union-keys f1 f2) f1 f2 {}))) + +(define crdt-merge-element (fn (e1 e2) {:fields (crdt-merge-fields (get e1 :fields) (get e2 :fields)) :id (get e1 :id) :type (crdt-merge-type (get e1 :type) (get e2 :type)) :deleted (or (= (get e1 :deleted) true) (= (get e2 :deleted) true)) :pos (crdt-merge-pos (get e1 :pos) (get e2 :pos))})) + +;; ── state ── +(define crdt-empty (fn () {:elements {}})) + +(define + crdt-add-element + (fn + (state elem) + (let + ((elems (get state :elements)) (id (get elem :id))) + (let + ((existing (get elems id))) + (assoc + state + :elements (assoc + elems + id + (if (= existing nil) elem (crdt-merge-element existing elem)))))))) + +(define + crdt-build-fields-loop + (fn + (pairs ts actor acc) + (if + (= (len pairs) 0) + acc + (crdt-build-fields-loop + (rest pairs) + ts + actor + (assoc acc (first (first pairs)) {:ts ts :actor actor :value (first (rest (first pairs)))}))))) + +(define + crdt-build-fields + (fn (pairs ts actor) (crdt-build-fields-loop pairs ts actor {}))) + +;; ── ops as partial-element contributions ── +(define + crdt-insert + (fn + (state id type pos fields ts actor) + (crdt-add-element state {:fields (crdt-build-fields fields ts actor) :id id :type type :deleted false :pos pos}))) + +(define + crdt-update + (fn (state id fname value ts actor) (crdt-add-element state {:fields (assoc {} fname {:ts ts :actor actor :value value}) :id id :type nil :deleted false :pos nil}))) + +(define crdt-delete (fn (state id) (crdt-add-element state {:fields {} :id id :type nil :deleted true :pos nil}))) + +;; ── state merge (join) ── +(define + crdt-merge-loop + (fn + (ids ea eb acc) + (if + (= (len ids) 0) + acc + (let + ((id (first ids))) + (let + ((x (get ea id)) (y (get eb id))) + (crdt-merge-loop + (rest ids) + ea + eb + (assoc + acc + id + (cond + ((= x nil) y) + ((= y nil) x) + (else (crdt-merge-element x y)))))))))) + +(define crdt-merge (fn (a b) {:elements (crdt-merge-loop (crdt-union-keys (get a :elements) (get b :elements)) (get a :elements) (get b :elements) {})})) + +(define + crdt-merge-all + (fn + (states) + (if + (= (len states) 0) + (crdt-empty) + (if + (= (len states) 1) + (first states) + (crdt-merge (first states) (crdt-merge-all (rest states))))))) + +;; ── op interpreter ── +(define crdt-op-insert (fn (id type pos fields ts actor) {:ts ts :fields fields :id id :type type :op "insert" :actor actor :pos pos})) + +(define crdt-op-update (fn (id field value ts actor) {:ts ts :field field :id id :op "update" :actor actor :value value})) + +(define crdt-op-delete (fn (id) {:id id :op "delete"})) + +(define + crdt-apply + (fn + (state op) + (let + ((k (get op :op))) + (cond + ((= k "insert") + (crdt-insert + state + (get op :id) + (get op :type) + (get op :pos) + (get op :fields) + (get op :ts) + (get op :actor))) + ((= k "update") + (crdt-update + state + (get op :id) + (get op :field) + (get op :value) + (get op :ts) + (get op :actor))) + ((= k "delete") (crdt-delete state (get op :id))) + (else (error (str "unknown crdt op: " k))))))) + +(define + crdt-apply-all + (fn + (state ops) + (if + (= (len ops) 0) + state + (crdt-apply-all (crdt-apply state (first ops)) (rest ops))))) + +;; ── materialise to a Phase-1 document ── +(define + crdt-elements-list + (fn + (state) + (map + (fn (id) (get (get state :elements) id)) + (keys (get state :elements))))) + +(define + crdt-live? + (fn + (e) + (and + (= (get e :deleted) false) + (if (= (get e :pos) nil) false true) + (if (= (get e :type) nil) false true)))) + +(define + crdt-live-elements + (fn (state) (filter crdt-live? (crdt-elements-list state)))) + +(define + crdt-insert-sorted + (fn + (e sorted) + (cond + ((= (len sorted) 0) (list e)) + ((< (crdt-pos-compare (get e :pos) (get (first sorted) :pos)) 0) + (cons e sorted)) + (else (cons (first sorted) (crdt-insert-sorted e (rest sorted))))))) + +(define + crdt-sort-by-pos + (fn + (elems) + (if + (= (len elems) 0) + (list) + (crdt-insert-sorted (first elems) (crdt-sort-by-pos (rest elems)))))) + +(define + crdt-field-pairs + (fn + (fields) + (map (fn (nm) (list nm (get (get fields nm) :value))) (keys fields)))) + +(define + crdt-element->block + (fn + (e) + (mk-block (get e :type) (get e :id) (crdt-field-pairs (get e :fields))))) + +(define + crdt-order + (fn + (state) + (map + (fn (e) (get e :id)) + (crdt-sort-by-pos (crdt-live-elements state))))) + +(define + crdt-materialize + (fn + (doc-id state) + (doc-new + doc-id + (map crdt-element->block (crdt-sort-by-pos (crdt-live-elements state)))))) diff --git a/lib/content/scoreboard.json b/lib/content/scoreboard.json index 25263cb1..490e10f4 100644 --- a/lib/content/scoreboard.json +++ b/lib/content/scoreboard.json @@ -4,9 +4,10 @@ "doc": {"pass": 40, "fail": 0}, "render": {"pass": 29, "fail": 0}, "api": {"pass": 26, "fail": 0}, - "store": {"pass": 29, "fail": 0} + "store": {"pass": 29, "fail": 0}, + "crdt": {"pass": 34, "fail": 0} }, - "total_pass": 162, + "total_pass": 196, "total_fail": 0, - "total": 162 + "total": 196 } diff --git a/lib/content/scoreboard.md b/lib/content/scoreboard.md index 6a922a4b..fc7fac98 100644 --- a/lib/content/scoreboard.md +++ b/lib/content/scoreboard.md @@ -9,4 +9,5 @@ _Generated by `lib/content/conformance.sh`_ | render | 29 | 0 | 29 | | api | 26 | 0 | 26 | | store | 29 | 0 | 29 | -| **Total** | **162** | **0** | **162** | +| crdt | 34 | 0 | 34 | +| **Total** | **196** | **0** | **196** | diff --git a/lib/content/tests/crdt.sx b/lib/content/tests/crdt.sx new file mode 100644 index 00000000..542554b4 --- /dev/null +++ b/lib/content/tests/crdt.sx @@ -0,0 +1,315 @@ +;; Phase 3 — collaborative merge (CvRDT). The merge is a join: commutative, +;; associative, idempotent. Tests apply ops in any order, twice, and merge +;; replicas both ways — all must converge to identical state. + +(st-bootstrap-classes!) +(content-bootstrap-blocks!) +(content-bootstrap-doc!) +(content-bootstrap-render!) + +(define same? (fn (a b) (= (get a :elements) (get b :elements)))) + +;; ── position order (Logoot) ── +(content-test + "pos lt" + (crdt-pos-compare + (crdt-pos 1 0) + (crdt-pos 2 0)) + -1) +(content-test + "pos gt" + (crdt-pos-compare + (crdt-pos 2 0) + (crdt-pos 1 0)) + 1) +(content-test + "pos eq" + (crdt-pos-compare + (crdt-pos 1 0) + (crdt-pos 1 0)) + 0) +(content-test + "pos actor tiebreak" + (crdt-pos-compare + (crdt-pos 1 1) + (crdt-pos 1 2)) + -1) +(content-test + "between > left" + (< + (crdt-pos-compare + (crdt-pos 1 0) + (crdt-pos-between + (crdt-pos 1 0) + (crdt-pos 2 0) + 9)) + 0) + true) +(content-test + "between < right" + (< + (crdt-pos-compare + (crdt-pos-between + (crdt-pos 1 0) + (crdt-pos 2 0) + 9) + (crdt-pos 2 0)) + 0) + true) +(content-test + "between start < right" + (< + (crdt-pos-compare + (crdt-pos-between nil (crdt-pos 5 0) 9) + (crdt-pos 5 0)) + 0) + true) +(content-test + "between end > left" + (< + (crdt-pos-compare + (crdt-pos 5 0) + (crdt-pos-between (crdt-pos 5 0) nil 9)) + 0) + true) + +;; ── build + materialise ── +(define + base + (crdt-insert + (crdt-insert + (crdt-empty) + "h" + "heading" + (crdt-pos 1 0) + (list (list "level" 1) (list "text" "Title")) + 1 + 0) + "p" + "text" + (crdt-pos 2 0) + (list (list "text" "Body")) + 1 + 0)) + +(content-test "order" (crdt-order base) (list "h" "p")) +(content-test + "materialize ids" + (doc-ids (crdt-materialize "d" base)) + (list "h" "p")) +(content-test + "materialize render" + (asHTML (crdt-materialize "d" base)) + "

Title

Body

") + +;; ── commutativity: ops in any order converge ── +(define + opA + (crdt-op-insert + "x" + "text" + (crdt-pos 3 0) + (list (list "text" "X")) + 2 + 1)) +(define opB (crdt-op-update "p" "text" "Edited" 5 1)) +(define opC (crdt-op-delete "h")) +(define s-abc (crdt-apply-all base (list opA opB opC))) +(define s-cba (crdt-apply-all base (list opC opB opA))) +(define s-bca (crdt-apply-all base (list opB opC opA))) +(content-test "commutative abc=cba" (same? s-abc s-cba) true) +(content-test "commutative abc=bca" (same? s-abc s-bca) true) +(content-test "commutative result order" (crdt-order s-abc) (list "p" "x")) + +;; ── idempotence: applying ops twice changes nothing ── +(content-test + "idempotent ops" + (same? s-abc (crdt-apply-all s-abc (list opA opB opC))) + true) + +;; ── update-before-insert is not lost ── +(define + ub + (crdt-apply-all + (crdt-empty) + (list + (crdt-op-update "z" "text" "late" 3 1) + (crdt-op-insert + "z" + "text" + (crdt-pos 1 0) + (list (list "text" "orig")) + 1 + 1)))) +(content-test + "update before insert kept" + (str (blk-send (doc-find (crdt-materialize "d" ub) "z") "text")) + "late") + +;; ── delete-before-insert: remove-wins ── +(define + db + (crdt-apply-all + (crdt-empty) + (list + (crdt-op-delete "k") + (crdt-op-insert + "k" + "text" + (crdt-pos 1 0) + (list (list "text" "x")) + 1 + 1)))) +(content-test "delete before insert removes" (crdt-order db) (list)) + +;; ── concurrent inserts converge + deterministic order ── +(define + rA + (crdt-insert + base + "a1" + "text" + (crdt-pos 5 1) + (list (list "text" "A")) + 2 + 1)) +(define + rB + (crdt-insert + base + "b1" + "text" + (crdt-pos 5 2) + (list (list "text" "B")) + 2 + 2)) +(content-test + "merge commutes" + (same? (crdt-merge rA rB) (crdt-merge rB rA)) + true) +(content-test + "merge order deterministic AB" + (crdt-order (crdt-merge rA rB)) + (list "h" "p" "a1" "b1")) +(content-test + "merge order deterministic BA" + (crdt-order (crdt-merge rB rA)) + (list "h" "p" "a1" "b1")) + +;; ── merge idempotence ── +(define mAB (crdt-merge rA rB)) +(content-test "merge idempotent self" (same? (crdt-merge mAB mAB) mAB) true) +(content-test + "merge idempotent remerge" + (same? (crdt-merge mAB rA) mAB) + true) + +;; ── concurrent same-field update: LWW by (ts, actor) ── +(define u1 (crdt-update base "p" "text" "v-ts5" 5 1)) +(define u2 (crdt-update base "p" "text" "v-ts7" 7 2)) +(content-test + "LWW higher ts wins" + (str + (blk-send + (doc-find (crdt-materialize "d" (crdt-merge u1 u2)) "p") + "text")) + "v-ts7") +(content-test + "LWW commutes" + (same? (crdt-merge u1 u2) (crdt-merge u2 u1)) + true) +(define t1 (crdt-update base "p" "text" "actor1" 9 1)) +(define t2 (crdt-update base "p" "text" "actor2" 9 2)) +(content-test + "LWW tie -> actor wins" + (str + (blk-send + (doc-find (crdt-materialize "d" (crdt-merge t1 t2)) "p") + "text")) + "actor2") + +;; ── concurrent disjoint-field updates both survive ── +(define f1 (crdt-update base "h" "text" "NewTitle" 5 1)) +(define f2 (crdt-update base "h" "level" 3 5 2)) +(define fm (crdt-merge f1 f2)) +(content-test + "disjoint field text" + (str (blk-send (doc-find (crdt-materialize "d" fm) "h") "text")) + "NewTitle") +(content-test + "disjoint field level" + (blk-send (doc-find (crdt-materialize "d" fm) "h") "level") + 3) +(content-test "disjoint commutes" (same? fm (crdt-merge f2 f1)) true) + +;; ── associativity ── +(define c1 (crdt-update base "p" "text" "c1" 4 1)) +(define + c2 + (crdt-insert + base + "n2" + "text" + (crdt-pos 6 0) + (list (list "text" "N")) + 2 + 2)) +(define c3 (crdt-delete base "h")) +(content-test + "associative" + (same? + (crdt-merge (crdt-merge c1 c2) c3) + (crdt-merge c1 (crdt-merge c2 c3))) + true) +(content-test + "merge-all = fold" + (same? + (crdt-merge-all (list c1 c2 c3)) + (crdt-merge c1 (crdt-merge c2 c3))) + true) + +;; ── full convergence: two replicas, divergent edits, merge both ways ── +(define + repl-1 + (crdt-apply-all + base + (list + (crdt-op-update "p" "text" "from-1" 5 1) + (crdt-op-insert + "img" + "image" + (crdt-pos-between + (crdt-pos 1 0) + (crdt-pos 2 0) + 1) + (list (list "src" "/a.png") (list "alt" "a")) + 6 + 1)))) +(define + repl-2 + (crdt-apply-all + base + (list + (crdt-op-delete "h") + (crdt-op-update "p" "text" "from-2" 7 2)))) +(content-test + "two-replica converges" + (same? (crdt-merge repl-1 repl-2) (crdt-merge repl-2 repl-1)) + true) +(content-test + "two-replica result order" + (crdt-order (crdt-merge repl-1 repl-2)) + (list "img" "p")) +(content-test + "two-replica LWW field" + (str + (blk-send + (doc-find (crdt-materialize "d" (crdt-merge repl-1 repl-2)) "p") + "text")) + "from-2") +(content-test + "two-replica idempotent" + (same? + (crdt-merge (crdt-merge repl-1 repl-2) repl-1) + (crdt-merge repl-1 repl-2)) + true) diff --git a/plans/content-on-sx.md b/plans/content-on-sx.md index 8a9782d8..2d8904cd 100644 --- a/plans/content-on-sx.md +++ b/plans/content-on-sx.md @@ -19,7 +19,7 @@ injected adapter, not core. ## Status (rolling) -`bash lib/content/conformance.sh` → **162/162** (Phase 1 complete + Phase 2: persist op log) +`bash lib/content/conformance.sh` → **196/196** (Phases 1–3 complete: blocks, doc, render, api, persist op log, CRDT merge) ## Ground rules @@ -67,8 +67,8 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─ - [x] `(content/history doc)`, diff between versions ## Phase 3 — Collaborative merge (CRDT) -- [ ] commutative/idempotent op merge -- [ ] concurrent-edit tests (any order, double-apply → identical) +- [x] commutative/idempotent op merge +- [x] concurrent-edit tests (any order, double-apply → identical) ## Phase 4 — External sync + federation - [ ] Ghost/CMS sync via injected adapter (import/export) @@ -77,6 +77,18 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─ ## Progress log +- 2026-06-07 — Phase 3 `crdt.sx` (**Phase 3 complete**): collaborative merge as + a state-based CvRDT. Merge is a join (lub) on a semilattice → commutative, + associative, idempotent by construction. Ordering = unique dense Logoot + position keys (cell = (digit actor), lexicographic); presence = OR-tombstones + (remove-wins); each field = an LWW-Register keyed by logical (ts, actor). Every + op contributes a PARTIAL element and per-id state is their join, so + update-/delete-before-insert are not lost. `crdt-materialize` bridges back to a + Phase-1 `CtDoc` (sort live elements by pos → blocks). Tests prove: ops in any + order converge, double-apply is a no-op, merge commutes/associates/is + idempotent, concurrent inserts order deterministically, same-field LWW by + (ts,actor), disjoint fields both survive, two divergent replicas converge both + ways. 34 tests; suite 196/196. - 2026-06-07 — Phase 2 `store.sx` (**Phase 2 complete**): op log + versioning over the persist event stream. `content/commit!` appends an edit op as a persist event to the doc's stream (`content:`); the log is the source of