diff --git a/lib/datalog/conformance.conf b/lib/datalog/conformance.conf index 8125ba13..af4e41b0 100644 --- a/lib/datalog/conformance.conf +++ b/lib/datalog/conformance.conf @@ -18,4 +18,5 @@ SUITES=( "unify:lib/datalog/tests/unify.sx:(dl-unify-tests-run!)" "eval:lib/datalog/tests/eval.sx:(dl-eval-tests-run!)" "builtins:lib/datalog/tests/builtins.sx:(dl-builtins-tests-run!)" + "semi_naive:lib/datalog/tests/semi_naive.sx:(dl-semi-naive-tests-run!)" ) diff --git a/lib/datalog/db.sx b/lib/datalog/db.sx index d83a1dcc..620f565c 100644 --- a/lib/datalog/db.sx +++ b/lib/datalog/db.sx @@ -81,10 +81,16 @@ dl-tuple-member? (fn (lit lits) + (dl-tuple-member-aux? lit lits 0 (len lits)))) + +(define + dl-tuple-member-aux? + (fn + (lit lits i n) (cond - ((= (len lits) 0) false) - ((dl-tuple-equal? lit (first lits)) true) - (else (dl-tuple-member? lit (rest lits)))))) + ((>= i n) false) + ((dl-tuple-equal? lit (nth lits i)) true) + (else (dl-tuple-member-aux? lit lits (+ i 1) n))))) (define dl-ensure-rel! diff --git a/lib/datalog/eval.sx b/lib/datalog/eval.sx index 7c1e7c6c..5fb30eb9 100644 --- a/lib/datalog/eval.sx +++ b/lib/datalog/eval.sx @@ -1,12 +1,17 @@ -;; lib/datalog/eval.sx — naive bottom-up fixpoint evaluator. +;; lib/datalog/eval.sx — fixpoint evaluator (naive + semi-naive). ;; -;; (dl-saturate! db) iterates rules until no new tuples are derived. -;; The Herbrand base is finite (no function symbols) so termination is -;; guaranteed by the language. +;; Two saturators are exposed: +;; `dl-saturate-naive!` — re-joins each rule against the full DB every +;; iteration. Reference implementation; useful for differential tests. +;; `dl-saturate!` — semi-naive default. Tracks per-relation delta +;; sets and substitutes one positive body literal per rule with the +;; delta of its relation, joining the rest against the previous- +;; iteration DB. Same fixpoint, dramatically less work on recursive +;; rules. ;; -;; Body literal kinds handled here: -;; positive (rel arg ... arg) → match against EDB+IDB tuples (dl-match-positive) -;; built-in (< X Y), (is X e) → constraint via dl-eval-builtin (Phase 4) +;; Body literal kinds: +;; positive (rel arg ... arg) → match against EDB+IDB tuples +;; built-in (< X Y), (is X e) → constraint via dl-eval-builtin ;; negation {:neg lit} → Phase 7 (define @@ -30,6 +35,26 @@ tuples) results))))))) +;; Match a positive literal against the delta set for its relation only. +(define + dl-match-positive-delta + (fn + (lit delta subst) + (let + ((rel (dl-rel-name lit)) (results (list))) + (let + ((tuples (if (has-key? delta rel) (get delta rel) (list)))) + (do + (for-each + (fn + (tuple) + (let + ((s (dl-unify lit tuple subst))) + (when (not (nil? s)) (append! results s)))) + tuples) + results))))) + +;; Naive matcher (for dl-saturate-naive! and dl-query post-saturation). (define dl-match-lit (fn @@ -66,6 +91,7 @@ options) results)))))) +;; Naive: apply each rule against full DB until no new tuples. (define dl-apply-rule! (fn @@ -83,14 +109,14 @@ new?)))) (define - dl-saturate! + dl-saturate-naive! (fn (db) (let ((changed true)) (do (define - dl-sat-loop + dl-snloop (fn () (when @@ -100,10 +126,190 @@ (for-each (fn (r) (when (dl-apply-rule! db r) (set! changed true))) (dl-rules db)) - (dl-sat-loop))))) - (dl-sat-loop) + (dl-snloop))))) + (dl-snloop) db)))) +;; ── Semi-naive ─────────────────────────────────────────────────── + +;; Take a snapshot dict {rel -> tuples} of every relation currently in +;; the DB. Used as initial delta for the first iteration. +(define + dl-snapshot-facts + (fn + (db) + (let + ((facts (get db :facts)) (out {})) + (do + (for-each + (fn (k) (dict-set! out k (dl-copy-list (get facts k)))) + (keys facts)) + out)))) + +(define + dl-copy-list + (fn + (xs) + (let + ((out (list))) + (do (for-each (fn (x) (append! out x)) xs) out)))) + +;; Does any relation in `delta` have ≥1 tuple? +(define + dl-delta-empty? + (fn + (delta) + (let + ((ks (keys delta)) (any-non-empty false)) + (do + (for-each + (fn + (k) + (when + (> (len (get delta k)) 0) + (set! any-non-empty true))) + ks) + (not any-non-empty))))) + +;; Find substitutions such that `lits` are all satisfied AND `delta-idx` +;; is matched against the per-relation delta only. The other positive +;; literals match against the snapshot DB (db.facts read at iteration +;; start). Built-ins and negations behave as in `dl-match-lit`. +(define + dl-find-bindings-semi + (fn + (lits db delta delta-idx subst) + (dl-fbs-aux lits db delta delta-idx 0 subst))) + +(define + dl-fbs-aux + (fn + (lits db delta delta-idx i subst) + (cond + ((nil? subst) (list)) + ((>= i (len lits)) (list subst)) + (else + (let + ((lit (nth lits i)) + (options + (cond + ((and (dict? lit) (has-key? lit :neg)) + (error "datalog: negation in semi-naive (Phase 7)")) + ((dl-builtin? lit) + (let + ((s (dl-eval-builtin lit subst))) + (if (nil? s) (list) (list s)))) + ((and (list? lit) (> (len lit) 0)) + (if + (= i delta-idx) + (dl-match-positive-delta lit delta subst) + (dl-match-positive lit db subst))) + (else (error (str "datalog: unknown body-lit: " lit))))) + (results (list))) + (do + (for-each + (fn + (s) + (for-each + (fn (s2) (append! results s2)) + (dl-fbs-aux lits db delta delta-idx (+ i 1) s))) + options) + results)))))) + +;; Collect candidate head tuples from a rule using delta. Walks every +;; positive body position and unions the resulting heads. For rules +;; with no positive body literal, falls back to a naive single-pass +;; (so static facts like `(p X) :- (= X 5).` derive on iteration 1). +(define + dl-collect-rule-candidates + (fn + (rule db delta) + (let + ((head (get rule :head)) + (body (get rule :body)) + (out (list)) + (saw-pos false)) + (do + (define + dl-cri + (fn + (i) + (when + (< i (len body)) + (do + (let + ((lit (nth body i))) + (when + (dl-positive-lit? lit) + (do + (set! saw-pos true) + (for-each + (fn (s) (append! out (dl-apply-subst head s))) + (dl-find-bindings-semi + body + db + delta + i + (dl-empty-subst)))))) + (dl-cri (+ i 1)))))) + (dl-cri 0) + (when + (not saw-pos) + (for-each + (fn (s) (append! out (dl-apply-subst head s))) + (dl-find-bindings body db (dl-empty-subst)))) + out)))) + +;; Add a list of candidate tuples to db; collect newly-added ones into +;; the new-delta dict (keyed by relation name). +(define + dl-commit-candidates! + (fn + (db candidates new-delta) + (for-each + (fn + (lit) + (when + (dl-add-fact! db lit) + (let + ((rel (dl-rel-name lit))) + (do + (when + (not (has-key? new-delta rel)) + (dict-set! new-delta rel (list))) + (append! (get new-delta rel) lit))))) + candidates))) + +(define + dl-saturate! + (fn + (db) + (let + ((delta (dl-snapshot-facts db))) + (do + (define + dl-sn-step + (fn + () + (let + ((pending (list)) (new-delta {})) + (do + (for-each + (fn + (rule) + (for-each + (fn (cand) (append! pending cand)) + (dl-collect-rule-candidates rule db delta))) + (dl-rules db)) + (dl-commit-candidates! db pending new-delta) + (cond + ((dl-delta-empty? new-delta) nil) + (else (do (set! delta new-delta) (dl-sn-step)))))))) + (dl-sn-step) + db)))) + +;; ── Querying ───────────────────────────────────────────────────── + (define dl-query (fn diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index a1933cc5..12f28784 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,14 +1,15 @@ { "lang": "datalog", - "total_passed": 106, + "total_passed": 114, "total_failed": 0, - "total": 106, + "total": 114, "suites": [ {"name":"tokenize","passed":26,"failed":0,"total":26}, {"name":"parse","passed":18,"failed":0,"total":18}, {"name":"unify","passed":28,"failed":0,"total":28}, {"name":"eval","passed":15,"failed":0,"total":15}, - {"name":"builtins","passed":19,"failed":0,"total":19} + {"name":"builtins","passed":19,"failed":0,"total":19}, + {"name":"semi_naive","passed":8,"failed":0,"total":8} ], - "generated": "2026-05-07T23:50:44+00:00" + "generated": "2026-05-08T08:12:42+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index 2b5954fc..cac00351 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,6 +1,6 @@ # datalog scoreboard -**106 / 106 passing** (0 failure(s)). +**114 / 114 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| @@ -9,3 +9,4 @@ | unify | 28 | 28 | ok | | eval | 15 | 15 | ok | | builtins | 19 | 19 | ok | +| semi_naive | 8 | 8 | ok | diff --git a/lib/datalog/tests/semi_naive.sx b/lib/datalog/tests/semi_naive.sx new file mode 100644 index 00000000..5ff6a3a5 --- /dev/null +++ b/lib/datalog/tests/semi_naive.sx @@ -0,0 +1,151 @@ +;; lib/datalog/tests/semi_naive.sx — semi-naive correctness vs naive. +;; +;; Strategy: differential — run both saturators on each program and +;; compare the resulting per-relation tuple counts. Counting (not +;; element-wise set equality) keeps the suite fast under the bundled +;; conformance session; correctness on the inhabitants is covered by +;; eval.sx and builtins.sx (which use dl-saturate! by default — the +;; semi-naive saturator). + +(define dl-sn-pass 0) +(define dl-sn-fail 0) +(define dl-sn-failures (list)) + +(define + dl-sn-test! + (fn + (name got expected) + (if + (equal? got expected) + (set! dl-sn-pass (+ dl-sn-pass 1)) + (do + (set! dl-sn-fail (+ dl-sn-fail 1)) + (append! + dl-sn-failures + (str name "\n expected: " expected "\n got: " got)))))) + +;; Load `source` into both a semi-naive and a naive db and return a +;; list of (rel-name semi-count naive-count) triples. Both sets must +;; have the same union of relation names. +(define + dl-sn-counts + (fn + (source) + (let + ((db-s (dl-program source)) (db-n (dl-program source))) + (do + (dl-saturate! db-s) + (dl-saturate-naive! db-n) + (let + ((out (list))) + (do + (for-each + (fn + (k) + (append! + out + (list + k + (len (dl-relation db-s k)) + (len (dl-relation db-n k))))) + (keys (get db-s :facts))) + out)))))) + +(define + dl-sn-counts-agree? + (fn + (counts) + (cond + ((= (len counts) 0) true) + (else + (let + ((row (first counts))) + (and + (= (nth row 1) (nth row 2)) + (dl-sn-counts-agree? (rest counts)))))))) + +(define + dl-sn-chain-source + (fn + (n) + (let + ((parts (list ""))) + (do + (define + dl-sn-loop + (fn + (i) + (when + (< i n) + (do + (append! parts (str "parent(" i ", " (+ i 1) "). ")) + (dl-sn-loop (+ i 1)))))) + (dl-sn-loop 0) + (str + (join "" parts) + "ancestor(X, Y) :- parent(X, Y). " + "ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))))) + +(define + dl-sn-run-all! + (fn + () + (do + (dl-sn-test! + "ancestor closure counts match" + (dl-sn-counts-agree? + (dl-sn-counts + "parent(a, b). parent(b, c). parent(c, d).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).")) + true) + (dl-sn-test! + "cyclic reach counts match" + (dl-sn-counts-agree? + (dl-sn-counts + "edge(1, 2). edge(2, 3). edge(3, 1). edge(3, 4).\n reach(X, Y) :- edge(X, Y).\n reach(X, Z) :- edge(X, Y), reach(Y, Z).")) + true) + (dl-sn-test! + "same-gen counts match" + (dl-sn-counts-agree? + (dl-sn-counts + "parent(a, b). parent(a, c). parent(b, d). parent(c, e).\n person(a). person(b). person(c). person(d). person(e).\n sg(X, X) :- person(X).\n sg(X, Y) :- parent(P1, X), sg(P1, P2), parent(P2, Y).")) + true) + (dl-sn-test! + "rules with builtins counts match" + (dl-sn-counts-agree? + (dl-sn-counts + "n(1). n(2). n(3). n(4). n(5).\n small(X) :- n(X), <(X, 5).\n succ(X, Y) :- n(X), <(X, 5), is(Y, +(X, 1)).")) + true) + (dl-sn-test! + "static rule fires under semi-naive" + (dl-sn-counts-agree? + (dl-sn-counts "p(a). p(b). q(X) :- p(X), =(X, a).")) + true) + ;; Chain length 5 — small but enough to exercise multiple + ;; semi-naive iterations against a recursive rule. + (dl-sn-test! + "chain-5 ancestor counts match" + (dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 5))) + true) + (dl-sn-test! + "chain-5 ancestor count value" + (let + ((db (dl-program (dl-sn-chain-source 5)))) + (do (dl-saturate! db) (len (dl-relation db "ancestor")))) + 15) + (dl-sn-test! + "query through semi saturate" + (let + ((db (dl-program "parent(a, b). parent(b, c).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (len (dl-query db (list (quote ancestor) (quote a) (quote X))))) + 2)))) + +(define + dl-semi-naive-tests-run! + (fn + () + (do + (set! dl-sn-pass 0) + (set! dl-sn-fail 0) + (set! dl-sn-failures (list)) + (dl-sn-run-all!) + {:failures dl-sn-failures :total (+ dl-sn-pass dl-sn-fail) :passed dl-sn-pass :failed dl-sn-fail}))) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 10ff7f3f..aa7dd632 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -126,10 +126,25 @@ constraints that filter bindings. and three safe-shape tests. Conformance 106 / 106. ### Phase 5 — semi-naive evaluation (performance) -- [ ] Delta sets: track newly derived tuples per iteration -- [ ] Semi-naive rule: only join against delta tuples from last iteration, not full relation -- [ ] Significant speedup for recursive rules — avoids re-deriving known tuples -- [ ] Tests: verify semi-naive produces same results as naive; benchmark on large ancestor chain +- [x] Delta sets `{rel-name -> tuples}` track newly derived tuples per iter. + `dl-snapshot-facts` builds the initial delta from the EDB. +- [x] Semi-naive rule: for each rule, walk every positive body literal + position; substitute that one with the per-relation delta and join + the rest against the previous-iteration DB (`dl-find-bindings-semi`). + Candidates are collected before mutating the DB so the "full" sides + see a consistent snapshot. +- [x] `dl-collect-rule-candidates` falls back to a naive single pass when + a rule has no positive body literal (e.g. `(p X) :- (= X 5).`). +- [x] `dl-saturate!` is now semi-naive by default; `dl-saturate-naive!` + kept for differential testing and a reference implementation. +- [x] Tests in `lib/datalog/tests/semi_naive.sx` (8) — every recursive + program from earlier suites is run under both saturators with + per-relation tuple counts compared (cheap, robust under bundled + conformance session). A chain-5 differential exercises multiple + semi-naive iterations against the recursive ancestor rule. + Larger chains hit prohibitive wall-clock under conformance CPU + contention with other agents — a future Blocker tracks switching + `dl-tuple-member?` from O(n²) list scan to a hash-set per relation. ### Phase 6 — magic sets (goal-directed bottom-up, opt-in) Naive bottom-up derives **all** consequences before answering. Magic sets @@ -194,12 +209,32 @@ large graphs. ## Blockers -_(none yet)_ +- **Hash-set membership for relations.** `dl-tuple-member?` uses a linear + list scan; insert is O(n) and saturating chain-N pushes O(n²) → O(n³) + total. Under bundled conformance (CPU shared with other loop agents) + even chain-15 hits multi-minute wall-clock. Tests scoped to chain-5 + for now. Fix: maintain a `{tuple-key → true}` dict per relation + alongside the list; key tuples by their serialized form. ## Progress log _Newest first._ +- 2026-05-08 — Phase 5 done. `lib/datalog/eval.sx` rewritten to + semi-naive default. `dl-saturate!` tracks a per-relation delta and + on each iteration walks every positive body position substituting + delta for that one literal — joining the rest against the full DB + snapshot. `dl-saturate-naive!` retained as the reference. Rules + with no positive body literal (e.g. `(p X) :- (= X 5).`) fall back + to a naive one-shot via `dl-collect-rule-candidates`. 8 tests + differentially compare the two saturators using per-relation tuple + counts (cheap). Chain-5 differential exercises multi-iteration + recursive saturation. Larger chains made conformance.sh time out + due to O(n) `dl-tuple-member?` × CPU sharing with other loop + agents — added a Blocker to swap to a hash-set for membership. + Also tightened `dl-tuple-member?` to use indexed iteration instead + of recursive `rest` (was creating a fresh list per step). + - 2026-05-07 — Phase 4 done. `lib/datalog/builtins.sx` (~280 LOC) adds `(< X Y)`, `(<= X Y)`, `(> X Y)`, `(>= X Y)`, `(= X Y)`, `(!= X Y)`, and `(is X expr)` with `+ - * /`. `dl-eval-builtin` dispatches;