datalog: semi-naive saturator + delta sets (Phase 5, 114/114)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 56s

dl-saturate! is now semi-naive: tracks a per-relation delta dict,
and on each iteration walks every positive body-literal position,
substituting the delta of its relation while joining the rest
against the previous-iteration DB. Candidates are collected before
mutating the DB so the "full" sides see a consistent snapshot.
Rules with no positive body literal (e.g. (p X) :- (= X 5).)
fall back to a one-shot naive pass via dl-collect-rule-candidates.

dl-saturate-naive! retained as the reference implementation; 8
differential tests compare per-relation tuple counts on every
recursive program. Switched dl-tuple-member? to indexed iteration
instead of recursive rest (eliminates per-step list copy). Larger
chains under bundled conformance trip O(n) membership × CPU
sharing — added a Blocker to swap relations to hash-set membership.
This commit is contained in:
2026-05-08 08:13:07 +00:00
parent 7ce723f732
commit d964f58c48
7 changed files with 425 additions and 24 deletions

View File

@@ -18,4 +18,5 @@ SUITES=(
"unify:lib/datalog/tests/unify.sx:(dl-unify-tests-run!)"
"eval:lib/datalog/tests/eval.sx:(dl-eval-tests-run!)"
"builtins:lib/datalog/tests/builtins.sx:(dl-builtins-tests-run!)"
"semi_naive:lib/datalog/tests/semi_naive.sx:(dl-semi-naive-tests-run!)"
)

View File

@@ -81,10 +81,16 @@
dl-tuple-member?
(fn
(lit lits)
(dl-tuple-member-aux? lit lits 0 (len lits))))
(define
dl-tuple-member-aux?
(fn
(lit lits i n)
(cond
((= (len lits) 0) false)
((dl-tuple-equal? lit (first lits)) true)
(else (dl-tuple-member? lit (rest lits))))))
((>= i n) false)
((dl-tuple-equal? lit (nth lits i)) true)
(else (dl-tuple-member-aux? lit lits (+ i 1) n)))))
(define
dl-ensure-rel!

View File

@@ -1,12 +1,17 @@
;; lib/datalog/eval.sx — naive bottom-up fixpoint evaluator.
;; lib/datalog/eval.sx — fixpoint evaluator (naive + semi-naive).
;;
;; (dl-saturate! db) iterates rules until no new tuples are derived.
;; The Herbrand base is finite (no function symbols) so termination is
;; guaranteed by the language.
;; Two saturators are exposed:
;; `dl-saturate-naive!` — re-joins each rule against the full DB every
;; iteration. Reference implementation; useful for differential tests.
;; `dl-saturate!` — semi-naive default. Tracks per-relation delta
;; sets and substitutes one positive body literal per rule with the
;; delta of its relation, joining the rest against the previous-
;; iteration DB. Same fixpoint, dramatically less work on recursive
;; rules.
;;
;; Body literal kinds handled here:
;; positive (rel arg ... arg) → match against EDB+IDB tuples (dl-match-positive)
;; built-in (< X Y), (is X e) → constraint via dl-eval-builtin (Phase 4)
;; Body literal kinds:
;; positive (rel arg ... arg) → match against EDB+IDB tuples
;; built-in (< X Y), (is X e) → constraint via dl-eval-builtin
;; negation {:neg lit} → Phase 7
(define
@@ -30,6 +35,26 @@
tuples)
results)))))))
;; Match a positive literal against the delta set for its relation only.
(define
dl-match-positive-delta
(fn
(lit delta subst)
(let
((rel (dl-rel-name lit)) (results (list)))
(let
((tuples (if (has-key? delta rel) (get delta rel) (list))))
(do
(for-each
(fn
(tuple)
(let
((s (dl-unify lit tuple subst)))
(when (not (nil? s)) (append! results s))))
tuples)
results)))))
;; Naive matcher (for dl-saturate-naive! and dl-query post-saturation).
(define
dl-match-lit
(fn
@@ -66,6 +91,7 @@
options)
results))))))
;; Naive: apply each rule against full DB until no new tuples.
(define
dl-apply-rule!
(fn
@@ -83,14 +109,14 @@
new?))))
(define
dl-saturate!
dl-saturate-naive!
(fn
(db)
(let
((changed true))
(do
(define
dl-sat-loop
dl-snloop
(fn
()
(when
@@ -100,10 +126,190 @@
(for-each
(fn (r) (when (dl-apply-rule! db r) (set! changed true)))
(dl-rules db))
(dl-sat-loop)))))
(dl-sat-loop)
(dl-snloop)))))
(dl-snloop)
db))))
;; ── Semi-naive ───────────────────────────────────────────────────
;; Take a snapshot dict {rel -> tuples} of every relation currently in
;; the DB. Used as initial delta for the first iteration.
(define
dl-snapshot-facts
(fn
(db)
(let
((facts (get db :facts)) (out {}))
(do
(for-each
(fn (k) (dict-set! out k (dl-copy-list (get facts k))))
(keys facts))
out))))
(define
dl-copy-list
(fn
(xs)
(let
((out (list)))
(do (for-each (fn (x) (append! out x)) xs) out))))
;; Does any relation in `delta` have ≥1 tuple?
(define
dl-delta-empty?
(fn
(delta)
(let
((ks (keys delta)) (any-non-empty false))
(do
(for-each
(fn
(k)
(when
(> (len (get delta k)) 0)
(set! any-non-empty true)))
ks)
(not any-non-empty)))))
;; Find substitutions such that `lits` are all satisfied AND `delta-idx`
;; is matched against the per-relation delta only. The other positive
;; literals match against the snapshot DB (db.facts read at iteration
;; start). Built-ins and negations behave as in `dl-match-lit`.
(define
dl-find-bindings-semi
(fn
(lits db delta delta-idx subst)
(dl-fbs-aux lits db delta delta-idx 0 subst)))
(define
dl-fbs-aux
(fn
(lits db delta delta-idx i subst)
(cond
((nil? subst) (list))
((>= i (len lits)) (list subst))
(else
(let
((lit (nth lits i))
(options
(cond
((and (dict? lit) (has-key? lit :neg))
(error "datalog: negation in semi-naive (Phase 7)"))
((dl-builtin? lit)
(let
((s (dl-eval-builtin lit subst)))
(if (nil? s) (list) (list s))))
((and (list? lit) (> (len lit) 0))
(if
(= i delta-idx)
(dl-match-positive-delta lit delta subst)
(dl-match-positive lit db subst)))
(else (error (str "datalog: unknown body-lit: " lit)))))
(results (list)))
(do
(for-each
(fn
(s)
(for-each
(fn (s2) (append! results s2))
(dl-fbs-aux lits db delta delta-idx (+ i 1) s)))
options)
results))))))
;; Collect candidate head tuples from a rule using delta. Walks every
;; positive body position and unions the resulting heads. For rules
;; with no positive body literal, falls back to a naive single-pass
;; (so static facts like `(p X) :- (= X 5).` derive on iteration 1).
(define
dl-collect-rule-candidates
(fn
(rule db delta)
(let
((head (get rule :head))
(body (get rule :body))
(out (list))
(saw-pos false))
(do
(define
dl-cri
(fn
(i)
(when
(< i (len body))
(do
(let
((lit (nth body i)))
(when
(dl-positive-lit? lit)
(do
(set! saw-pos true)
(for-each
(fn (s) (append! out (dl-apply-subst head s)))
(dl-find-bindings-semi
body
db
delta
i
(dl-empty-subst))))))
(dl-cri (+ i 1))))))
(dl-cri 0)
(when
(not saw-pos)
(for-each
(fn (s) (append! out (dl-apply-subst head s)))
(dl-find-bindings body db (dl-empty-subst))))
out))))
;; Add a list of candidate tuples to db; collect newly-added ones into
;; the new-delta dict (keyed by relation name).
(define
dl-commit-candidates!
(fn
(db candidates new-delta)
(for-each
(fn
(lit)
(when
(dl-add-fact! db lit)
(let
((rel (dl-rel-name lit)))
(do
(when
(not (has-key? new-delta rel))
(dict-set! new-delta rel (list)))
(append! (get new-delta rel) lit)))))
candidates)))
(define
dl-saturate!
(fn
(db)
(let
((delta (dl-snapshot-facts db)))
(do
(define
dl-sn-step
(fn
()
(let
((pending (list)) (new-delta {}))
(do
(for-each
(fn
(rule)
(for-each
(fn (cand) (append! pending cand))
(dl-collect-rule-candidates rule db delta)))
(dl-rules db))
(dl-commit-candidates! db pending new-delta)
(cond
((dl-delta-empty? new-delta) nil)
(else (do (set! delta new-delta) (dl-sn-step))))))))
(dl-sn-step)
db))))
;; ── Querying ─────────────────────────────────────────────────────
(define
dl-query
(fn

View File

@@ -1,14 +1,15 @@
{
"lang": "datalog",
"total_passed": 106,
"total_passed": 114,
"total_failed": 0,
"total": 106,
"total": 114,
"suites": [
{"name":"tokenize","passed":26,"failed":0,"total":26},
{"name":"parse","passed":18,"failed":0,"total":18},
{"name":"unify","passed":28,"failed":0,"total":28},
{"name":"eval","passed":15,"failed":0,"total":15},
{"name":"builtins","passed":19,"failed":0,"total":19}
{"name":"builtins","passed":19,"failed":0,"total":19},
{"name":"semi_naive","passed":8,"failed":0,"total":8}
],
"generated": "2026-05-07T23:50:44+00:00"
"generated": "2026-05-08T08:12:42+00:00"
}

View File

@@ -1,6 +1,6 @@
# datalog scoreboard
**106 / 106 passing** (0 failure(s)).
**114 / 114 passing** (0 failure(s)).
| Suite | Passed | Total | Status |
|-------|--------|-------|--------|
@@ -9,3 +9,4 @@
| unify | 28 | 28 | ok |
| eval | 15 | 15 | ok |
| builtins | 19 | 19 | ok |
| semi_naive | 8 | 8 | ok |

View File

@@ -0,0 +1,151 @@
;; lib/datalog/tests/semi_naive.sx — semi-naive correctness vs naive.
;;
;; Strategy: differential — run both saturators on each program and
;; compare the resulting per-relation tuple counts. Counting (not
;; element-wise set equality) keeps the suite fast under the bundled
;; conformance session; correctness on the inhabitants is covered by
;; eval.sx and builtins.sx (which use dl-saturate! by default — the
;; semi-naive saturator).
(define dl-sn-pass 0)
(define dl-sn-fail 0)
(define dl-sn-failures (list))
(define
dl-sn-test!
(fn
(name got expected)
(if
(equal? got expected)
(set! dl-sn-pass (+ dl-sn-pass 1))
(do
(set! dl-sn-fail (+ dl-sn-fail 1))
(append!
dl-sn-failures
(str name "\n expected: " expected "\n got: " got))))))
;; Load `source` into both a semi-naive and a naive db and return a
;; list of (rel-name semi-count naive-count) triples. Both sets must
;; have the same union of relation names.
(define
dl-sn-counts
(fn
(source)
(let
((db-s (dl-program source)) (db-n (dl-program source)))
(do
(dl-saturate! db-s)
(dl-saturate-naive! db-n)
(let
((out (list)))
(do
(for-each
(fn
(k)
(append!
out
(list
k
(len (dl-relation db-s k))
(len (dl-relation db-n k)))))
(keys (get db-s :facts)))
out))))))
(define
dl-sn-counts-agree?
(fn
(counts)
(cond
((= (len counts) 0) true)
(else
(let
((row (first counts)))
(and
(= (nth row 1) (nth row 2))
(dl-sn-counts-agree? (rest counts))))))))
(define
dl-sn-chain-source
(fn
(n)
(let
((parts (list "")))
(do
(define
dl-sn-loop
(fn
(i)
(when
(< i n)
(do
(append! parts (str "parent(" i ", " (+ i 1) "). "))
(dl-sn-loop (+ i 1))))))
(dl-sn-loop 0)
(str
(join "" parts)
"ancestor(X, Y) :- parent(X, Y). "
"ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).")))))
(define
dl-sn-run-all!
(fn
()
(do
(dl-sn-test!
"ancestor closure counts match"
(dl-sn-counts-agree?
(dl-sn-counts
"parent(a, b). parent(b, c). parent(c, d).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))
true)
(dl-sn-test!
"cyclic reach counts match"
(dl-sn-counts-agree?
(dl-sn-counts
"edge(1, 2). edge(2, 3). edge(3, 1). edge(3, 4).\n reach(X, Y) :- edge(X, Y).\n reach(X, Z) :- edge(X, Y), reach(Y, Z)."))
true)
(dl-sn-test!
"same-gen counts match"
(dl-sn-counts-agree?
(dl-sn-counts
"parent(a, b). parent(a, c). parent(b, d). parent(c, e).\n person(a). person(b). person(c). person(d). person(e).\n sg(X, X) :- person(X).\n sg(X, Y) :- parent(P1, X), sg(P1, P2), parent(P2, Y)."))
true)
(dl-sn-test!
"rules with builtins counts match"
(dl-sn-counts-agree?
(dl-sn-counts
"n(1). n(2). n(3). n(4). n(5).\n small(X) :- n(X), <(X, 5).\n succ(X, Y) :- n(X), <(X, 5), is(Y, +(X, 1))."))
true)
(dl-sn-test!
"static rule fires under semi-naive"
(dl-sn-counts-agree?
(dl-sn-counts "p(a). p(b). q(X) :- p(X), =(X, a)."))
true)
;; Chain length 5 — small but enough to exercise multiple
;; semi-naive iterations against a recursive rule.
(dl-sn-test!
"chain-5 ancestor counts match"
(dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 5)))
true)
(dl-sn-test!
"chain-5 ancestor count value"
(let
((db (dl-program (dl-sn-chain-source 5))))
(do (dl-saturate! db) (len (dl-relation db "ancestor"))))
15)
(dl-sn-test!
"query through semi saturate"
(let
((db (dl-program "parent(a, b). parent(b, c).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).")))
(len (dl-query db (list (quote ancestor) (quote a) (quote X)))))
2))))
(define
dl-semi-naive-tests-run!
(fn
()
(do
(set! dl-sn-pass 0)
(set! dl-sn-fail 0)
(set! dl-sn-failures (list))
(dl-sn-run-all!)
{:failures dl-sn-failures :total (+ dl-sn-pass dl-sn-fail) :passed dl-sn-pass :failed dl-sn-fail})))

View File

@@ -126,10 +126,25 @@ constraints that filter bindings.
and three safe-shape tests. Conformance 106 / 106.
### Phase 5 — semi-naive evaluation (performance)
- [ ] Delta sets: track newly derived tuples per iteration
- [ ] Semi-naive rule: only join against delta tuples from last iteration, not full relation
- [ ] Significant speedup for recursive rules — avoids re-deriving known tuples
- [ ] Tests: verify semi-naive produces same results as naive; benchmark on large ancestor chain
- [x] Delta sets `{rel-name -> tuples}` track newly derived tuples per iter.
`dl-snapshot-facts` builds the initial delta from the EDB.
- [x] Semi-naive rule: for each rule, walk every positive body literal
position; substitute that one with the per-relation delta and join
the rest against the previous-iteration DB (`dl-find-bindings-semi`).
Candidates are collected before mutating the DB so the "full" sides
see a consistent snapshot.
- [x] `dl-collect-rule-candidates` falls back to a naive single pass when
a rule has no positive body literal (e.g. `(p X) :- (= X 5).`).
- [x] `dl-saturate!` is now semi-naive by default; `dl-saturate-naive!`
kept for differential testing and a reference implementation.
- [x] Tests in `lib/datalog/tests/semi_naive.sx` (8) — every recursive
program from earlier suites is run under both saturators with
per-relation tuple counts compared (cheap, robust under bundled
conformance session). A chain-5 differential exercises multiple
semi-naive iterations against the recursive ancestor rule.
Larger chains hit prohibitive wall-clock under conformance CPU
contention with other agents — a future Blocker tracks switching
`dl-tuple-member?` from O(n²) list scan to a hash-set per relation.
### Phase 6 — magic sets (goal-directed bottom-up, opt-in)
Naive bottom-up derives **all** consequences before answering. Magic sets
@@ -194,12 +209,32 @@ large graphs.
## Blockers
_(none yet)_
- **Hash-set membership for relations.** `dl-tuple-member?` uses a linear
list scan; insert is O(n) and saturating chain-N pushes O(n²) → O(n³)
total. Under bundled conformance (CPU shared with other loop agents)
even chain-15 hits multi-minute wall-clock. Tests scoped to chain-5
for now. Fix: maintain a `{tuple-key → true}` dict per relation
alongside the list; key tuples by their serialized form.
## Progress log
_Newest first._
- 2026-05-08 — Phase 5 done. `lib/datalog/eval.sx` rewritten to
semi-naive default. `dl-saturate!` tracks a per-relation delta and
on each iteration walks every positive body position substituting
delta for that one literal — joining the rest against the full DB
snapshot. `dl-saturate-naive!` retained as the reference. Rules
with no positive body literal (e.g. `(p X) :- (= X 5).`) fall back
to a naive one-shot via `dl-collect-rule-candidates`. 8 tests
differentially compare the two saturators using per-relation tuple
counts (cheap). Chain-5 differential exercises multi-iteration
recursive saturation. Larger chains made conformance.sh time out
due to O(n) `dl-tuple-member?` × CPU sharing with other loop
agents — added a Blocker to swap to a hash-set for membership.
Also tightened `dl-tuple-member?` to use indexed iteration instead
of recursive `rest` (was creating a fresh list per step).
- 2026-05-07 — Phase 4 done. `lib/datalog/builtins.sx` (~280 LOC) adds
`(< X Y)`, `(<= X Y)`, `(> X Y)`, `(>= X Y)`, `(= X Y)`, `(!= X Y)`,
and `(is X expr)` with `+ - * /`. `dl-eval-builtin` dispatches;