From 790c17dfc1d7f0cc1e91637fdd60f5756dad90d4 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:50:24 +0000 Subject: [PATCH] datalog: indexed dl-find-bindings + chain-15 differential (Phase 5c, 153/153) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit dl-find-bindings now uses dl-fb-aux lits db subst i n (indexed iteration via nth) instead of recursive (rest lits). Eliminates O(N²) list-copy per body of length N. chain-15 saturation 25s → 16s; chain-25 finishes in 33s real (vs. timeout previously). Bumped semi_naive tests to chain-10 differential + chain-15 semi-only count (was chain-5/chain-5). Blocker entry refreshed. --- lib/datalog/eval.sx | 12 ++++++++---- lib/datalog/scoreboard.json | 2 +- lib/datalog/tests/semi_naive.sx | 14 +++++++------- plans/datalog-on-sx.md | 24 ++++++++++++++++-------- 4 files changed, 32 insertions(+), 20 deletions(-) diff --git a/lib/datalog/eval.sx b/lib/datalog/eval.sx index 93902349..acd74e5e 100644 --- a/lib/datalog/eval.sx +++ b/lib/datalog/eval.sx @@ -83,14 +83,18 @@ (define dl-find-bindings + (fn (lits db subst) (dl-fb-aux lits db subst 0 (len lits)))) + +(define + dl-fb-aux (fn - (lits db subst) + (lits db subst i n) (cond ((nil? subst) (list)) - ((= (len lits) 0) (list subst)) + ((>= i n) (list subst)) (else (let - ((options (dl-match-lit (first lits) db subst)) + ((options (dl-match-lit (nth lits i) db subst)) (results (list))) (do (for-each @@ -98,7 +102,7 @@ (s) (for-each (fn (s2) (append! 
results s2)) - (dl-find-bindings (rest lits) db s))) + (dl-fb-aux lits db s (+ i 1) n))) options) results)))))) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index c6eba8b4..e30419f8 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -15,5 +15,5 @@ {"name":"api","passed":9,"failed":0,"total":9}, {"name":"demo","passed":10,"failed":0,"total":10} ], - "generated": "2026-05-08T08:45:37+00:00" + "generated": "2026-05-08T08:49:52+00:00" } diff --git a/lib/datalog/tests/semi_naive.sx b/lib/datalog/tests/semi_naive.sx index 5ff6a3a5..914dcb2b 100644 --- a/lib/datalog/tests/semi_naive.sx +++ b/lib/datalog/tests/semi_naive.sx @@ -120,18 +120,18 @@ (dl-sn-counts-agree? (dl-sn-counts "p(a). p(b). q(X) :- p(X), =(X, a).")) true) - ;; Chain length 5 — small but enough to exercise multiple - ;; semi-naive iterations against a recursive rule. + ;; Chain length 10 — exercises multiple semi-naive iterations + ;; against the recursive ancestor rule. (dl-sn-test! - "chain-5 ancestor counts match" - (dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 5))) + "chain-10 ancestor counts match" + (dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 10))) true) (dl-sn-test! - "chain-5 ancestor count value" + "chain-15 ancestor count value (semi only)" (let - ((db (dl-program (dl-sn-chain-source 5)))) + ((db (dl-program (dl-sn-chain-source 15)))) (do (dl-saturate! db) (len (dl-relation db "ancestor")))) - 15) + 120) (dl-sn-test! "query through semi saturate" (let diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 57edf541..d24293a3 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -256,19 +256,27 @@ large graphs. ## Blockers -- **Saturation perf on long chains.** Resolved one bottleneck (hash-set - membership in `dl-add-fact!`) but `dl-saturate!` still spends - significant time per iteration on rule body joins — chain-15 takes - ~25s real / 3s user under contention even after the membership fix. 
- Two follow-ups to consider: (a) avoid `(rest lits)` in - `dl-find-bindings`/`dl-fbs-aux` (uses indexed iteration like the - membership fix), (b) memoize the per-rule body shape so `(len lits)` - and accessor calls don't re-walk the list each step. +- **Saturation perf** improving but not free. Resolved hash-set + membership in `dl-add-fact!` and replaced recursive `(rest lits)` in + `dl-find-bindings` with indexed iteration. chain-15 drops from ~25s + to ~16s and chain-25 saturates in ~33s real / 11s user — most CPU + now in unification (assoc-based subst dict copies) and dict + lookups during walks. Future: a per-rule "compiled" body that + pre-resolves arg positions and interns variable indices, so + unification can use array slots instead of dict assoc. ## Progress log _Newest first._ +- 2026-05-08 — Phase 5c perf: indexed `dl-find-bindings`. Replaced + the recursive `(rest lits)` walk with `dl-fb-aux lits db subst i n` + using `nth lits i`. Eliminates O(N²) list-copy per body of length + N. chain-15 saturation 25s → 16s; chain-25 finishes in 33s real + (vs. timeout previously). Bumped semi_naive tests: differential + on chain-10, semi-only count on chain-15 (was chain-5/chain-5). + 153/153. + - 2026-05-08 — Phase 10 syntactic demo. New `lib/datalog/demo.sx` with three programs over rose-ash-shaped data: federation (`mutual`, `reachable`, `foaf`), content recommendation