From 82dfa20e827b937b4f89e5a412cd8418d40eafe9 Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 08:59:28 +0000 Subject: [PATCH] datalog: dl-magic-query pre-saturates for aggregate correctness dl-magic-query could silently diverge from dl-query when an aggregate's inner-goal relation was IDB. The rewriter passes aggregate body lits through unchanged (no magic propagation generated for them), so the inner relation was empty in the magic db and the aggregate returned 0. Repro: (dl-eval-magic "u(a). u(b). u(c). u(d). banned(b). banned(d). active(X) :- u(X), not(banned(X)). n(N) :- count(N, X, active(X))." "?- n(N).") => ({:N 0}) ; should be ({:N 2}) dl-magic-query now pre-saturates the source db before copying facts into the magic db. This guarantees equivalence with dl-query for every stratified program; the magic benefit still comes from goal-directed re-derivation of the query relation under the seed (which matters for large recursive joins). The existing test cases happened to dodge this because their aggregate inner-goals were all EDB. 1 new regression test; conformance 274/274. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/datalog/magic.sx | 64 ++++++++++++++++++++++--------------- lib/datalog/scoreboard.json | 8 ++--- lib/datalog/scoreboard.md | 4 +-- lib/datalog/tests/magic.sx | 23 +++++++++++++ plans/datalog-on-sx.md | 19 ++++++++++- 5 files changed, 85 insertions(+), 33 deletions(-) diff --git a/lib/datalog/magic.sx b/lib/datalog/magic.sx index ce8b20a8..43ab7404 100644 --- a/lib/datalog/magic.sx +++ b/lib/datalog/magic.sx @@ -392,31 +392,43 @@ (and (dict? query-goal) (has-key? query-goal :neg))) (dl-query db query-goal)) (else - (let - ((query-rel (dl-rel-name query-goal)) - (query-adn (dl-adorn-goal query-goal))) + (do + ;; Pre-saturate the source db. The magic rewriter passes + ;; aggregate body lits through unchanged (no magic + ;; propagation generated for them) — so if an aggregate's + ;; inner-goal relation is IDB, it would be empty in the + ;; magic db unless its tuples are already materialised in + ;; the source. Same for negation against IDB. Pre-saturating + ;; guarantees `dl-magic-query` returns the same answers as + ;; `dl-query` for every stratified program; the efficiency + ;; benefit then comes from goal-directed re-derivation of + ;; the query relation under the magic seed. + (dl-saturate! db) (let - ((query-args (dl-bound-args query-goal query-adn)) - (rules (dl-rules db))) + ((query-rel (dl-rel-name query-goal)) + (query-adn (dl-adorn-goal query-goal))) (let - ((rewritten (dl-magic-rewrite rules query-rel query-adn query-args)) - (mdb (dl-make-db)) - (rule-heads (dl-magic-rule-heads rules))) - (do - ;; Copy ALL existing facts. EDB-only relations bring their - ;; tuples; mixed EDB+IDB relations bring both their EDB - ;; portion and any pre-saturated IDB tuples (which the - ;; rewritten rules would re-derive anyway). Skipping facts - ;; for rule-headed relations would leave the magic run - ;; without the EDB portion of mixed relations. - (for-each - (fn - (rel) - (for-each - (fn (t) (dl-add-fact! mdb t)) - (dl-rel-tuples db rel))) - (keys (get db :facts))) - ;; Seed + rewritten rules. - (dl-add-fact! mdb (get rewritten :seed)) - (for-each (fn (r) (dl-add-rule! mdb r)) (get rewritten :rules)) - (dl-query mdb query-goal))))))))) + ((query-args (dl-bound-args query-goal query-adn)) + (rules (dl-rules db))) + (let + ((rewritten (dl-magic-rewrite rules query-rel query-adn query-args)) + (mdb (dl-make-db)) + (rule-heads (dl-magic-rule-heads rules))) + (do + ;; Copy ALL existing facts. EDB-only relations bring their + ;; tuples; mixed EDB+IDB relations bring both their EDB + ;; portion and any pre-saturated IDB tuples (which the + ;; rewritten rules would re-derive anyway). Skipping facts + ;; for rule-headed relations would leave the magic run + ;; without the EDB portion of mixed relations. + (for-each + (fn + (rel) + (for-each + (fn (t) (dl-add-fact! mdb t)) + (dl-rel-tuples db rel))) + (keys (get db :facts))) + ;; Seed + rewritten rules. + (dl-add-fact! mdb (get rewritten :seed)) + (for-each (fn (r) (dl-add-rule! mdb r)) (get rewritten :rules)) + (dl-query mdb query-goal)))))))))) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index 8f389f01..ae3a7255 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,8 +1,8 @@ { "lang": "datalog", - "total_passed": 273, + "total_passed": 274, "total_failed": 0, - "total": 273, + "total": 274, "suites": [ {"name":"tokenize","passed":31,"failed":0,"total":31}, {"name":"parse","passed":23,"failed":0,"total":23}, @@ -13,8 +13,8 @@ {"name":"negation","passed":12,"failed":0,"total":12}, {"name":"aggregates","passed":23,"failed":0,"total":23}, {"name":"api","passed":22,"failed":0,"total":22}, - {"name":"magic","passed":36,"failed":0,"total":36}, + {"name":"magic","passed":37,"failed":0,"total":37}, {"name":"demo","passed":21,"failed":0,"total":21} ], - "generated": "2026-05-11T08:49:02+00:00" + "generated": "2026-05-11T08:59:09+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index b13d97c0..0e44891b 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,6 +1,6 @@ # datalog scoreboard -**273 / 273 passing** (0 failure(s)). +**274 / 274 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| @@ -13,5 +13,5 @@ | negation | 12 | 12 | ok | | aggregates | 23 | 23 | ok | | api | 22 | 22 | ok | -| magic | 36 | 36 | ok | +| magic | 37 | 37 | ok | | demo | 21 | 21 | ok | diff --git a/lib/datalog/tests/magic.sx b/lib/datalog/tests/magic.sx index 29508c87..b6a895ca 100644 --- a/lib/datalog/tests/magic.sx +++ b/lib/datalog/tests/magic.sx @@ -388,6 +388,29 @@ (= (len semi) (len magic)))) true) + ;; The magic rewriter passes aggregate body lits through + ;; unchanged, so an aggregate over an IDB relation would see an + ;; empty inner-goal in the magic db unless the IDB is already + ;; materialised. dl-magic-query now pre-saturates the source db + ;; to guarantee equivalence with dl-query for every stratified + ;; program. Previously this returned `({:N 0})` because `active` + ;; (IDB, derived through negation) was never derived in the + ;; magic db. + (dl-mt-test! "magic over aggregate-of-IDB matches vanilla" + (let + ((src + "u(a). u(b). u(c). u(d). banned(b). banned(d). + active(X) :- u(X), not(banned(X)). + n(N) :- count(N, X, active(X)).")) + (let + ((vanilla (dl-eval src "?- n(N).")) + (magic (dl-eval-magic src "?- n(N)."))) + (and (= (len vanilla) 1) + (= (len magic) 1) + (= (get (first vanilla) "N") + (get (first magic) "N"))))) + true) + ;; magic-query doesn't mutate caller db. (dl-mt-test! "magic-query preserves caller db" (let diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 75d49ba2..81a5ddf0 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -15,7 +15,7 @@ for rose-ash data (e.g. federation graph, content relationships). ## Status (rolling) -`bash lib/datalog/conformance.sh` → **273/273 across 11 suites** +`bash lib/datalog/conformance.sh` → **274/274 across 11 suites** (tokenize, parse, unify, eval, builtins, semi_naive, negation, aggregates, api, magic, demo). Source is ~3100 LOC, tests ~2900 LOC, public API documented in `lib/datalog/datalog.sx`. @@ -320,6 +320,23 @@ large graphs. _Newest first._ +- 2026-05-11 — `dl-magic-query` could silently diverge from + `dl-query` when an aggregate's inner-goal relation was IDB. The + rewriter passes aggregate body lits through unchanged (no magic + propagation for them), so the inner relation was empty in the + magic db and the aggregate returned 0. Probe: + `dl-eval-magic "u(a). u(b). u(c). u(d). banned(b). banned(d). + active(X) :- u(X), not(banned(X)). + n(N) :- count(N, X, active(X))." "?- n(N)."` + returned `N=0` instead of `N=2`. Fix: `dl-magic-query` now + pre-saturates the source db before copying facts into the magic + db. This guarantees equivalence with `dl-query` for every + stratified program; the magic benefit comes from goal-directed + re-derivation of the query relation under the seed (which still + matters for large recursive joins). The existing test suite's + aggregate cases happened to dodge this because the inner goals + were all EDB. 1 new regression test; 274/274. + - 2026-05-11 — Anonymous `_` in a negated literal was incorrectly flagged by the safety check. The canonical idiom `orphan(X) :- person(X), not(parent(X, _))` was rejected with