diff --git a/lib/datalog/magic.sx b/lib/datalog/magic.sx index 3d593356..7c8983a1 100644 --- a/lib/datalog/magic.sx +++ b/lib/datalog/magic.sx @@ -200,3 +200,144 @@ (dl-ba-loop (+ i 1)))))) (dl-ba-loop 0) out)))) + +;; ── Magic-sets rewriter ───────────────────────────────────────── +;; +;; Given the original rule list and a query (rel, adornment) pair, +;; generates the magic-rewritten program: a list of rules that +;; (a) gate each original rule with a `magic_^` filter and +;; (b) propagate the magic relation through SIPS so that only +;; query-relevant tuples are derived. Seed facts are returned +;; separately and must be added to the db at evaluation time. +;; +;; Output: {:rules :seed } +;; +;; The rewriter only rewrites IDB rules; EDB facts pass through. +;; Built-in predicates and negation in body literals are kept in +;; place but do not generate propagation rules of their own. + +(define + dl-magic-pair-key + (fn (rel adornment) (str rel "^" adornment))) + +(define + dl-magic-rewrite + (fn + (rules query-rel query-adornment query-args) + (let + ((seen (list)) + (queue (list)) + (out (list))) + (do + (define + dl-mq-mark! + (fn + (rel adornment) + (let ((k (dl-magic-pair-key rel adornment))) + (when + (not (dl-member-string? k seen)) + (do + (append! seen k) + (append! queue {:rel rel :adn adornment})))))) + + (define + dl-mq-rewrite-rule! + (fn + (rule adn) + (let + ((head (get rule :head)) + (body (get rule :body)) + (sips (dl-rule-sips rule adn))) + (let + ((magic-filter + (dl-magic-lit + (dl-rel-name head) + adn + (dl-bound-args head adn)))) + (do + ;; Adorned rule: head :- magic-filter, body... + (let ((new-body (list))) + (do + (append! new-body magic-filter) + (for-each + (fn (lit) (append! new-body lit)) + body) + (append! out {:head head :body new-body}))) + ;; Propagation rules for each positive non-builtin + ;; body literal at position i. + (define + dl-mq-prop-loop + (fn + (i) + (when + (< i (len body)) + (do + (let + ((lit (nth body i)) + (sip-entry (nth sips i))) + (when + (and (list? lit) + (> (len lit) 0) + (not (and (dict? lit) (has-key? lit :neg))) + (not (dl-builtin? lit)) + (not (dl-aggregate? lit))) + (let + ((lit-adn (get sip-entry :adornment)) + (lit-rel (dl-rel-name lit))) + (let + ((prop-head + (dl-magic-lit + lit-rel + lit-adn + (dl-bound-args lit lit-adn)))) + (let ((prop-body (list))) + (do + (append! prop-body magic-filter) + (define + dl-mq-prefix-loop + (fn + (j) + (when + (< j i) + (do + (append! + prop-body + (nth body j)) + (dl-mq-prefix-loop (+ j 1)))))) + (dl-mq-prefix-loop 0) + (append! + out + {:head prop-head :body prop-body}) + (dl-mq-mark! lit-rel lit-adn))))))) + (dl-mq-prop-loop (+ i 1)))))) + (dl-mq-prop-loop 0)))))) + + (dl-mq-mark! query-rel query-adornment) + + (define + dl-mq-process + (fn + () + (when + (> (len queue) 0) + (let ((item (first queue))) + (do + (set! queue (rest queue)) + (let + ((rel (get item :rel)) (adn (get item :adn))) + (for-each + (fn + (rule) + (when + (= (dl-rel-name (get rule :head)) rel) + (dl-mq-rewrite-rule! rule adn))) + rules)) + (dl-mq-process)))))) + (dl-mq-process) + + {:rules out + :seed + (dl-magic-lit + query-rel + query-adornment + query-args)})))) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index 73ae3b19..481f48f9 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,8 +1,8 @@ { "lang": "datalog", - "total_passed": 199, + "total_passed": 202, "total_failed": 0, - "total": 199, + "total": 202, "suites": [ {"name":"tokenize","passed":26,"failed":0,"total":26}, {"name":"parse","passed":18,"failed":0,"total":18}, @@ -13,8 +13,8 @@ {"name":"negation","passed":10,"failed":0,"total":10}, {"name":"aggregates","passed":18,"failed":0,"total":18}, {"name":"api","passed":14,"failed":0,"total":14}, - {"name":"magic","passed":15,"failed":0,"total":15}, + {"name":"magic","passed":18,"failed":0,"total":18}, {"name":"demo","passed":18,"failed":0,"total":18} ], - "generated": "2026-05-08T09:53:23+00:00" + "generated": "2026-05-08T09:58:16+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index a8e21498..def16294 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,6 +1,6 @@ # datalog scoreboard -**199 / 199 passing** (0 failure(s)). +**202 / 202 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| @@ -13,5 +13,5 @@ | negation | 10 | 10 | ok | | aggregates | 18 | 18 | ok | | api | 14 | 14 | ok | -| magic | 15 | 15 | ok | +| magic | 18 | 18 | ok | | demo | 18 | 18 | ok | diff --git a/lib/datalog/tests/magic.sx b/lib/datalog/tests/magic.sx index dff20345..78ad1d27 100644 --- a/lib/datalog/tests/magic.sx +++ b/lib/datalog/tests/magic.sx @@ -147,7 +147,78 @@ ;; Magic literal construction. (dl-mt-test! "magic-lit" (dl-magic-lit "ancestor" "bf" (list (quote tom))) - (list (string->symbol "magic_ancestor^bf") (quote tom)))))) + (list (string->symbol "magic_ancestor^bf") (quote tom))) + + ;; Magic-sets rewriter: structural sanity. + (dl-mt-test! "rewrite ancestor produces seed" + (let + ((rules + (list + {:head (list (quote ancestor) (quote X) (quote Y)) + :body (list (list (quote parent) (quote X) (quote Y)))} + {:head (list (quote ancestor) (quote X) (quote Z)) + :body + (list (list (quote parent) (quote X) (quote Y)) + (list (quote ancestor) (quote Y) (quote Z)))}))) + (get + (dl-magic-rewrite rules "ancestor" "bf" (list (quote a))) + :seed)) + (list (string->symbol "magic_ancestor^bf") (quote a))) + + ;; Equivalence: rewritten program derives same ancestor tuples. + ;; In a chain a→b→c→d, magic-rewritten run still derives all + ;; ancestor pairs reachable from any node a/b/c/d propagated via + ;; magic_ancestor^bf — i.e. the full closure (6 tuples). Magic + ;; saves work only when the EDB has irrelevant nodes outside + ;; the seed's transitive cone. + (dl-mt-test! "magic-rewritten ancestor count" + (let + ((rules + (list + {:head (list (quote ancestor) (quote X) (quote Y)) + :body (list (list (quote parent) (quote X) (quote Y)))} + {:head (list (quote ancestor) (quote X) (quote Z)) + :body + (list (list (quote parent) (quote X) (quote Y)) + (list (quote ancestor) (quote Y) (quote Z)))})) + (edb (list + (list (quote parent) (quote a) (quote b)) + (list (quote parent) (quote b) (quote c)) + (list (quote parent) (quote c) (quote d))))) + (let + ((rewritten (dl-magic-rewrite rules "ancestor" "bf" (list (quote a)))) + (db (dl-make-db))) + (do + (for-each (fn (f) (dl-add-fact! db f)) edb) + (dl-add-fact! db (get rewritten :seed)) + (for-each (fn (r) (dl-add-rule! db r)) (get rewritten :rules)) + (dl-saturate! db) + (len (dl-relation db "ancestor"))))) + 6) + + (dl-mt-test! "magic-rewritten finds same answers" + (let + ((rules + (list + {:head (list (quote ancestor) (quote X) (quote Y)) + :body (list (list (quote parent) (quote X) (quote Y)))} + {:head (list (quote ancestor) (quote X) (quote Z)) + :body + (list (list (quote parent) (quote X) (quote Y)) + (list (quote ancestor) (quote Y) (quote Z)))})) + (edb (list + (list (quote parent) (quote a) (quote b)) + (list (quote parent) (quote b) (quote c))))) + (let + ((rewritten (dl-magic-rewrite rules "ancestor" "bf" (list (quote a)))) + (db (dl-make-db))) + (do + (for-each (fn (f) (dl-add-fact! db f)) edb) + (dl-add-fact! db (get rewritten :seed)) + (for-each (fn (r) (dl-add-rule! db r)) (get rewritten :rules)) + (dl-saturate! db) + (len (dl-query db (list (quote ancestor) (quote a) (quote X))))))) + 2)))) (define dl-magic-tests-run! diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 560ec828..4b9d41a8 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -154,10 +154,13 @@ large graphs. - [x] Adornments: `dl-adorn-goal goal` and `dl-adorn-lit lit bound` in `lib/datalog/magic.sx`. Per-arg `b`/`f` based on whether the arg is a constant or a variable already in the bound set. -- [ ] Magic transformation: for each adorned predicate, generate a - `magic_` relation and rewrite rule bodies to filter through it. - *Building blocks present in `magic.sx`: `dl-magic-rel-name`, - `dl-magic-lit`, `dl-bound-args`. Full rewriter still TODO.* +- [x] Magic transformation: `dl-magic-rewrite rules query-rel adn args` + generates `{:rules :seed }`. Each + original rule is gated with a `magic_^(bound)` filter, + and propagation rules are emitted for each positive non-builtin + body literal. Worklist over `(rel, adn)` pairs starts from the + query and stops when no new pairs appear. EDB facts pass through + unchanged. - [x] Sideways information passing strategy (SIPS): left-to-right `dl-rule-sips rule head-adornment` walks body literals tracking the bound set, returning `({:lit :adornment} ...)`. Recognises @@ -168,9 +171,14 @@ large graphs. `:semi-naive`. `:magic` accepted but the transformation itself is deferred — saturator currently falls back to semi-naive. Tests verify hook, default, and equivalence under the alternate setting. -- [ ] Tests: equivalence vs naive on small inputs; perf win on a 10k-node - reachability query from a single root. *Pending real magic-set - transformation.* +- [x] Equivalence test: rewritten ancestor program over the same EDB + derives the same number of `ancestor` tuples and returns the + same query answers as the unrewritten program (chain-3 case). +- [ ] Perf test: 10k-node reachability with magic vs semi-naive. + Pending — would need a `dl-saturate-magic!` driver that builds + a temporary db from rewrite output. The rewriter itself is in + place; benchmarking against semi-naive on large graphs is left + to a future iteration. ### Phase 7 — stratified negation - [x] Dependency graph: `dl-build-dep-graph db` returns `{head -> ({:rel @@ -291,6 +299,17 @@ large graphs. _Newest first._ +- 2026-05-08 — Phase 6 magic-sets rewriter. `dl-magic-rewrite rules + query-rel adn args` returns `{:rules :seed }`. + Worklist over `(rel, adn)` pairs starts from the query, gates each + original rule with a `magic_^(bound)` filter, and emits + propagation rules for each positive non-builtin body literal so + that magic spreads to body relations. EDB facts pass through. + 3 new tests cover seed structure, equivalence on chain-3 by + ancestor-relation tuple count, and same-query-answers under + the rewritten program. The plumbing for a `dl-saturate-magic!` + driver and large-graph perf benchmarks is still future work. + - 2026-05-08 — Phase 6 building blocks for the magic-sets transformation: `dl-magic-rel-name`, `dl-magic-lit`, `dl-bound-args`. The rewriter that generates magic seed and