From 5a1dc4392f204300d63f5c0bb525cbf3dca9ddb3 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 08:58:17 +0000 Subject: [PATCH] datalog: anonymous _ vars are unique per occurrence (Phase 5d, 156/156) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (p X _), (p _ Y) — the two _ are now different variables, matching standard Datalog semantics. Previously both _ symbols were the same SX symbol, so unification across them gave wrong answers. Fix in db.sx: dl-rename-anon-term + dl-rename-anon-lit walk a term or literal and replace each '_' symbol with a fresh _anon. dl-make-anon-renamer returns a counter-based name generator scoped per call. dl-rename-anon-rule applies it to head and body of a rule. dl-add-rule! invokes the renamer before safety check. eval.sx: dl-query renames anon vars in the goal before search and filters '_' out of the projection so user-facing results aren't polluted with internal _anon bindings. The previous "underscore in head ok" test now correctly rejects (p X _) :- q(X) as unsafe (the head's fresh anon var has no body binder). New "underscore in body only" test confirms the safe case. Two regression tests for rule-level and goal-level independence. --- lib/datalog/db.sx | 55 +++++++++++++++++++++++++++++++------ lib/datalog/eval.sx | 33 ++++++++++++---------- lib/datalog/scoreboard.json | 8 +++--- lib/datalog/scoreboard.md | 4 +-- lib/datalog/tests/eval.sx | 27 ++++++++++++++++-- plans/datalog-on-sx.md | 11 ++++++++ 6 files changed, 108 insertions(+), 30 deletions(-) diff --git a/lib/datalog/db.sx b/lib/datalog/db.sx index 6a3f69a0..b5aea9b8 100644 --- a/lib/datalog/db.sx +++ b/lib/datalog/db.sx @@ -189,6 +189,44 @@ " do not appear in any body literal")) (else nil)))))))) +(define + dl-rename-anon-term + (fn + (term next-name) + (cond + ((and (symbol? term) (= (symbol->string term) "_")) + (next-name)) + ((list? 
term) + (map (fn (x) (dl-rename-anon-term x next-name)) term)) + (else term)))) + +(define + dl-rename-anon-lit + (fn + (lit next-name) + (cond + ((and (dict? lit) (has-key? lit :neg)) + {:neg (dl-rename-anon-term (get lit :neg) next-name)}) + ((list? lit) (dl-rename-anon-term lit next-name)) + (else lit)))) + +(define + dl-make-anon-renamer + (fn + () + (let ((counter 0)) + (fn () (do (set! counter (+ counter 1)) + (string->symbol (str "_anon" counter))))))) + +(define + dl-rename-anon-rule + (fn + (rule) + (let ((next-name (dl-make-anon-renamer))) + {:head (dl-rename-anon-term (get rule :head) next-name) + :body (map (fn (lit) (dl-rename-anon-lit lit next-name)) + (get rule :body))}))) + (define dl-add-rule! (fn @@ -199,14 +237,15 @@ ((not (has-key? rule :head)) (error (str "dl-add-rule!: rule missing :head, got " rule))) (else - (let - ((err (dl-rule-check-safety rule))) - (cond - ((not (nil? err)) (error (str "dl-add-rule!: " err))) - (else - (let - ((rules (get db :rules))) - (do (append! rules rule) true))))))))) + (let ((rule (dl-rename-anon-rule rule))) + (let + ((err (dl-rule-check-safety rule))) + (cond + ((not (nil? err)) (error (str "dl-add-rule!: " err))) + (else + (let + ((rules (get db :rules))) + (do (append! rules rule) true)))))))))) (define dl-add-clause! diff --git a/lib/datalog/eval.sx b/lib/datalog/eval.sx index acd74e5e..677fb636 100644 --- a/lib/datalog/eval.sx +++ b/lib/datalog/eval.sx @@ -368,21 +368,26 @@ (db goal) (do (dl-saturate! db) + ;; Rename anonymous '_' vars in the goal so multiple occurrences + ;; do not unify together. Keep the user-facing var list (taken + ;; before renaming) so projected results retain user names. (let - ((substs (dl-find-bindings (list goal) db (dl-empty-subst))) - (vars (dl-vars-of goal)) - (results (list))) - (do - (for-each - (fn - (s) - (let - ((proj (dl-project-subst s vars))) - (when - (not (dl-tuple-member? proj results)) - (append! 
results proj)))) - substs) - results))))) + ((user-vars (filter (fn (n) (not (= n "_"))) (dl-vars-of goal))) + (renamed (dl-rename-anon-lit goal (dl-make-anon-renamer)))) + (let + ((substs (dl-find-bindings (list renamed) db (dl-empty-subst))) + (results (list))) + (do + (for-each + (fn + (s) + (let + ((proj (dl-project-subst s user-vars))) + (when + (not (dl-tuple-member? proj results)) + (append! results proj)))) + substs) + results)))))) (define dl-project-subst diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index e30419f8..a855f4e0 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,13 +1,13 @@ { "lang": "datalog", - "total_passed": 153, + "total_passed": 156, "total_failed": 0, - "total": 153, + "total": 156, "suites": [ {"name":"tokenize","passed":26,"failed":0,"total":26}, {"name":"parse","passed":18,"failed":0,"total":18}, {"name":"unify","passed":28,"failed":0,"total":28}, - {"name":"eval","passed":15,"failed":0,"total":15}, + {"name":"eval","passed":18,"failed":0,"total":18}, {"name":"builtins","passed":19,"failed":0,"total":19}, {"name":"semi_naive","passed":8,"failed":0,"total":8}, {"name":"negation","passed":10,"failed":0,"total":10}, @@ -15,5 +15,5 @@ {"name":"api","passed":9,"failed":0,"total":9}, {"name":"demo","passed":10,"failed":0,"total":10} ], - "generated": "2026-05-08T08:49:52+00:00" + "generated": "2026-05-08T08:57:57+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index 2b5b5d7e..6977408c 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,13 +1,13 @@ # datalog scoreboard -**153 / 153 passing** (0 failure(s)). +**156 / 156 passing** (0 failure(s)). 
| Suite | Passed | Total | Status | |-------|--------|-------|--------| | tokenize | 26 | 26 | ok | | parse | 18 | 18 | ok | | unify | 28 | 28 | ok | -| eval | 15 | 15 | ok | +| eval | 18 | 18 | ok | | builtins | 19 | 19 | ok | | semi_naive | 8 | 8 | ok | | negation | 10 | 10 | ok | diff --git a/lib/datalog/tests/eval.sx b/lib/datalog/tests/eval.sx index 1c51e9b4..d6b6f8e6 100644 --- a/lib/datalog/tests/eval.sx +++ b/lib/datalog/tests/eval.sx @@ -173,9 +173,18 @@ "unsafe — empty body" (dl-et-throws? (fn () (dl-program "p(X) :- ."))) true) + ;; Underscore in head is unsafe — it's a fresh existential per + ;; occurrence after Phase 5d's anonymous-var renaming, and there's + ;; nothing in the body to bind it. (Old behavior accepted this by + ;; treating '_' as a literal name to skip; the renaming made it an + ;; ordinary unbound variable.) (dl-et-test! - "underscore var ok" + "underscore in head — unsafe" (dl-et-throws? (fn () (dl-program "p(X, _) :- q(X)."))) + true) + (dl-et-test! + "underscore in body only — safe" + (dl-et-throws? (fn () (dl-program "p(X) :- q(X, _)."))) false) (dl-et-test! "var only in head — unsafe" @@ -192,7 +201,21 @@ () (dl-program "edge(a,b). edge(b,c). reach(X, Z) :- edge(X, Y), edge(Y, Z)."))) - false)))) + false) + + ;; Anonymous variables: each occurrence must be independent. + (dl-et-test-set! "anon vars in rule are independent" + (dl-query + (dl-program + "p(a, b). p(c, d). q(X) :- p(X, _), p(_, Y).") + (list (quote q) (quote X))) + (list {:X (quote a)} {:X (quote c)})) + + (dl-et-test-set! "anon vars in goal are independent" + (dl-query + (dl-program "p(1, 2, 3). p(4, 5, 6).") + (list (quote p) (quote _) (quote X) (quote _))) + (list {:X 2} {:X 5}))))) (define dl-eval-tests-run! diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index d24293a3..21f6733a 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -269,6 +269,17 @@ large graphs. 
_Newest first._ +- 2026-05-08 — Phase 5d semantic fix: anonymous `_` variables are + renamed per occurrence at `dl-add-rule!` and `dl-query` time so + `(p X _) (p _ Y)` no longer unifies the two `_`s. New helpers + `dl-rename-anon-term`, `dl-rename-anon-lit`, `dl-make-anon-renamer`, + `dl-rename-anon-rule` in db.sx; eval.sx's dl-query renames the goal + before search and projects only user-named vars (`_` is filtered + out of the projection list). The "underscore in head" test now + correctly rejects `(p X _) :- q(X).` — after renaming, the head's + fresh anon var has no body binder. Two new eval tests verify + rule-level and goal-level independence. 156/156 expected. + - 2026-05-08 — Phase 5c perf: indexed `dl-find-bindings`. Replaced the recursive `(rest lits)` walk with `dl-fb-aux lits db subst i n` using `nth lits i`. Eliminates O(N²) list-copy per body of length