From ce98d97728eb112fcad2b0ada6ce313ded5475da Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 11 May 2026 09:34:41 +0000 Subject: [PATCH] datalog: anonymous-renamer avoids user `_anon` collision MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The renamer for anonymous `_` variables started at counter 0 and produced `_anon1, _anon2, ...` unconditionally. A user writing the same naming convention would see their variables shadowed: (dl-eval "p(a, b). p(c, d). q(_anon1) :- p(_anon1, _)." "?- q(X).") => () ; should be ({:X a} {:X c}) The `_` got renamed to `_anon1` too, collapsing the two positions of `p` to a single var (forcing args to be equal — which neither tuple satisfies). Fix: scan each rule (and query goal) for the highest `_anon` already present and start the renamer past it. New helpers `dl-max-anon-num` / `dl-max-anon-num-list` / `dl-try-parse-int` walk the rule tree; `dl-make-anon-renamer` now takes a `start` argument; `dl-rename-anon-rule` and the query-time renamer in `dl-query` both compute the start from the input. 1 regression test; conformance 275/275. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/datalog/db.sx | 75 ++++++++++++++++++++++++++++++++++--- lib/datalog/eval.sx | 5 ++- lib/datalog/scoreboard.json | 8 ++-- lib/datalog/scoreboard.md | 4 +- lib/datalog/tests/eval.sx | 14 +++++++ plans/datalog-on-sx.md | 13 ++++++- 6 files changed, 105 insertions(+), 14 deletions(-) diff --git a/lib/datalog/db.sx b/lib/datalog/db.sx index 230d1c1c..9df4234b 100644 --- a/lib/datalog/db.sx +++ b/lib/datalog/db.sx @@ -367,19 +367,82 @@ (define dl-make-anon-renamer (fn - () - (let ((counter 0)) + (start) + (let ((counter start)) (fn () (do (set! counter (+ counter 1)) (string->symbol (str "_anon" counter))))))) +;; Scan a rule for variables already named `_anon` (which would +;; otherwise collide with the renamer's output). Returns the max N +;; seen, or 0 if none. 
The renamer then starts at that max + 1, so +;; freshly-introduced anonymous names can't shadow a user-written +;; `_anon` symbol. +(define + dl-max-anon-num + (fn + (term acc) + (cond + ((symbol? term) + (let ((s (symbol->string term))) + (cond + ((and (>= (len s) 6) (= (slice s 0 5) "_anon")) + (let ((n (dl-try-parse-int (slice s 5 (len s))))) + (cond + ((nil? n) acc) + ((> n acc) n) + (else acc)))) + (else acc)))) + ((dict? term) + (cond + ((has-key? term :neg) + (dl-max-anon-num (get term :neg) acc)) + (else acc))) + ((list? term) (dl-max-anon-num-list term acc 0)) + (else acc)))) + +(define + dl-max-anon-num-list + (fn + (xs acc i) + (cond + ((>= i (len xs)) acc) + (else + (dl-max-anon-num-list xs (dl-max-anon-num (nth xs i) acc) (+ i 1)))))) + +;; Cheap "is this string a decimal int" check. Returns the number or +;; nil. Avoids relying on host parse-number, which on non-int strings +;; might raise rather than return nil. +(define + dl-try-parse-int + (fn + (s) + (cond + ((= (len s) 0) nil) + ((not (dl-all-digits? s 0 (len s))) nil) + (else (parse-number s))))) + +(define + dl-all-digits? + (fn + (s i n) + (cond + ((>= i n) true) + ((let ((c (slice s i (+ i 1)))) + (not (and (>= c "0") (<= c "9")))) + false) + (else (dl-all-digits? s (+ i 1) n))))) + (define dl-rename-anon-rule (fn (rule) - (let ((next-name (dl-make-anon-renamer))) - {:head (dl-rename-anon-term (get rule :head) next-name) - :body (map (fn (lit) (dl-rename-anon-lit lit next-name)) - (get rule :body))}))) + (let + ((start (dl-max-anon-num (get rule :head) + (dl-max-anon-num-list (get rule :body) 0 0)))) + (let ((next-name (dl-make-anon-renamer start))) + {:head (dl-rename-anon-term (get rule :head) next-name) + :body (map (fn (lit) (dl-rename-anon-lit lit next-name)) + (get rule :body))})))) (define dl-add-rule! diff --git a/lib/datalog/eval.sx b/lib/datalog/eval.sx index 17d9a4c6..abf8e458 100644 --- a/lib/datalog/eval.sx +++ b/lib/datalog/eval.sx @@ -428,7 +428,10 @@ ;; names. 
(let ((goals (dl-query-coerce goal)) - (renamer (dl-make-anon-renamer))) + ;; Start the renamer past any `_anon` symbols the user + ;; may have written in the query — avoids collision. + (renamer + (dl-make-anon-renamer (dl-max-anon-num-list goal 0 0)))) (let ((user-vars (dl-query-user-vars goals)) (renamed (map (fn (g) (dl-rename-anon-lit g renamer)) goals))) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index ae3a7255..e7621023 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,13 +1,13 @@ { "lang": "datalog", - "total_passed": 274, + "total_passed": 275, "total_failed": 0, - "total": 274, + "total": 275, "suites": [ {"name":"tokenize","passed":31,"failed":0,"total":31}, {"name":"parse","passed":23,"failed":0,"total":23}, {"name":"unify","passed":29,"failed":0,"total":29}, - {"name":"eval","passed":42,"failed":0,"total":42}, + {"name":"eval","passed":43,"failed":0,"total":43}, {"name":"builtins","passed":26,"failed":0,"total":26}, {"name":"semi_naive","passed":8,"failed":0,"total":8}, {"name":"negation","passed":12,"failed":0,"total":12}, @@ -16,5 +16,5 @@ {"name":"magic","passed":37,"failed":0,"total":37}, {"name":"demo","passed":21,"failed":0,"total":21} ], - "generated": "2026-05-11T08:59:09+00:00" + "generated": "2026-05-11T09:34:17+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index 0e44891b..4676d363 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,13 +1,13 @@ # datalog scoreboard -**274 / 274 passing** (0 failure(s)). +**275 / 275 passing** (0 failure(s)). 
| Suite | Passed | Total | Status | |-------|--------|-------|--------| | tokenize | 31 | 31 | ok | | parse | 23 | 23 | ok | | unify | 29 | 29 | ok | -| eval | 42 | 42 | ok | +| eval | 43 | 43 | ok | | builtins | 26 | 26 | ok | | semi_naive | 8 | 8 | ok | | negation | 12 | 12 | ok | diff --git a/lib/datalog/tests/eval.sx b/lib/datalog/tests/eval.sx index 1dd432de..937a49dc 100644 --- a/lib/datalog/tests/eval.sx +++ b/lib/datalog/tests/eval.sx @@ -252,6 +252,20 @@ (fn () (dl-program "n(3). double(*(X, 2)) :- n(X)."))) true) + ;; The anonymous-variable renamer used to start at `_anon1` + ;; unconditionally; a rule that wrote `q(_anon1) :- p(_anon1, _)` + ;; (the user picking the same name the renamer would generate) + ;; would see the `_` renamed to `_anon1` too, collapsing the + ;; two positions in `p(_anon1, _)` to a single var. Now the + ;; renamer scans the rule for the max `_anon<N>` and starts past + ;; it, so user-written names of that form are preserved. + (dl-et-test-set! "anonymous-rename avoids user `_anon` collision" + (dl-query + (dl-program + "p(a, b). p(c, d). q(_anon1) :- p(_anon1, _).") + (quote (q X))) + (list {:X (quote a)} {:X (quote c)})) + (dl-et-test! "unsafe head var" (dl-et-throws? (fn () (dl-program "p(X, Y) :- q(X)."))) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 81a5ddf0..791c77d8 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -15,7 +15,7 @@ for rose-ash data (e.g. federation graph, content relationships). ## Status (rolling) -`bash lib/datalog/conformance.sh` → **274/274 across 11 suites** +`bash lib/datalog/conformance.sh` → **275/275 across 11 suites** (tokenize, parse, unify, eval, builtins, semi_naive, negation, aggregates, api, magic, demo). Source is ~3100 LOC, tests ~2900 LOC, public API documented in `lib/datalog/datalog.sx`. @@ -320,6 +320,17 @@ large graphs. _Newest first._ +- 2026-05-11 — Anonymous-variable renamer collided with user-written + `_anon<N>` symbols. 
The renamer started counter at 0 and produced + `_anon1, _anon2, ...` unconditionally; if the user wrote + `q(_anon1) :- p(_anon1, _).` the `_` got renamed to `_anon1` too, + collapsing the two positions of `p` to a single var and returning + the empty result instead of `{a, c}`. Fix: scan each rule (and + query) for the max `_anon<N>` and start the renamer past it. The + renamer constructor now takes a `start` arg; new helpers + `dl-max-anon-num` / `dl-max-anon-num-list` walk the rule tree. + 1 regression test; 275/275. + - 2026-05-11 — `dl-magic-query` could silently diverge from `dl-query` when an aggregate's inner-goal relation was IDB. The rewriter passes aggregate body lits through unchanged (no magic