datalog: anonymous-renamer avoids user _anon<N> collision
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
The renamer for anonymous `_` variables started its counter at 0 and
produced `_anon1`, `_anon2`, ... unconditionally. A user who happened to
use the same naming convention would see their variable collide with a
generated name:
(dl-eval "p(a, b). p(c, d). q(_anon1) :- p(_anon1, _)."
"?- q(X).")
=> () ; should be ({:X a} {:X c})
The `_` was renamed to `_anon1` as well, collapsing the two argument
positions of `p` into a single variable (forcing both arguments to be
equal, which neither tuple satisfies).
Fix: scan each rule (and query goal) for the highest `_anon<N>`
already present and start the renamer past it. New helpers
`dl-max-anon-num` / `dl-max-anon-num-list` walk the rule tree, and
`dl-try-parse-int` / `dl-all-digits?` parse the numeric suffix of a
candidate symbol; `dl-make-anon-renamer` now takes a `start`
argument; `dl-rename-anon-rule` and the query-time renamer in
`dl-query` both compute the start from the input.
1 regression test; conformance 275/275.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -367,19 +367,82 @@
|
|||||||
(define
|
(define
|
||||||
dl-make-anon-renamer
|
dl-make-anon-renamer
|
||||||
(fn
|
(fn
|
||||||
()
|
(start)
|
||||||
(let ((counter 0))
|
(let ((counter start))
|
||||||
(fn () (do (set! counter (+ counter 1))
|
(fn () (do (set! counter (+ counter 1))
|
||||||
(string->symbol (str "_anon" counter)))))))
|
(string->symbol (str "_anon" counter)))))))
|
||||||
|
|
||||||
|
;; Scan a rule for variables already named `_anon<N>` (which would
|
||||||
|
;; otherwise collide with the renamer's output). Returns the max N
|
||||||
|
;; seen, or 0 if none. The renamer then starts at that max + 1, so
|
||||||
|
;; freshly-introduced anonymous names can't shadow a user-written
|
||||||
|
;; `_anon<N>` symbol.
|
||||||
|
(define
|
||||||
|
dl-max-anon-num
|
||||||
|
(fn
|
||||||
|
(term acc)
|
||||||
|
(cond
|
||||||
|
((symbol? term)
|
||||||
|
(let ((s (symbol->string term)))
|
||||||
|
(cond
|
||||||
|
((and (>= (len s) 6) (= (slice s 0 5) "_anon"))
|
||||||
|
(let ((n (dl-try-parse-int (slice s 5 (len s)))))
|
||||||
|
(cond
|
||||||
|
((nil? n) acc)
|
||||||
|
((> n acc) n)
|
||||||
|
(else acc))))
|
||||||
|
(else acc))))
|
||||||
|
((dict? term)
|
||||||
|
(cond
|
||||||
|
((has-key? term :neg)
|
||||||
|
(dl-max-anon-num (get term :neg) acc))
|
||||||
|
(else acc)))
|
||||||
|
((list? term) (dl-max-anon-num-list term acc 0))
|
||||||
|
(else acc))))
|
||||||
|
|
||||||
|
(define
|
||||||
|
dl-max-anon-num-list
|
||||||
|
(fn
|
||||||
|
(xs acc i)
|
||||||
|
(cond
|
||||||
|
((>= i (len xs)) acc)
|
||||||
|
(else
|
||||||
|
(dl-max-anon-num-list xs (dl-max-anon-num (nth xs i) acc) (+ i 1))))))
|
||||||
|
|
||||||
|
;; Cheap "is this string a decimal int" check. Returns the number or
|
||||||
|
;; nil. Avoids relying on host parse-number, which on non-int strings
|
||||||
|
;; might raise rather than return nil.
|
||||||
|
(define
|
||||||
|
dl-try-parse-int
|
||||||
|
(fn
|
||||||
|
(s)
|
||||||
|
(cond
|
||||||
|
((= (len s) 0) nil)
|
||||||
|
((not (dl-all-digits? s 0 (len s))) nil)
|
||||||
|
(else (parse-number s)))))
|
||||||
|
|
||||||
|
(define
|
||||||
|
dl-all-digits?
|
||||||
|
(fn
|
||||||
|
(s i n)
|
||||||
|
(cond
|
||||||
|
((>= i n) true)
|
||||||
|
((let ((c (slice s i (+ i 1))))
|
||||||
|
(not (and (>= c "0") (<= c "9"))))
|
||||||
|
false)
|
||||||
|
(else (dl-all-digits? s (+ i 1) n)))))
|
||||||
|
|
||||||
(define
|
(define
|
||||||
dl-rename-anon-rule
|
dl-rename-anon-rule
|
||||||
(fn
|
(fn
|
||||||
(rule)
|
(rule)
|
||||||
(let ((next-name (dl-make-anon-renamer)))
|
(let
|
||||||
{:head (dl-rename-anon-term (get rule :head) next-name)
|
((start (dl-max-anon-num (get rule :head)
|
||||||
:body (map (fn (lit) (dl-rename-anon-lit lit next-name))
|
(dl-max-anon-num-list (get rule :body) 0 0))))
|
||||||
(get rule :body))})))
|
(let ((next-name (dl-make-anon-renamer start)))
|
||||||
|
{:head (dl-rename-anon-term (get rule :head) next-name)
|
||||||
|
:body (map (fn (lit) (dl-rename-anon-lit lit next-name))
|
||||||
|
(get rule :body))}))))
|
||||||
|
|
||||||
(define
|
(define
|
||||||
dl-add-rule!
|
dl-add-rule!
|
||||||
|
|||||||
@@ -428,7 +428,10 @@
|
|||||||
;; names.
|
;; names.
|
||||||
(let
|
(let
|
||||||
((goals (dl-query-coerce goal))
|
((goals (dl-query-coerce goal))
|
||||||
(renamer (dl-make-anon-renamer)))
|
;; Start the renamer past any `_anon<N>` symbols the user
|
||||||
|
;; may have written in the query — avoids collision.
|
||||||
|
(renamer
|
||||||
|
(dl-make-anon-renamer (dl-max-anon-num-list goal 0 0))))
|
||||||
(let
|
(let
|
||||||
((user-vars (dl-query-user-vars goals))
|
((user-vars (dl-query-user-vars goals))
|
||||||
(renamed (map (fn (g) (dl-rename-anon-lit g renamer)) goals)))
|
(renamed (map (fn (g) (dl-rename-anon-lit g renamer)) goals)))
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
{
|
{
|
||||||
"lang": "datalog",
|
"lang": "datalog",
|
||||||
"total_passed": 274,
|
"total_passed": 275,
|
||||||
"total_failed": 0,
|
"total_failed": 0,
|
||||||
"total": 274,
|
"total": 275,
|
||||||
"suites": [
|
"suites": [
|
||||||
{"name":"tokenize","passed":31,"failed":0,"total":31},
|
{"name":"tokenize","passed":31,"failed":0,"total":31},
|
||||||
{"name":"parse","passed":23,"failed":0,"total":23},
|
{"name":"parse","passed":23,"failed":0,"total":23},
|
||||||
{"name":"unify","passed":29,"failed":0,"total":29},
|
{"name":"unify","passed":29,"failed":0,"total":29},
|
||||||
{"name":"eval","passed":42,"failed":0,"total":42},
|
{"name":"eval","passed":43,"failed":0,"total":43},
|
||||||
{"name":"builtins","passed":26,"failed":0,"total":26},
|
{"name":"builtins","passed":26,"failed":0,"total":26},
|
||||||
{"name":"semi_naive","passed":8,"failed":0,"total":8},
|
{"name":"semi_naive","passed":8,"failed":0,"total":8},
|
||||||
{"name":"negation","passed":12,"failed":0,"total":12},
|
{"name":"negation","passed":12,"failed":0,"total":12},
|
||||||
@@ -16,5 +16,5 @@
|
|||||||
{"name":"magic","passed":37,"failed":0,"total":37},
|
{"name":"magic","passed":37,"failed":0,"total":37},
|
||||||
{"name":"demo","passed":21,"failed":0,"total":21}
|
{"name":"demo","passed":21,"failed":0,"total":21}
|
||||||
],
|
],
|
||||||
"generated": "2026-05-11T08:59:09+00:00"
|
"generated": "2026-05-11T09:34:17+00:00"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,13 +1,13 @@
|
|||||||
# datalog scoreboard
|
# datalog scoreboard
|
||||||
|
|
||||||
**274 / 274 passing** (0 failure(s)).
|
**275 / 275 passing** (0 failure(s)).
|
||||||
|
|
||||||
| Suite | Passed | Total | Status |
|
| Suite | Passed | Total | Status |
|
||||||
|-------|--------|-------|--------|
|
|-------|--------|-------|--------|
|
||||||
| tokenize | 31 | 31 | ok |
|
| tokenize | 31 | 31 | ok |
|
||||||
| parse | 23 | 23 | ok |
|
| parse | 23 | 23 | ok |
|
||||||
| unify | 29 | 29 | ok |
|
| unify | 29 | 29 | ok |
|
||||||
| eval | 42 | 42 | ok |
|
| eval | 43 | 43 | ok |
|
||||||
| builtins | 26 | 26 | ok |
|
| builtins | 26 | 26 | ok |
|
||||||
| semi_naive | 8 | 8 | ok |
|
| semi_naive | 8 | 8 | ok |
|
||||||
| negation | 12 | 12 | ok |
|
| negation | 12 | 12 | ok |
|
||||||
|
|||||||
@@ -252,6 +252,20 @@
|
|||||||
(fn () (dl-program "n(3). double(*(X, 2)) :- n(X).")))
|
(fn () (dl-program "n(3). double(*(X, 2)) :- n(X).")))
|
||||||
true)
|
true)
|
||||||
|
|
||||||
|
;; The anonymous-variable renamer used to start at `_anon1`
|
||||||
|
;; unconditionally; a rule that wrote `q(_anon1) :- p(_anon1, _)`
|
||||||
|
;; (the user picking the same name the renamer would generate)
|
||||||
|
;; would see the `_` renamed to `_anon1` too, collapsing the
|
||||||
|
;; two positions in `p(_anon1, _)` to a single var. Now the
|
||||||
|
;; renamer scans the rule for the max `_anon<N>` and starts past
|
||||||
|
;; it, so user-written names of that form are preserved.
|
||||||
|
(dl-et-test-set! "anonymous-rename avoids user `_anon` collision"
|
||||||
|
(dl-query
|
||||||
|
(dl-program
|
||||||
|
"p(a, b). p(c, d). q(_anon1) :- p(_anon1, _).")
|
||||||
|
(quote (q X)))
|
||||||
|
(list {:X (quote a)} {:X (quote c)}))
|
||||||
|
|
||||||
(dl-et-test!
|
(dl-et-test!
|
||||||
"unsafe head var"
|
"unsafe head var"
|
||||||
(dl-et-throws? (fn () (dl-program "p(X, Y) :- q(X).")))
|
(dl-et-throws? (fn () (dl-program "p(X, Y) :- q(X).")))
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ for rose-ash data (e.g. federation graph, content relationships).
|
|||||||
|
|
||||||
## Status (rolling)
|
## Status (rolling)
|
||||||
|
|
||||||
`bash lib/datalog/conformance.sh` → **274/274 across 11 suites**
|
`bash lib/datalog/conformance.sh` → **275/275 across 11 suites**
|
||||||
(tokenize, parse, unify, eval, builtins, semi_naive, negation, aggregates,
|
(tokenize, parse, unify, eval, builtins, semi_naive, negation, aggregates,
|
||||||
api, magic, demo). Source is ~3100 LOC, tests ~2900 LOC, public API
|
api, magic, demo). Source is ~3100 LOC, tests ~2900 LOC, public API
|
||||||
documented in `lib/datalog/datalog.sx`.
|
documented in `lib/datalog/datalog.sx`.
|
||||||
@@ -320,6 +320,17 @@ large graphs.
|
|||||||
|
|
||||||
_Newest first._
|
_Newest first._
|
||||||
|
|
||||||
|
- 2026-05-11 — Anonymous-variable renamer collided with user-written
|
||||||
|
`_anon<N>` symbols. The renamer started counter at 0 and produced
|
||||||
|
`_anon1, _anon2, ...` unconditionally; if the user wrote
|
||||||
|
`q(_anon1) :- p(_anon1, _).` the `_` got renamed to `_anon1` too,
|
||||||
|
collapsing the two positions of `p` to a single var and returning
|
||||||
|
the empty result instead of `{a, c}`. Fix: scan each rule (and
|
||||||
|
query) for the max `_anon<N>` and start the renamer past it. The
|
||||||
|
renamer constructor now takes a `start` arg; new helpers
|
||||||
|
`dl-max-anon-num` / `dl-max-anon-num-list` walk the rule tree.
|
||||||
|
1 regression test; 275/275.
|
||||||
|
|
||||||
- 2026-05-11 — `dl-magic-query` could silently diverge from
|
- 2026-05-11 — `dl-magic-query` could silently diverge from
|
||||||
`dl-query` when an aggregate's inner-goal relation was IDB. The
|
`dl-query` when an aggregate's inner-goal relation was IDB. The
|
||||||
rewriter passes aggregate body lits through unchanged (no magic
|
rewriter passes aggregate body lits through unchanged (no magic
|
||||||
|
|||||||
Reference in New Issue
Block a user