Files
rose-ash/lib/datalog/db.sx
giles ce98d97728
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
datalog: anonymous-renamer avoids user _anon<N> collision
The renamer for anonymous `_` variables started at counter 0 and
produced `_anon1, _anon2, ...` unconditionally. A user writing the
same naming convention would see their variables shadowed:

  (dl-eval "p(a, b). p(c, d). q(_anon1) :- p(_anon1, _)."
           "?- q(X).")
  => ()    ; should be ({:X a} {:X c})

The `_` got renamed to `_anon1` too, collapsing the two positions
of `p` to a single var (forcing args to be equal — which neither
tuple satisfies).

Fix: scan each rule (and query goal) for the highest `_anon<N>`
already present and start the renamer past it. New helpers
`dl-max-anon-num` / `dl-max-anon-num-list` / `dl-try-parse-int`
walk the rule tree; `dl-make-anon-renamer` now takes a `start`
argument; `dl-rename-anon-rule` and the query-time renamer in
`dl-query` both compute the start from the input.

1 regression test; conformance 275/275.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-11 09:34:41 +00:00

556 lines
15 KiB
Plaintext

;; lib/datalog/db.sx — Datalog database (EDB + IDB + rules) + safety hook.
;;
;; A db is a mutable dict:
;; {:facts {<rel-name-string> -> (literal ...)}
;; :rules ({:head literal :body (literal ...)} ...)}
;;
;; Facts are stored as full literals `(rel arg ... arg)` so they unify
;; directly against rule body literals. Each relation's tuple list is
;; deduplicated on insert.
;;
;; Phase 3 introduced safety analysis for head variables; Phase 4 (in
;; lib/datalog/builtins.sx) swaps in the real `dl-rule-check-safety`,
;; which is order-aware and understands built-in predicates.
(define
dl-make-db
(fn ()
{:facts {}
:facts-keys {}
:facts-index {}
:edb-keys {}
:rules (list)
:strategy :semi-naive}))
;; Record (rel-key, tuple-key) as user-asserted EDB. dl-add-fact! calls
;; this when an explicit fact is added; the saturator (which uses
;; dl-add-derived!) does NOT, so derived tuples never appear here.
;; dl-retract! consults :edb-keys to know which tuples must survive
;; the wipe-and-resaturate round-trip.
(define
dl-mark-edb!
(fn
(db rel-key tk)
(let
((edb (get db :edb-keys)))
(do
(when
(not (has-key? edb rel-key))
(dict-set! edb rel-key {}))
(dict-set! (get edb rel-key) tk true)))))
(define
dl-edb-fact?
(fn
(db rel-key tk)
(let
((edb (get db :edb-keys)))
(and (has-key? edb rel-key)
(has-key? (get edb rel-key) tk)))))
;; Evaluation strategy. Default :semi-naive (the only strategy
;; currently implemented). :magic is reserved for goal-directed
;; magic-sets evaluation — calling it now logs a one-time "deferred"
;; note and falls back to semi-naive.
(define
dl-set-strategy!
(fn
(db strategy)
(do
(dict-set! db :strategy strategy)
db)))
(define
dl-get-strategy
(fn
(db)
(if (has-key? db :strategy) (get db :strategy) :semi-naive)))
(define
dl-rel-name
(fn
(lit)
(cond
((and (dict? lit) (has-key? lit :neg)) (dl-rel-name (get lit :neg)))
((and (list? lit) (> (len lit) 0) (symbol? (first lit)))
(symbol->string (first lit)))
(else nil))))
(define dl-builtin-rels (list "<" "<=" ">" ">=" "=" "!=" "is"))
(define
dl-member-string?
(fn
(s xs)
(cond
((= (len xs) 0) false)
((= (first xs) s) true)
(else (dl-member-string? s (rest xs))))))
(define
dl-builtin?
(fn
(lit)
(and
(list? lit)
(> (len lit) 0)
(let
((rel (dl-rel-name lit)))
(cond
((nil? rel) false)
(else (dl-member-string? rel dl-builtin-rels)))))))
(define
dl-positive-lit?
(fn
(lit)
(cond
((and (dict? lit) (has-key? lit :neg)) false)
((dl-builtin? lit) false)
((and (list? lit) (> (len lit) 0)) true)
(else false))))
(define
dl-tuple-equal?
(fn
(a b)
(cond
((and (list? a) (list? b))
(and (= (len a) (len b)) (dl-tuple-equal-list? a b 0)))
((and (number? a) (number? b)) (= a b))
(else (equal? a b)))))
(define
dl-tuple-equal-list?
(fn
(a b i)
(cond
((>= i (len a)) true)
((not (dl-tuple-equal? (nth a i) (nth b i))) false)
(else (dl-tuple-equal-list? a b (+ i 1))))))
(define
dl-tuple-member?
(fn
(lit lits)
(dl-tuple-member-aux? lit lits 0 (len lits))))
(define
dl-tuple-member-aux?
(fn
(lit lits i n)
(cond
((>= i n) false)
((dl-tuple-equal? lit (nth lits i)) true)
(else (dl-tuple-member-aux? lit lits (+ i 1) n)))))
(define
dl-ensure-rel!
(fn
(db rel-key)
(let
((facts (get db :facts))
(fk (get db :facts-keys))
(fi (get db :facts-index)))
(do
(when
(not (has-key? facts rel-key))
(dict-set! facts rel-key (list)))
(when
(not (has-key? fk rel-key))
(dict-set! fk rel-key {}))
(when
(not (has-key? fi rel-key))
(dict-set! fi rel-key {}))
(get facts rel-key)))))
;; First-arg index helpers. Tuples are keyed by their first-after-rel
;; arg's `(str ...)`; when that arg is a constant, dl-match-positive
;; uses the index instead of scanning the full relation.
(define
dl-arg-key
(fn
(v)
(str v)))
(define
dl-index-add!
(fn
(db rel-key lit)
(let
((idx (get db :facts-index))
(n (len lit)))
(when
(and (>= n 2) (has-key? idx rel-key))
(let
((rel-idx (get idx rel-key))
(k (dl-arg-key (nth lit 1))))
(do
(when
(not (has-key? rel-idx k))
(dict-set! rel-idx k (list)))
(append! (get rel-idx k) lit)))))))
(define
dl-index-lookup
(fn
(db rel-key arg-val)
(let
((idx (get db :facts-index)))
(cond
((not (has-key? idx rel-key)) (list))
(else
(let ((rel-idx (get idx rel-key))
(k (dl-arg-key arg-val)))
(if (has-key? rel-idx k) (get rel-idx k) (list))))))))
(define dl-tuple-key (fn (lit) (str lit)))
(define
dl-rel-tuples
(fn
(db rel-key)
(let
((facts (get db :facts)))
(if (has-key? facts rel-key) (get facts rel-key) (list)))))
;; Reserved relation names: built-in / aggregate / negation / arrow.
;; Rules and facts may not have these as their head's relation, since
;; the saturator treats them specially or they are not relation names
;; at all.
(define
dl-reserved-rel-names
(list "not" "count" "sum" "min" "max" "findall" "is"
"<" "<=" ">" ">=" "=" "!=" "+" "-" "*" "/" ":-" "?-"))
(define
dl-reserved-rel?
(fn
(name) (dl-member-string? name dl-reserved-rel-names)))
;; Internal: append a derived tuple to :facts without the public
;; validation pass and without marking :edb-keys. Used by the saturator
;; (eval.sx) and magic-sets (magic.sx). Returns true if the tuple was
;; new, false if already present.
(define
dl-add-derived!
(fn
(db lit)
(let
((rel-key (dl-rel-name lit)))
(let
((tuples (dl-ensure-rel! db rel-key))
(key-dict (get (get db :facts-keys) rel-key))
(tk (dl-tuple-key lit)))
(cond
((has-key? key-dict tk) false)
(else
(do
(dict-set! key-dict tk true)
(append! tuples lit)
(dl-index-add! db rel-key lit)
true)))))))
;; A simple term — number, string, or symbol — i.e. anything legal
;; as an EDB fact arg. Compound (list) args belong only in body
;; literals where they encode arithmetic / aggregate sub-goals.
(define
dl-simple-term?
(fn
(term)
(or (number? term) (string? term) (symbol? term))))
(define
dl-args-simple?
(fn
(lit i n)
(cond
((>= i n) true)
((not (dl-simple-term? (nth lit i))) false)
(else (dl-args-simple? lit (+ i 1) n)))))
(define
dl-add-fact!
(fn
(db lit)
(cond
((not (and (list? lit) (> (len lit) 0)))
(error (str "dl-add-fact!: expected literal list, got " lit)))
((dl-reserved-rel? (dl-rel-name lit))
(error (str "dl-add-fact!: '" (dl-rel-name lit)
"' is a reserved name (built-in / aggregate / negation)")))
((not (dl-args-simple? lit 1 (len lit)))
(error (str "dl-add-fact!: fact args must be numbers, strings, "
"or symbols — compound args (e.g. arithmetic "
"expressions) are body-only and aren't evaluated "
"in fact position. got " lit)))
((not (dl-ground? lit (dl-empty-subst)))
(error (str "dl-add-fact!: expected ground literal, got " lit)))
(else
(let
((rel-key (dl-rel-name lit)) (tk (dl-tuple-key lit)))
(do
;; Always mark EDB origin — even if the tuple key was already
;; present (e.g. previously derived), so an explicit assert
;; promotes it to EDB and protects it from the IDB wipe.
(dl-mark-edb! db rel-key tk)
(dl-add-derived! db lit)))))))
;; The full safety check lives in builtins.sx (it has to know which
;; predicates are built-ins). dl-add-rule! calls it via forward
;; reference; load builtins.sx alongside db.sx in any setup that
;; adds rules. The fallback below is used if builtins.sx isn't loaded.
(define
dl-rule-check-safety
(fn
(rule)
(let
((head-vars (dl-vars-of (get rule :head))) (body-vars (list)))
(do
(for-each
(fn
(lit)
(when
(and
(list? lit)
(> (len lit) 0)
(not (and (dict? lit) (has-key? lit :neg))))
(for-each
(fn
(v)
(when
(not (dl-member-string? v body-vars))
(append! body-vars v)))
(dl-vars-of lit))))
(get rule :body))
(let
((missing (list)))
(do
(for-each
(fn
(v)
(when
(and
(not (dl-member-string? v body-vars))
(not (= v "_")))
(append! missing v)))
head-vars)
(cond
((> (len missing) 0)
(str
"head variable(s) "
missing
" do not appear in any body literal"))
(else nil))))))))
(define
dl-rename-anon-term
(fn
(term next-name)
(cond
((and (symbol? term) (= (symbol->string term) "_"))
(next-name))
((list? term)
(map (fn (x) (dl-rename-anon-term x next-name)) term))
(else term))))
(define
dl-rename-anon-lit
(fn
(lit next-name)
(cond
((and (dict? lit) (has-key? lit :neg))
{:neg (dl-rename-anon-term (get lit :neg) next-name)})
((list? lit) (dl-rename-anon-term lit next-name))
(else lit))))
(define
dl-make-anon-renamer
(fn
(start)
(let ((counter start))
(fn () (do (set! counter (+ counter 1))
(string->symbol (str "_anon" counter)))))))
;; Scan a rule for variables already named `_anon<N>` (which would
;; otherwise collide with the renamer's output). Returns the max N
;; seen, or 0 if none. The renamer then starts at that max + 1, so
;; freshly-introduced anonymous names can't shadow a user-written
;; `_anon<N>` symbol.
(define
dl-max-anon-num
(fn
(term acc)
(cond
((symbol? term)
(let ((s (symbol->string term)))
(cond
((and (>= (len s) 6) (= (slice s 0 5) "_anon"))
(let ((n (dl-try-parse-int (slice s 5 (len s)))))
(cond
((nil? n) acc)
((> n acc) n)
(else acc))))
(else acc))))
((dict? term)
(cond
((has-key? term :neg)
(dl-max-anon-num (get term :neg) acc))
(else acc)))
((list? term) (dl-max-anon-num-list term acc 0))
(else acc))))
(define
dl-max-anon-num-list
(fn
(xs acc i)
(cond
((>= i (len xs)) acc)
(else
(dl-max-anon-num-list xs (dl-max-anon-num (nth xs i) acc) (+ i 1))))))
;; Cheap "is this string a decimal int" check. Returns the number or
;; nil. Avoids relying on host parse-number, which on non-int strings
;; might raise rather than return nil.
(define
dl-try-parse-int
(fn
(s)
(cond
((= (len s) 0) nil)
((not (dl-all-digits? s 0 (len s))) nil)
(else (parse-number s)))))
(define
dl-all-digits?
(fn
(s i n)
(cond
((>= i n) true)
((let ((c (slice s i (+ i 1))))
(not (and (>= c "0") (<= c "9"))))
false)
(else (dl-all-digits? s (+ i 1) n)))))
(define
dl-rename-anon-rule
(fn
(rule)
(let
((start (dl-max-anon-num (get rule :head)
(dl-max-anon-num-list (get rule :body) 0 0))))
(let ((next-name (dl-make-anon-renamer start)))
{:head (dl-rename-anon-term (get rule :head) next-name)
:body (map (fn (lit) (dl-rename-anon-lit lit next-name))
(get rule :body))}))))
(define
dl-add-rule!
(fn
(db rule)
(cond
((not (dict? rule))
(error (str "dl-add-rule!: expected rule dict, got " rule)))
((not (has-key? rule :head))
(error (str "dl-add-rule!: rule missing :head, got " rule)))
((not (and (list? (get rule :head))
(> (len (get rule :head)) 0)
(symbol? (first (get rule :head)))))
(error (str "dl-add-rule!: head must be a non-empty list "
"starting with a relation-name symbol, got "
(get rule :head))))
((not (dl-args-simple? (get rule :head) 1 (len (get rule :head))))
(error (str "dl-add-rule!: rule head args must be variables or "
"constants — compound terms (e.g. `(*(X, 2))`) are "
"not legal in head position; introduce an `is`-bound "
"intermediate in the body. got " (get rule :head))))
((not (list? (if (has-key? rule :body) (get rule :body) (list))))
(error (str "dl-add-rule!: body must be a list of literals, got "
(get rule :body))))
((dl-reserved-rel? (dl-rel-name (get rule :head)))
(error (str "dl-add-rule!: '" (dl-rel-name (get rule :head))
"' is a reserved name (built-in / aggregate / negation)")))
(else
(let ((rule (dl-rename-anon-rule rule)))
(let
((err (dl-rule-check-safety rule)))
(cond
((not (nil? err)) (error (str "dl-add-rule!: " err)))
(else
(let
((rules (get db :rules)))
(do (append! rules rule) true))))))))))
(define
dl-add-clause!
(fn
(db clause)
(cond
((has-key? clause :query) false)
((and (has-key? clause :body) (= (len (get clause :body)) 0))
(dl-add-fact! db (get clause :head)))
(else (dl-add-rule! db clause)))))
(define
dl-load-program!
(fn
(db source)
(let
((clauses (dl-parse source)))
(do (for-each (fn (c) (dl-add-clause! db c)) clauses) db))))
(define
dl-program
(fn (source) (let ((db (dl-make-db))) (dl-load-program! db source))))
(define dl-rules (fn (db) (get db :rules)))
(define
dl-fact-count
(fn
(db)
(let
((facts (get db :facts)) (total 0))
(do
(for-each
(fn (k) (set! total (+ total (len (get facts k)))))
(keys facts))
total))))
;; Returns {<rel-name>: tuple-count} for debugging. Includes
;; relations with any tuples plus all rule-head relations (so empty
;; IDB shows as 0). Skips empty EDB-only entries that are placeholders
;; from internal `dl-ensure-rel!` calls.
(define
dl-summary
(fn
(db)
(let
((facts (get db :facts))
(out {})
(rule-heads (list)))
(do
(for-each
(fn
(rule)
(let ((h (dl-rel-name (get rule :head))))
(when
(and (not (nil? h)) (not (dl-member-string? h rule-heads)))
(append! rule-heads h))))
(dl-rules db))
(for-each
(fn
(k)
(let ((c (len (get facts k))))
(when
(or (> c 0) (dl-member-string? k rule-heads))
(dict-set! out k c))))
(keys facts))
;; Add rule heads that have no facts (yet).
(for-each
(fn
(k)
(when (not (has-key? out k)) (dict-set! out k 0)))
rule-heads)
out))))