datalog: db + naive eval + safety analysis (Phase 3, 87/87)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 51s

db.sx: facts indexed by relation name, rules list, dl-add-fact!
(rejects non-ground), dl-add-rule! (rejects unsafe — head vars
not in positive body). eval.sx: dl-saturate! fixpoint, dl-query
with deduped projected results. Negation and arithmetic raise
clear errors (Phase 4/7 to follow). 15 eval tests: transitive
closure, sibling, same-gen, grandparent, cyclic reach, safety.
This commit is contained in:
2026-05-07 23:41:27 +00:00
parent 8046df7ce5
commit 9bc70fd2a9
7 changed files with 663 additions and 14 deletions

View File

@@ -7,10 +7,13 @@ PRELOADS=(
lib/datalog/tokenizer.sx
lib/datalog/parser.sx
lib/datalog/unify.sx
lib/datalog/db.sx
lib/datalog/eval.sx
)
SUITES=(
"tokenize:lib/datalog/tests/tokenize.sx:(dl-tokenize-tests-run!)"
"parse:lib/datalog/tests/parse.sx:(dl-parse-tests-run!)"
"unify:lib/datalog/tests/unify.sx:(dl-unify-tests-run!)"
"eval:lib/datalog/tests/eval.sx:(dl-eval-tests-run!)"
)

263
lib/datalog/db.sx Normal file
View File

@@ -0,0 +1,263 @@
;; lib/datalog/db.sx — Datalog database (EDB + IDB + rules) + safety.
;;
;; A db is a mutable dict:
;; {:facts {<rel-name-string> -> (literal ...)}
;; :rules ({:head literal :body (literal ...)} ...)}
;;
;; Facts are stored as full literals `(rel arg ... arg)` so they unify
;; directly against rule body literals. Each relation's tuple list is
;; deduplicated on insert.
;;
;; Phase 3 makes no distinction between EDB (asserted) and IDB (derived);
;; a future phase may track provenance for retraction.
(define dl-make-db (fn () {:facts {} :rules (list)}))
;; The relation name (as string) of a positive literal, or of the
;; underlying literal of a negative one. nil for malformed input.
(define
dl-rel-name
(fn
(lit)
(cond
((and (dict? lit) (has-key? lit :neg)) (dl-rel-name (get lit :neg)))
((and (list? lit) (> (len lit) 0) (symbol? (first lit)))
(symbol->string (first lit)))
(else nil))))
(define dl-builtin-rels (list "<" "<=" ">" ">=" "=" "!=" "is"))
(define
dl-member-string?
(fn
(s xs)
(cond
((= (len xs) 0) false)
((= (first xs) s) true)
(else (dl-member-string? s (rest xs))))))
(define
dl-builtin?
(fn
(lit)
(and
(list? lit)
(> (len lit) 0)
(let
((rel (dl-rel-name lit)))
(cond
((nil? rel) false)
(else (dl-member-string? rel dl-builtin-rels)))))))
(define
dl-positive-lit?
(fn
(lit)
(cond
((and (dict? lit) (has-key? lit :neg)) false)
((dl-builtin? lit) false)
((and (list? lit) (> (len lit) 0)) true)
(else false))))
;; Membership using dl-deep tuple equality (handles var/constant symbols
;; and numbers consistently).
(define
dl-tuple-equal?
(fn
(a b)
(cond
((and (list? a) (list? b))
(and (= (len a) (len b)) (dl-tuple-equal-list? a b 0)))
((and (number? a) (number? b)) (= a b))
(else (equal? a b)))))
(define
dl-tuple-equal-list?
(fn
(a b i)
(cond
((>= i (len a)) true)
((not (dl-tuple-equal? (nth a i) (nth b i))) false)
(else (dl-tuple-equal-list? a b (+ i 1))))))
(define
dl-tuple-member?
(fn
(lit lits)
(cond
((= (len lits) 0) false)
((dl-tuple-equal? lit (first lits)) true)
(else (dl-tuple-member? lit (rest lits))))))
;; Ensure :facts has a list for the given relation key, then return it.
(define
dl-ensure-rel!
(fn
(db rel-key)
(let
((facts (get db :facts)))
(do
(when
(not (has-key? facts rel-key))
(dict-set! facts rel-key (list)))
(get facts rel-key)))))
;; All tuples currently known for a relation (EDB IDB). Returns empty
;; list when relation hasn't been seen.
(define
dl-rel-tuples
(fn
(db rel-key)
(let
((facts (get db :facts)))
(if (has-key? facts rel-key) (get facts rel-key) (list)))))
;; Add a ground literal. Returns true iff the literal was new.
(define
dl-add-fact!
(fn
(db lit)
(cond
((not (and (list? lit) (> (len lit) 0)))
(error (str "dl-add-fact!: expected literal list, got " lit)))
((not (dl-ground? lit (dl-empty-subst)))
(error (str "dl-add-fact!: expected ground literal, got " lit)))
(else
(let
((rel-key (dl-rel-name lit)))
(let
((tuples (dl-ensure-rel! db rel-key)))
(cond
((dl-tuple-member? lit tuples) false)
(else (do (append! tuples lit) true)))))))))
;; Collect variables appearing in the positive body literals of `body`.
(define
dl-positive-body-vars
(fn
(body)
(let
((vars (list)))
(do
(for-each
(fn
(lit)
(when
(dl-positive-lit? lit)
(for-each
(fn
(v)
(when (not (dl-member-string? v vars)) (append! vars v)))
(dl-vars-of lit))))
body)
vars))))
;; Collect variables in any literal (positive, negated, or built-in).
(define
dl-all-body-vars
(fn
(body)
(let
((vars (list)))
(do
(for-each
(fn
(lit)
(let
((target (if (and (dict? lit) (has-key? lit :neg)) (get lit :neg) lit)))
(for-each
(fn
(v)
(when (not (dl-member-string? v vars)) (append! vars v)))
(dl-vars-of target))))
body)
vars))))
;; Return the list of head variables NOT covered by some positive body
;; literal. A safe rule has an empty list. The check ignores '_' since
;; that is treated as a fresh anonymous variable per occurrence.
(define
dl-rule-unsafe-head-vars
(fn
(rule)
(let
((head (get rule :head))
(body (get rule :body))
(head-vars (dl-vars-of head))
(body-vars (dl-positive-body-vars body))
(out (list)))
(do
(for-each
(fn
(v)
(when
(and (not (dl-member-string? v body-vars)) (not (= v "_")))
(append! out v)))
head-vars)
out))))
;; Add a rule. Rejects unsafe rules with a pointer at the offending var.
(define
dl-add-rule!
(fn
(db rule)
(cond
((not (dict? rule))
(error (str "dl-add-rule!: expected rule dict, got " rule)))
((not (has-key? rule :head))
(error (str "dl-add-rule!: rule missing :head, got " rule)))
(else
(let
((unsafe (dl-rule-unsafe-head-vars rule)))
(cond
((> (len unsafe) 0)
(error
(str
"dl-add-rule!: unsafe rule — head variable(s) "
unsafe
" do not appear in any positive body literal of "
rule)))
(else
(let
((rules (get db :rules)))
(do (append! rules rule) true)))))))))
;; Load a parsed clause (fact or rule) into the db. Queries are ignored.
(define
dl-add-clause!
(fn
(db clause)
(cond
((has-key? clause :query) false)
((and (has-key? clause :body) (= (len (get clause :body)) 0))
(dl-add-fact! db (get clause :head)))
(else (dl-add-rule! db clause)))))
;; Parse source text and load every clause into the db. Returns the db.
(define
dl-load-program!
(fn
(db source)
(let
((clauses (dl-parse source)))
(do (for-each (fn (c) (dl-add-clause! db c)) clauses) db))))
;; Convenience: build a db from source in one step.
(define
dl-program
(fn (source) (let ((db (dl-make-db))) (dl-load-program! db source))))
(define dl-rules (fn (db) (get db :rules)))
;; Total number of stored ground tuples across all relations.
(define
dl-fact-count
(fn
(db)
(let
((facts (get db :facts)) (total 0))
(do
(for-each
(fn (k) (set! total (+ total (len (get facts k)))))
(keys facts))
total))))

153
lib/datalog/eval.sx Normal file
View File

@@ -0,0 +1,153 @@
;; lib/datalog/eval.sx — naive bottom-up fixpoint evaluator.
;;
;; (dl-saturate! db) iterates rules until no new tuples are derived.
;; The Herbrand base is finite (no function symbols) so termination is
;; guaranteed by the language.
;;
;; Phase 3 handles only positive EDB literals. Negation (Phase 7) and
;; arithmetic built-ins (Phase 4) raise a clear error if encountered.
;; Match a positive literal against every tuple in the relation; return
;; a list of extended substitutions, one per successful unification.
(define
dl-match-positive
(fn
(lit db subst)
(let
((rel (dl-rel-name lit)) (results (list)))
(cond
((nil? rel) (error (str "dl-match-positive: bad literal " lit)))
(else
(let
((tuples (dl-rel-tuples db rel)))
(do
(for-each
(fn
(tuple)
(let
((s (dl-unify lit tuple subst)))
(when (not (nil? s)) (append! results s))))
tuples)
results)))))))
(define
dl-match-lit
(fn
(lit db subst)
(cond
((and (dict? lit) (has-key? lit :neg))
(error "datalog: negation not yet supported (Phase 7)"))
((dl-builtin? lit)
(error "datalog: built-in predicate not yet supported (Phase 4)"))
((and (list? lit) (> (len lit) 0))
(dl-match-positive lit db subst))
(else (error (str "datalog: unknown body-literal shape: " lit))))))
;; Recursively find all substitutions satisfying a list of body literals
;; under `subst`. Returns a list of substitutions.
(define
dl-find-bindings
(fn
(lits db subst)
(cond
((nil? subst) (list))
((= (len lits) 0) (list subst))
(else
(let
((options (dl-match-lit (first lits) db subst))
(results (list)))
(do
(for-each
(fn
(s)
(for-each
(fn (s2) (append! results s2))
(dl-find-bindings (rest lits) db s)))
options)
results))))))
;; Apply a rule once. Returns true if any new tuple was derived.
(define
dl-apply-rule!
(fn
(db rule)
(let
((head (get rule :head)) (body (get rule :body)) (new? false))
(do
(for-each
(fn
(s)
(let
((derived (dl-apply-subst head s)))
(when (dl-add-fact! db derived) (set! new? true))))
(dl-find-bindings body db (dl-empty-subst)))
new?))))
;; Iterate rules to fixpoint.
(define
dl-saturate!
(fn
(db)
(let
((changed true))
(do
(define
dl-sat-loop
(fn
()
(when
changed
(do
(set! changed false)
(for-each
(fn (r) (when (dl-apply-rule! db r) (set! changed true)))
(dl-rules db))
(dl-sat-loop)))))
(dl-sat-loop)
db))))
;; Run a goal against the saturated db. Returns a list of substitutions
;; (each restricted to the variables that appeared in `goal`). Duplicate
;; projections are deduplicated so users get the set of answers.
(define
dl-query
(fn
(db goal)
(do
(dl-saturate! db)
(let
((substs (dl-find-bindings (list goal) db (dl-empty-subst)))
(vars (dl-vars-of goal))
(results (list)))
(do
(for-each
(fn
(s)
(let
((proj (dl-project-subst s vars)))
(when
(not (dl-tuple-member? proj results))
(append! results proj))))
substs)
results)))))
;; Project a subst down to a given set of variable names.
(define
dl-project-subst
(fn
(subst names)
(let
((out {}))
(do
(for-each
(fn
(n)
(let
((sym (string->symbol n)))
(let
((v (dl-walk sym subst)))
(dict-set! out n (dl-apply-subst v subst)))))
names)
out))))
(define dl-relation (fn (db name) (dl-rel-tuples db name)))

View File

@@ -1,12 +1,13 @@
{
"lang": "datalog",
"total_passed": 72,
"total_passed": 87,
"total_failed": 0,
"total": 72,
"total": 87,
"suites": [
{"name":"tokenize","passed":26,"failed":0,"total":26},
{"name":"parse","passed":18,"failed":0,"total":18},
{"name":"unify","passed":28,"failed":0,"total":28}
{"name":"unify","passed":28,"failed":0,"total":28},
{"name":"eval","passed":15,"failed":0,"total":15}
],
"generated": "2026-05-07T23:34:02+00:00"
"generated": "2026-05-07T23:40:51+00:00"
}

View File

@@ -1,9 +1,10 @@
# datalog scoreboard
**72 / 72 passing** (0 failure(s)).
**87 / 87 passing** (0 failure(s)).
| Suite | Passed | Total | Status |
|-------|--------|-------|--------|
| tokenize | 26 | 26 | ok |
| parse | 18 | 18 | ok |
| unify | 28 | 28 | ok |
| eval | 15 | 15 | ok |

206
lib/datalog/tests/eval.sx Normal file
View File

@@ -0,0 +1,206 @@
;; lib/datalog/tests/eval.sx — naive evaluation + safety analysis tests.
(define dl-et-pass 0)
(define dl-et-fail 0)
(define dl-et-failures (list))
;; Same deep-equal helper used in other suites.
(define
dl-et-deep=?
(fn
(a b)
(cond
((and (list? a) (list? b))
(and (= (len a) (len b)) (dl-et-deq-l? a b 0)))
((and (dict? a) (dict? b))
(let
((ka (keys a)) (kb (keys b)))
(and (= (len ka) (len kb)) (dl-et-deq-d? a b ka 0))))
((and (number? a) (number? b)) (= a b))
(else (equal? a b)))))
(define
dl-et-deq-l?
(fn
(a b i)
(cond
((>= i (len a)) true)
((not (dl-et-deep=? (nth a i) (nth b i))) false)
(else (dl-et-deq-l? a b (+ i 1))))))
(define
dl-et-deq-d?
(fn
(a b ka i)
(cond
((>= i (len ka)) true)
((let ((k (nth ka i))) (not (dl-et-deep=? (get a k) (get b k))))
false)
(else (dl-et-deq-d? a b ka (+ i 1))))))
;; Set-equality on lists (order-independent, uses dl-et-deep=?).
(define
dl-et-set=?
(fn
(a b)
(and (= (len a) (len b)) (dl-et-subset? a b) (dl-et-subset? b a))))
(define
dl-et-subset?
(fn
(xs ys)
(cond
((= (len xs) 0) true)
((not (dl-et-contains? ys (first xs))) false)
(else (dl-et-subset? (rest xs) ys)))))
(define
dl-et-contains?
(fn
(xs target)
(cond
((= (len xs) 0) false)
((dl-et-deep=? (first xs) target) true)
(else (dl-et-contains? (rest xs) target)))))
(define
dl-et-test!
(fn
(name got expected)
(if
(dl-et-deep=? got expected)
(set! dl-et-pass (+ dl-et-pass 1))
(do
(set! dl-et-fail (+ dl-et-fail 1))
(append!
dl-et-failures
(str name "\n expected: " expected "\n got: " got))))))
(define
dl-et-test-set!
(fn
(name got expected)
(if
(dl-et-set=? got expected)
(set! dl-et-pass (+ dl-et-pass 1))
(do
(set! dl-et-fail (+ dl-et-fail 1))
(append!
dl-et-failures
(str
name
"\n expected (set): "
expected
"\n got: "
got))))))
(define
dl-et-throws?
(fn
(thunk)
(let
((threw false))
(do (guard (e (#t (set! threw true))) (thunk)) threw))))
(define
dl-et-run-all!
(fn
()
(do
(dl-et-test-set!
"fact lookup any"
(dl-query
(dl-program "parent(tom, bob). parent(bob, ann).")
(list (quote parent) (quote X) (quote Y)))
(list {:X (quote tom) :Y (quote bob)} {:X (quote bob) :Y (quote ann)}))
(dl-et-test-set!
"fact lookup constant arg"
(dl-query
(dl-program "parent(tom, bob). parent(tom, liz). parent(bob, ann).")
(list (quote parent) (quote tom) (quote Y)))
(list {:Y (quote bob)} {:Y (quote liz)}))
(dl-et-test-set!
"no match"
(dl-query
(dl-program "parent(tom, bob).")
(list (quote parent) (quote nobody) (quote X)))
(list))
(dl-et-test-set!
"ancestor closure"
(dl-query
(dl-program
"parent(tom, bob). parent(bob, ann). parent(ann, pat).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).")
(list (quote ancestor) (quote tom) (quote X)))
(list {:X (quote bob)} {:X (quote ann)} {:X (quote pat)}))
(dl-et-test-set!
"sibling"
(dl-query
(dl-program
"parent(tom, bob). parent(tom, liz). parent(jane, bob). parent(jane, liz).\n sibling(X, Y) :- parent(P, X), parent(P, Y).")
(list (quote sibling) (quote bob) (quote Y)))
(list {:Y (quote bob)} {:Y (quote liz)}))
(dl-et-test-set!
"same-generation"
(dl-query
(dl-program
"parent(tom, bob). parent(tom, liz). parent(bob, ann). parent(liz, joe).\n person(tom). person(bob). person(liz). person(ann). person(joe).\n sg(X, X) :- person(X).\n sg(X, Y) :- parent(P1, X), sg(P1, P2), parent(P2, Y).")
(list (quote sg) (quote ann) (quote X)))
(list {:X (quote ann)} {:X (quote joe)}))
(dl-et-test!
"ancestor count"
(let
((db (dl-program "parent(a, b). parent(b, c). parent(c, d).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).")))
(do (dl-saturate! db) (len (dl-relation db "ancestor"))))
6)
(dl-et-test-set!
"grandparent"
(dl-query
(dl-program
"parent(a, b). parent(b, c). parent(c, d).\n grandparent(X, Z) :- parent(X, Y), parent(Y, Z).")
(list (quote grandparent) (quote X) (quote Y)))
(list {:X (quote a) :Y (quote c)} {:X (quote b) :Y (quote d)}))
(dl-et-test!
"no recursion infinite loop"
(let
((db (dl-program "edge(1, 2). edge(2, 3). edge(3, 1).\n reach(X, Y) :- edge(X, Y).\n reach(X, Z) :- edge(X, Y), reach(Y, Z).")))
(do (dl-saturate! db) (len (dl-relation db "reach"))))
9)
(dl-et-test!
"unsafe head var"
(dl-et-throws? (fn () (dl-program "p(X, Y) :- q(X).")))
true)
(dl-et-test!
"unsafe — empty body"
(dl-et-throws? (fn () (dl-program "p(X) :- .")))
true)
(dl-et-test!
"underscore var ok"
(dl-et-throws? (fn () (dl-program "p(X, _) :- q(X).")))
false)
(dl-et-test!
"var only in head — unsafe"
(dl-et-throws? (fn () (dl-program "p(X, Y) :- q(Z).")))
true)
(dl-et-test!
"head var bound by body"
(dl-et-throws? (fn () (dl-program "p(X) :- q(X).")))
false)
(dl-et-test!
"head subset of body"
(dl-et-throws?
(fn
()
(dl-program
"edge(a,b). edge(b,c). reach(X, Z) :- edge(X, Y), edge(Y, Z).")))
false))))
(define
dl-eval-tests-run!
(fn
()
(do
(set! dl-et-pass 0)
(set! dl-et-fail 0)
(set! dl-et-failures (list))
(dl-et-run-all!)
{:failures dl-et-failures :total (+ dl-et-pass dl-et-fail) :passed dl-et-pass :failed dl-et-fail})))

View File

@@ -82,15 +82,26 @@ Key differences from Prolog:
Lists/tuples unify element-wise (used for arithmetic compounds too).
- [x] Tests in `lib/datalog/tests/unify.sx` (28). 72 / 72 conformance.
### Phase 3 — extensional DB + naive evaluation
- [ ] EDB: `{:relation-name → set-of-ground-tuples}` using SX sets (Phase 18 of primitives)
- [ ] `dl-add-fact!` `db` `relation` `args` → add ground tuple
- [ ] `dl-add-rule!` `db` `head` `body` → add rule clause
- [ ] Naive evaluation: iterate rules until fixpoint
For each rule, for each combination of body tuples that unify, derive head tuple.
Repeat until no new tuples added.
- [ ] `dl-query` `db` `goal` → list of substitutions satisfying goal against derived DB
- [ ] Tests: transitive closure (ancestor), sibling, same-generation — classic Datalog programs
### Phase 3 — extensional DB + naive evaluation + safety analysis
- [x] EDB+IDB combined: `{:facts {<rel-name-string> -> (literal ...)}}`
relations indexed by name; tuples stored as full literals so they
unify directly. Dedup on insert via `dl-tuple-equal?`.
- [x] `dl-add-fact! db lit` (rejects non-ground) and `dl-add-rule! db rule`
(rejects unsafe). `dl-program source` parses + loads in one step.
- [x] Naive evaluation `dl-saturate! db`: iterate rules until no new tuples.
`dl-find-bindings` recursively joins body literals; `dl-match-positive`
unifies a literal against every tuple in the relation.
- [x] `dl-query db goal` → list of substitutions over `goal`'s vars,
deduplicated. `dl-relation db name` for derived tuples.
- [x] Safety analysis at `dl-add-rule!` time: every head variable except
`_` must appear in some positive body literal. Built-ins and negated
literals do not satisfy safety. Helpers `dl-positive-body-vars`,
`dl-rule-unsafe-head-vars` exposed for later phases.
- [x] Negation and arithmetic built-ins error cleanly at saturate time
(Phase 4 / Phase 7 will swap in real semantics).
- [x] Tests in `lib/datalog/tests/eval.sx` (15): transitive closure,
sibling, same-generation, grandparent, cyclic graph reach, six
safety cases. 87 / 87 conformance.
### Phase 4 — semi-naive evaluation (performance)
- [ ] Delta sets: track newly derived tuples per iteration
@@ -149,6 +160,17 @@ _(none yet)_
_Newest first._
- 2026-05-07 — Phase 3 done. `lib/datalog/db.sx` (~250 LOC) holds facts
indexed by relation name plus the rules list, with `dl-add-fact!` /
`dl-add-rule!` (rejects non-ground facts and unsafe rules);
`lib/datalog/eval.sx` (~150 LOC) implements the naive bottom-up
fixpoint via `dl-find-bindings`/`dl-match-positive`/`dl-saturate!`
and `dl-query` (deduped projected substitutions). Safety analysis
rejects unsafe head vars at load time. Negation and arithmetic
built-ins raise clean errors (lifted in later phases). 15 eval
tests cover transitive closure, sibling, same-generation, cyclic
graph reach, and six safety violations. Conformance 87 / 87.
- 2026-05-07 — Phase 2 done. `lib/datalog/unify.sx` (~140 LOC):
`dl-var?` (case + underscore), `dl-walk`, `dl-bind`, `dl-unify` (returns
extended dict subst or `nil`), `dl-apply-subst`, `dl-ground?`, `dl-vars-of`.