diff --git a/lib/datalog/conformance.conf b/lib/datalog/conformance.conf index 50ca8310..58a483bc 100644 --- a/lib/datalog/conformance.conf +++ b/lib/datalog/conformance.conf @@ -7,10 +7,13 @@ PRELOADS=( lib/datalog/tokenizer.sx lib/datalog/parser.sx lib/datalog/unify.sx + lib/datalog/db.sx + lib/datalog/eval.sx ) SUITES=( "tokenize:lib/datalog/tests/tokenize.sx:(dl-tokenize-tests-run!)" "parse:lib/datalog/tests/parse.sx:(dl-parse-tests-run!)" "unify:lib/datalog/tests/unify.sx:(dl-unify-tests-run!)" + "eval:lib/datalog/tests/eval.sx:(dl-eval-tests-run!)" ) diff --git a/lib/datalog/db.sx b/lib/datalog/db.sx new file mode 100644 index 00000000..af189147 --- /dev/null +++ b/lib/datalog/db.sx @@ -0,0 +1,263 @@ +;; lib/datalog/db.sx — Datalog database (EDB + IDB + rules) + safety. +;; +;; A db is a mutable dict: +;; {:facts { -> (literal ...)} +;; :rules ({:head literal :body (literal ...)} ...)} +;; +;; Facts are stored as full literals `(rel arg ... arg)` so they unify +;; directly against rule body literals. Each relation's tuple list is +;; deduplicated on insert. +;; +;; Phase 3 makes no distinction between EDB (asserted) and IDB (derived); +;; a future phase may track provenance for retraction. + +(define dl-make-db (fn () {:facts {} :rules (list)})) + +;; The relation name (as string) of a positive literal, or of the +;; underlying literal of a negative one. nil for malformed input. +(define + dl-rel-name + (fn + (lit) + (cond + ((and (dict? lit) (has-key? lit :neg)) (dl-rel-name (get lit :neg))) + ((and (list? lit) (> (len lit) 0) (symbol? (first lit))) + (symbol->string (first lit))) + (else nil)))) + +(define dl-builtin-rels (list "<" "<=" ">" ">=" "=" "!=" "is")) + +(define + dl-member-string? + (fn + (s xs) + (cond + ((= (len xs) 0) false) + ((= (first xs) s) true) + (else (dl-member-string? s (rest xs)))))) + +(define + dl-builtin? + (fn + (lit) + (and + (list? lit) + (> (len lit) 0) + (let + ((rel (dl-rel-name lit))) + (cond + ((nil? rel) false) + (else (dl-member-string? rel dl-builtin-rels))))))) + +(define + dl-positive-lit? + (fn + (lit) + (cond + ((and (dict? lit) (has-key? lit :neg)) false) + ((dl-builtin? lit) false) + ((and (list? lit) (> (len lit) 0)) true) + (else false)))) + +;; Membership using dl-deep tuple equality (handles var/constant symbols +;; and numbers consistently). +(define + dl-tuple-equal? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-tuple-equal-list? a b 0))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-tuple-equal-list? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-tuple-equal? (nth a i) (nth b i))) false) + (else (dl-tuple-equal-list? a b (+ i 1)))))) + +(define + dl-tuple-member? + (fn + (lit lits) + (cond + ((= (len lits) 0) false) + ((dl-tuple-equal? lit (first lits)) true) + (else (dl-tuple-member? lit (rest lits)))))) + +;; Ensure :facts has a list for the given relation key, then return it. +(define + dl-ensure-rel! + (fn + (db rel-key) + (let + ((facts (get db :facts))) + (do + (when + (not (has-key? facts rel-key)) + (dict-set! facts rel-key (list))) + (get facts rel-key))))) + +;; All tuples currently known for a relation (EDB ∪ IDB). Returns empty +;; list when relation hasn't been seen. +(define + dl-rel-tuples + (fn + (db rel-key) + (let + ((facts (get db :facts))) + (if (has-key? facts rel-key) (get facts rel-key) (list))))) + +;; Add a ground literal. Returns true iff the literal was new. +(define + dl-add-fact! + (fn + (db lit) + (cond + ((not (and (list? lit) (> (len lit) 0))) + (error (str "dl-add-fact!: expected literal list, got " lit))) + ((not (dl-ground? lit (dl-empty-subst))) + (error (str "dl-add-fact!: expected ground literal, got " lit))) + (else + (let + ((rel-key (dl-rel-name lit))) + (let + ((tuples (dl-ensure-rel! db rel-key))) + (cond + ((dl-tuple-member? lit tuples) false) + (else (do (append! tuples lit) true))))))))) + +;; Collect variables appearing in the positive body literals of `body`. +(define + dl-positive-body-vars + (fn + (body) + (let + ((vars (list))) + (do + (for-each + (fn + (lit) + (when + (dl-positive-lit? lit) + (for-each + (fn + (v) + (when (not (dl-member-string? v vars)) (append! vars v))) + (dl-vars-of lit)))) + body) + vars)))) + +;; Collect variables in any literal (positive, negated, or built-in). +(define + dl-all-body-vars + (fn + (body) + (let + ((vars (list))) + (do + (for-each + (fn + (lit) + (let + ((target (if (and (dict? lit) (has-key? lit :neg)) (get lit :neg) lit))) + (for-each + (fn + (v) + (when (not (dl-member-string? v vars)) (append! vars v))) + (dl-vars-of target)))) + body) + vars)))) + +;; Return the list of head variables NOT covered by some positive body +;; literal. A safe rule has an empty list. The check ignores '_' since +;; that is treated as a fresh anonymous variable per occurrence. +(define + dl-rule-unsafe-head-vars + (fn + (rule) + (let + ((head (get rule :head)) + (body (get rule :body)) + (head-vars (dl-vars-of head)) + (body-vars (dl-positive-body-vars body)) + (out (list))) + (do + (for-each + (fn + (v) + (when + (and (not (dl-member-string? v body-vars)) (not (= v "_"))) + (append! out v))) + head-vars) + out)))) + +;; Add a rule. Rejects unsafe rules with a pointer at the offending var. +(define + dl-add-rule! + (fn + (db rule) + (cond + ((not (dict? rule)) + (error (str "dl-add-rule!: expected rule dict, got " rule))) + ((not (has-key? rule :head)) + (error (str "dl-add-rule!: rule missing :head, got " rule))) + (else + (let + ((unsafe (dl-rule-unsafe-head-vars rule))) + (cond + ((> (len unsafe) 0) + (error + (str + "dl-add-rule!: unsafe rule — head variable(s) " + unsafe + " do not appear in any positive body literal of " + rule))) + (else + (let + ((rules (get db :rules))) + (do (append! rules rule) true))))))))) + +;; Load a parsed clause (fact or rule) into the db. Queries are ignored. +(define + dl-add-clause! + (fn + (db clause) + (cond + ((has-key? clause :query) false) + ((and (has-key? clause :body) (= (len (get clause :body)) 0)) + (dl-add-fact! db (get clause :head))) + (else (dl-add-rule! db clause))))) + +;; Parse source text and load every clause into the db. Returns the db. +(define + dl-load-program! + (fn + (db source) + (let + ((clauses (dl-parse source))) + (do (for-each (fn (c) (dl-add-clause! db c)) clauses) db)))) + +;; Convenience: build a db from source in one step. +(define + dl-program + (fn (source) (let ((db (dl-make-db))) (dl-load-program! db source)))) + +(define dl-rules (fn (db) (get db :rules))) + +;; Total number of stored ground tuples across all relations. +(define + dl-fact-count + (fn + (db) + (let + ((facts (get db :facts)) (total 0)) + (do + (for-each + (fn (k) (set! total (+ total (len (get facts k))))) + (keys facts)) + total)))) diff --git a/lib/datalog/eval.sx b/lib/datalog/eval.sx new file mode 100644 index 00000000..68c83550 --- /dev/null +++ b/lib/datalog/eval.sx @@ -0,0 +1,153 @@ +;; lib/datalog/eval.sx — naive bottom-up fixpoint evaluator. +;; +;; (dl-saturate! db) iterates rules until no new tuples are derived. +;; The Herbrand base is finite (no function symbols) so termination is +;; guaranteed by the language. +;; +;; Phase 3 handles only positive EDB literals. Negation (Phase 7) and +;; arithmetic built-ins (Phase 4) raise a clear error if encountered. + +;; Match a positive literal against every tuple in the relation; return +;; a list of extended substitutions, one per successful unification. +(define + dl-match-positive + (fn + (lit db subst) + (let + ((rel (dl-rel-name lit)) (results (list))) + (cond + ((nil? rel) (error (str "dl-match-positive: bad literal " lit))) + (else + (let + ((tuples (dl-rel-tuples db rel))) + (do + (for-each + (fn + (tuple) + (let + ((s (dl-unify lit tuple subst))) + (when (not (nil? s)) (append! results s)))) + tuples) + results))))))) + +(define + dl-match-lit + (fn + (lit db subst) + (cond + ((and (dict? lit) (has-key? lit :neg)) + (error "datalog: negation not yet supported (Phase 7)")) + ((dl-builtin? lit) + (error "datalog: built-in predicate not yet supported (Phase 4)")) + ((and (list? lit) (> (len lit) 0)) + (dl-match-positive lit db subst)) + (else (error (str "datalog: unknown body-literal shape: " lit)))))) + +;; Recursively find all substitutions satisfying a list of body literals +;; under `subst`. Returns a list of substitutions. +(define + dl-find-bindings + (fn + (lits db subst) + (cond + ((nil? subst) (list)) + ((= (len lits) 0) (list subst)) + (else + (let + ((options (dl-match-lit (first lits) db subst)) + (results (list))) + (do + (for-each + (fn + (s) + (for-each + (fn (s2) (append! results s2)) + (dl-find-bindings (rest lits) db s))) + options) + results)))))) + +;; Apply a rule once. Returns true if any new tuple was derived. +(define + dl-apply-rule! + (fn + (db rule) + (let + ((head (get rule :head)) (body (get rule :body)) (new? false)) + (do + (for-each + (fn + (s) + (let + ((derived (dl-apply-subst head s))) + (when (dl-add-fact! db derived) (set! new? true)))) + (dl-find-bindings body db (dl-empty-subst))) + new?)))) + +;; Iterate rules to fixpoint. +(define + dl-saturate! + (fn + (db) + (let + ((changed true)) + (do + (define + dl-sat-loop + (fn + () + (when + changed + (do + (set! changed false) + (for-each + (fn (r) (when (dl-apply-rule! db r) (set! changed true))) + (dl-rules db)) + (dl-sat-loop))))) + (dl-sat-loop) + db)))) + +;; Run a goal against the saturated db. Returns a list of substitutions +;; (each restricted to the variables that appeared in `goal`). Duplicate +;; projections are deduplicated so users get the set of answers. +(define + dl-query + (fn + (db goal) + (do + (dl-saturate! db) + (let + ((substs (dl-find-bindings (list goal) db (dl-empty-subst))) + (vars (dl-vars-of goal)) + (results (list))) + (do + (for-each + (fn + (s) + (let + ((proj (dl-project-subst s vars))) + (when + (not (dl-tuple-member? proj results)) + (append! results proj)))) + substs) + results))))) + +;; Project a subst down to a given set of variable names. +(define + dl-project-subst + (fn + (subst names) + (let + ((out {})) + (do + (for-each + (fn + (n) + (let + ((sym (string->symbol n))) + (let + ((v (dl-walk sym subst))) + (dict-set! out n (dl-apply-subst v subst))))) + names) + out)))) + +(define dl-relation (fn (db name) (dl-rel-tuples db name))) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index 837e746f..11113c5c 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,12 +1,13 @@ { "lang": "datalog", - "total_passed": 72, + "total_passed": 87, "total_failed": 0, - "total": 72, + "total": 87, "suites": [ {"name":"tokenize","passed":26,"failed":0,"total":26}, {"name":"parse","passed":18,"failed":0,"total":18}, - {"name":"unify","passed":28,"failed":0,"total":28} + {"name":"unify","passed":28,"failed":0,"total":28}, + {"name":"eval","passed":15,"failed":0,"total":15} ], - "generated": "2026-05-07T23:34:02+00:00" + "generated": "2026-05-07T23:40:51+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index 729e0fa9..e921fcfd 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,9 +1,10 @@ # datalog scoreboard -**72 / 72 passing** (0 failure(s)). +**87 / 87 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| | tokenize | 26 | 26 | ok | | parse | 18 | 18 | ok | | unify | 28 | 28 | ok | +| eval | 15 | 15 | ok | diff --git a/lib/datalog/tests/eval.sx b/lib/datalog/tests/eval.sx new file mode 100644 index 00000000..1c51e9b4 --- /dev/null +++ b/lib/datalog/tests/eval.sx @@ -0,0 +1,206 @@ +;; lib/datalog/tests/eval.sx — naive evaluation + safety analysis tests. + +(define dl-et-pass 0) +(define dl-et-fail 0) +(define dl-et-failures (list)) + +;; Same deep-equal helper used in other suites. +(define + dl-et-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-et-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let + ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-et-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-et-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-et-deep=? (nth a i) (nth b i))) false) + (else (dl-et-deq-l? a b (+ i 1)))))) + +(define + dl-et-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) (not (dl-et-deep=? (get a k) (get b k)))) + false) + (else (dl-et-deq-d? a b ka (+ i 1)))))) + +;; Set-equality on lists (order-independent, uses dl-et-deep=?). +(define + dl-et-set=? + (fn + (a b) + (and (= (len a) (len b)) (dl-et-subset? a b) (dl-et-subset? b a)))) + +(define + dl-et-subset? + (fn + (xs ys) + (cond + ((= (len xs) 0) true) + ((not (dl-et-contains? ys (first xs))) false) + (else (dl-et-subset? (rest xs) ys))))) + +(define + dl-et-contains? + (fn + (xs target) + (cond + ((= (len xs) 0) false) + ((dl-et-deep=? (first xs) target) true) + (else (dl-et-contains? (rest xs) target))))) + +(define + dl-et-test! + (fn + (name got expected) + (if + (dl-et-deep=? got expected) + (set! dl-et-pass (+ dl-et-pass 1)) + (do + (set! dl-et-fail (+ dl-et-fail 1)) + (append! + dl-et-failures + (str name "\n expected: " expected "\n got: " got)))))) + +(define + dl-et-test-set! + (fn + (name got expected) + (if + (dl-et-set=? got expected) + (set! dl-et-pass (+ dl-et-pass 1)) + (do + (set! dl-et-fail (+ dl-et-fail 1)) + (append! + dl-et-failures + (str + name + "\n expected (set): " + expected + "\n got: " + got)))))) + +(define + dl-et-throws? + (fn + (thunk) + (let + ((threw false)) + (do (guard (e (#t (set! threw true))) (thunk)) threw)))) + +(define + dl-et-run-all! + (fn + () + (do + (dl-et-test-set! + "fact lookup any" + (dl-query + (dl-program "parent(tom, bob). parent(bob, ann).") + (list (quote parent) (quote X) (quote Y))) + (list {:X (quote tom) :Y (quote bob)} {:X (quote bob) :Y (quote ann)})) + (dl-et-test-set! + "fact lookup constant arg" + (dl-query + (dl-program "parent(tom, bob). parent(tom, liz). parent(bob, ann).") + (list (quote parent) (quote tom) (quote Y))) + (list {:Y (quote bob)} {:Y (quote liz)})) + (dl-et-test-set! + "no match" + (dl-query + (dl-program "parent(tom, bob).") + (list (quote parent) (quote nobody) (quote X))) + (list)) + (dl-et-test-set! + "ancestor closure" + (dl-query + (dl-program + "parent(tom, bob). parent(bob, ann). parent(ann, pat).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).") + (list (quote ancestor) (quote tom) (quote X))) + (list {:X (quote bob)} {:X (quote ann)} {:X (quote pat)})) + (dl-et-test-set! + "sibling" + (dl-query + (dl-program + "parent(tom, bob). parent(tom, liz). parent(jane, bob). parent(jane, liz).\n sibling(X, Y) :- parent(P, X), parent(P, Y).") + (list (quote sibling) (quote bob) (quote Y))) + (list {:Y (quote bob)} {:Y (quote liz)})) + (dl-et-test-set! + "same-generation" + (dl-query + (dl-program + "parent(tom, bob). parent(tom, liz). parent(bob, ann). parent(liz, joe).\n person(tom). person(bob). person(liz). person(ann). person(joe).\n sg(X, X) :- person(X).\n sg(X, Y) :- parent(P1, X), sg(P1, P2), parent(P2, Y).") + (list (quote sg) (quote ann) (quote X))) + (list {:X (quote ann)} {:X (quote joe)})) + (dl-et-test! + "ancestor count" + (let + ((db (dl-program "parent(a, b). parent(b, c). parent(c, d).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (do (dl-saturate! db) (len (dl-relation db "ancestor")))) + 6) + (dl-et-test-set! + "grandparent" + (dl-query + (dl-program + "parent(a, b). parent(b, c). parent(c, d).\n grandparent(X, Z) :- parent(X, Y), parent(Y, Z).") + (list (quote grandparent) (quote X) (quote Y))) + (list {:X (quote a) :Y (quote c)} {:X (quote b) :Y (quote d)})) + (dl-et-test! + "no recursion infinite loop" + (let + ((db (dl-program "edge(1, 2). edge(2, 3). edge(3, 1).\n reach(X, Y) :- edge(X, Y).\n reach(X, Z) :- edge(X, Y), reach(Y, Z)."))) + (do (dl-saturate! db) (len (dl-relation db "reach")))) + 9) + (dl-et-test! + "unsafe head var" + (dl-et-throws? (fn () (dl-program "p(X, Y) :- q(X)."))) + true) + (dl-et-test! + "unsafe — empty body" + (dl-et-throws? (fn () (dl-program "p(X) :- ."))) + true) + (dl-et-test! + "underscore var ok" + (dl-et-throws? (fn () (dl-program "p(X, _) :- q(X)."))) + false) + (dl-et-test! + "var only in head — unsafe" + (dl-et-throws? (fn () (dl-program "p(X, Y) :- q(Z)."))) + true) + (dl-et-test! + "head var bound by body" + (dl-et-throws? (fn () (dl-program "p(X) :- q(X)."))) + false) + (dl-et-test! + "head subset of body" + (dl-et-throws? + (fn + () + (dl-program + "edge(a,b). edge(b,c). reach(X, Z) :- edge(X, Y), edge(Y, Z)."))) + false)))) + +(define + dl-eval-tests-run! + (fn + () + (do + (set! dl-et-pass 0) + (set! dl-et-fail 0) + (set! dl-et-failures (list)) + (dl-et-run-all!) + {:failures dl-et-failures :total (+ dl-et-pass dl-et-fail) :passed dl-et-pass :failed dl-et-fail}))) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 68dc785f..c978043c 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -82,15 +82,26 @@ Key differences from Prolog: Lists/tuples unify element-wise (used for arithmetic compounds too). - [x] Tests in `lib/datalog/tests/unify.sx` (28). 72 / 72 conformance. -### Phase 3 — extensional DB + naive evaluation -- [ ] EDB: `{:relation-name → set-of-ground-tuples}` using SX sets (Phase 18 of primitives) -- [ ] `dl-add-fact!` `db` `relation` `args` → add ground tuple -- [ ] `dl-add-rule!` `db` `head` `body` → add rule clause -- [ ] Naive evaluation: iterate rules until fixpoint - For each rule, for each combination of body tuples that unify, derive head tuple. - Repeat until no new tuples added. -- [ ] `dl-query` `db` `goal` → list of substitutions satisfying goal against derived DB -- [ ] Tests: transitive closure (ancestor), sibling, same-generation — classic Datalog programs +### Phase 3 — extensional DB + naive evaluation + safety analysis +- [x] EDB+IDB combined: `{:facts { -> (literal ...)}}` — + relations indexed by name; tuples stored as full literals so they + unify directly. Dedup on insert via `dl-tuple-equal?`. +- [x] `dl-add-fact! db lit` (rejects non-ground) and `dl-add-rule! db rule` + (rejects unsafe). `dl-program source` parses + loads in one step. +- [x] Naive evaluation `dl-saturate! db`: iterate rules until no new tuples. + `dl-find-bindings` recursively joins body literals; `dl-match-positive` + unifies a literal against every tuple in the relation. +- [x] `dl-query db goal` → list of substitutions over `goal`'s vars, + deduplicated. `dl-relation db name` for derived tuples. +- [x] Safety analysis at `dl-add-rule!` time: every head variable except + `_` must appear in some positive body literal. Built-ins and negated + literals do not satisfy safety. Helpers `dl-positive-body-vars`, + `dl-rule-unsafe-head-vars` exposed for later phases. +- [x] Negation and arithmetic built-ins error cleanly at saturate time + (Phase 4 / Phase 7 will swap in real semantics). +- [x] Tests in `lib/datalog/tests/eval.sx` (15): transitive closure, + sibling, same-generation, grandparent, cyclic graph reach, six + safety cases. 87 / 87 conformance. ### Phase 4 — semi-naive evaluation (performance) - [ ] Delta sets: track newly derived tuples per iteration @@ -149,6 +160,17 @@ _(none yet)_ _Newest first._ +- 2026-05-07 — Phase 3 done. `lib/datalog/db.sx` (~250 LOC) holds facts + indexed by relation name plus the rules list, with `dl-add-fact!` / + `dl-add-rule!` (rejects non-ground facts and unsafe rules); + `lib/datalog/eval.sx` (~150 LOC) implements the naive bottom-up + fixpoint via `dl-find-bindings`/`dl-match-positive`/`dl-saturate!` + and `dl-query` (deduped projected substitutions). Safety analysis + rejects unsafe head vars at load time. Negation and arithmetic + built-ins raise clean errors (lifted in later phases). 15 eval + tests cover transitive closure, sibling, same-generation, cyclic + graph reach, and six safety violations. Conformance 87 / 87. + - 2026-05-07 — Phase 2 done. `lib/datalog/unify.sx` (~140 LOC): `dl-var?` (case + underscore), `dl-walk`, `dl-bind`, `dl-unify` (returns extended dict subst or `nil`), `dl-apply-subst`, `dl-ground?`, `dl-vars-of`.