diff --git a/lib/datalog/conformance.conf b/lib/datalog/conformance.conf index a324032b..50ca8310 100644 --- a/lib/datalog/conformance.conf +++ b/lib/datalog/conformance.conf @@ -6,9 +6,11 @@ MODE=dict PRELOADS=( lib/datalog/tokenizer.sx lib/datalog/parser.sx + lib/datalog/unify.sx ) SUITES=( "tokenize:lib/datalog/tests/tokenize.sx:(dl-tokenize-tests-run!)" "parse:lib/datalog/tests/parse.sx:(dl-parse-tests-run!)" + "unify:lib/datalog/tests/unify.sx:(dl-unify-tests-run!)" ) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index b6b5c6fa..837e746f 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,11 +1,12 @@ { "lang": "datalog", - "total_passed": 44, + "total_passed": 72, "total_failed": 0, - "total": 44, + "total": 72, "suites": [ {"name":"tokenize","passed":26,"failed":0,"total":26}, - {"name":"parse","passed":18,"failed":0,"total":18} + {"name":"parse","passed":18,"failed":0,"total":18}, + {"name":"unify","passed":28,"failed":0,"total":28} ], - "generated": "2026-05-07T23:30:07+00:00" + "generated": "2026-05-07T23:34:02+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index 9cd60338..729e0fa9 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,8 +1,9 @@ # datalog scoreboard -**44 / 44 passing** (0 failure(s)). +**72 / 72 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| | tokenize | 26 | 26 | ok | | parse | 18 | 18 | ok | +| unify | 28 | 28 | ok | diff --git a/lib/datalog/tests/unify.sx b/lib/datalog/tests/unify.sx new file mode 100644 index 00000000..873cc655 --- /dev/null +++ b/lib/datalog/tests/unify.sx @@ -0,0 +1,185 @@ +;; lib/datalog/tests/unify.sx — unification + substitution tests. + +(define dl-ut-pass 0) +(define dl-ut-fail 0) +(define dl-ut-failures (list)) + +(define + dl-ut-deep-equal? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-ut-deq-list? a b 0))) + ((and (dict? a) (dict? b)) + (let + ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-ut-deq-dict? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-ut-deq-list? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-ut-deep-equal? (nth a i) (nth b i))) false) + (else (dl-ut-deq-list? a b (+ i 1)))))) + +(define + dl-ut-deq-dict? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) (not (dl-ut-deep-equal? (get a k) (get b k)))) + false) + (else (dl-ut-deq-dict? a b ka (+ i 1)))))) + +(define + dl-ut-test! + (fn + (name got expected) + (if + (dl-ut-deep-equal? got expected) + (set! dl-ut-pass (+ dl-ut-pass 1)) + (do + (set! dl-ut-fail (+ dl-ut-fail 1)) + (append! + dl-ut-failures + (str name "\n expected: " expected "\n got: " got)))))) + +(define + dl-ut-run-all! + (fn + () + (do + (dl-ut-test! "var? uppercase" (dl-var? (quote X)) true) + (dl-ut-test! "var? underscore" (dl-var? (quote _foo)) true) + (dl-ut-test! "var? lowercase" (dl-var? (quote tom)) false) + (dl-ut-test! "var? number" (dl-var? 5) false) + (dl-ut-test! "var? string" (dl-var? "hi") false) + (dl-ut-test! "var? list" (dl-var? (list 1)) false) + (dl-ut-test! + "atom-atom match" + (dl-unify (quote tom) (quote tom) (dl-empty-subst)) + {}) + (dl-ut-test! + "atom-atom fail" + (dl-unify (quote tom) (quote bob) (dl-empty-subst)) + nil) + (dl-ut-test! + "num-num match" + (dl-unify 5 5 (dl-empty-subst)) + {}) + (dl-ut-test! + "num-num fail" + (dl-unify 5 6 (dl-empty-subst)) + nil) + (dl-ut-test! + "string match" + (dl-unify "hi" "hi" (dl-empty-subst)) + {}) + (dl-ut-test! "string fail" (dl-unify "hi" "bye" (dl-empty-subst)) nil) + (dl-ut-test! + "var-atom binds" + (dl-unify (quote X) (quote tom) (dl-empty-subst)) + {:X (quote tom)}) + (dl-ut-test! + "atom-var binds" + (dl-unify (quote tom) (quote X) (dl-empty-subst)) + {:X (quote tom)}) + (dl-ut-test! + "var-var same" + (dl-unify (quote X) (quote X) (dl-empty-subst)) + {}) + (dl-ut-test! + "var-var bind" + (let + ((s (dl-unify (quote X) (quote Y) (dl-empty-subst)))) + (dl-walk (quote X) s)) + (quote Y)) + (dl-ut-test! + "tuple match" + (dl-unify + (list (quote parent) (quote X) (quote bob)) + (list (quote parent) (quote tom) (quote Y)) + (dl-empty-subst)) + {:X (quote tom) :Y (quote bob)}) + (dl-ut-test! + "tuple arity mismatch" + (dl-unify + (list (quote p) (quote X)) + (list (quote p) (quote a) (quote b)) + (dl-empty-subst)) + nil) + (dl-ut-test! + "tuple head mismatch" + (dl-unify + (list (quote p) (quote X)) + (list (quote q) (quote X)) + (dl-empty-subst)) + nil) + (dl-ut-test! + "walk chain" + (let + ((s1 (dl-unify (quote X) (quote Y) (dl-empty-subst)))) + (let + ((s2 (dl-unify (quote Y) (quote tom) s1))) + (dl-walk (quote X) s2))) + (quote tom)) + (dl-ut-test! + "apply subst on tuple" + (let + ((s (dl-bind (quote X) (quote tom) (dl-empty-subst)))) + (dl-apply-subst (list (quote parent) (quote X) (quote Y)) s)) + (list (quote parent) (quote tom) (quote Y))) + (dl-ut-test! + "ground? all const" + (dl-ground? + (list (quote p) (quote tom) 5) + (dl-empty-subst)) + true) + (dl-ut-test! + "ground? unbound var" + (dl-ground? (list (quote p) (quote X)) (dl-empty-subst)) + false) + (dl-ut-test! + "ground? bound var" + (let + ((s (dl-bind (quote X) (quote tom) (dl-empty-subst)))) + (dl-ground? (list (quote p) (quote X)) s)) + true) + (dl-ut-test! + "ground? bare var" + (dl-ground? (quote X) (dl-empty-subst)) + false) + (dl-ut-test! + "vars-of basic" + (dl-vars-of + (list (quote p) (quote X) (quote tom) (quote Y) (quote X))) + (list "X" "Y")) + (dl-ut-test! + "vars-of ground" + (dl-vars-of (list (quote p) (quote tom) (quote bob))) + (list)) + (dl-ut-test! + "vars-of nested compound" + (dl-vars-of + (list + (quote is) + (quote Z) + (list (string->symbol "+") (quote X) 1))) + (list "Z" "X"))))) + +(define + dl-unify-tests-run! + (fn + () + (do + (set! dl-ut-pass 0) + (set! dl-ut-fail 0) + (set! dl-ut-failures (list)) + (dl-ut-run-all!) + {:failures dl-ut-failures :total (+ dl-ut-pass dl-ut-fail) :passed dl-ut-pass :failed dl-ut-fail}))) diff --git a/lib/datalog/unify.sx b/lib/datalog/unify.sx new file mode 100644 index 00000000..7313a603 --- /dev/null +++ b/lib/datalog/unify.sx @@ -0,0 +1,159 @@ +;; lib/datalog/unify.sx — unification + substitution for Datalog terms. +;; +;; Term taxonomy (after parsing): +;; variable — SX symbol whose first char is uppercase A–Z or '_'. +;; constant — SX symbol whose first char is lowercase a–z (atom name). +;; number — numeric literal. +;; string — string literal. +;; compound — SX list (functor arg ... arg). In core Datalog these +;; only appear as arithmetic expressions (see Phase 4 +;; safety analysis); compound-against-compound unification +;; is supported anyway for completeness. +;; +;; Substitutions are immutable dicts keyed by variable name (string). +;; A failed unification returns nil; success returns the extended subst. + +(define dl-empty-subst (fn () {})) + +(define + dl-var? + (fn + (term) + (and + (symbol? term) + (let + ((name (symbol->string term))) + (and + (> (len name) 0) + (let + ((c (slice name 0 1))) + (or (and (>= c "A") (<= c "Z")) (= c "_")))))))) + +;; Walk: chase variable bindings until we hit a non-variable or an unbound +;; variable. The result is either a non-variable term or an unbound var. +(define + dl-walk + (fn + (term subst) + (if + (dl-var? term) + (let + ((name (symbol->string term))) + (if + (and (dict? subst) (has-key? subst name)) + (dl-walk (get subst name) subst) + term)) + term))) + +;; Bind a variable symbol to a value in subst, returning a new subst. +(define + dl-bind + (fn (var-sym value subst) (assoc subst (symbol->string var-sym) value))) + +(define + dl-unify + (fn + (t1 t2 subst) + (if + (nil? subst) + nil + (let + ((u1 (dl-walk t1 subst)) (u2 (dl-walk t2 subst))) + (cond + ((dl-var? u1) + (cond + ((and (dl-var? u2) (= (symbol->string u1) (symbol->string u2))) + subst) + (else (dl-bind u1 u2 subst)))) + ((dl-var? u2) (dl-bind u2 u1 subst)) + ((and (list? u1) (list? u2)) + (if + (= (len u1) (len u2)) + (dl-unify-list u1 u2 subst 0) + nil)) + ((and (number? u1) (number? u2)) (if (= u1 u2) subst nil)) + ((and (string? u1) (string? u2)) (if (= u1 u2) subst nil)) + ((and (symbol? u1) (symbol? u2)) + (if (= (symbol->string u1) (symbol->string u2)) subst nil)) + (else nil)))))) + +(define + dl-unify-list + (fn + (a b subst i) + (cond + ((nil? subst) nil) + ((>= i (len a)) subst) + (else + (dl-unify-list + a + b + (dl-unify (nth a i) (nth b i) subst) + (+ i 1)))))) + +;; Apply substitution: walk the term and recurse into lists. +(define + dl-apply-subst + (fn + (term subst) + (let + ((w (dl-walk term subst))) + (if (list? w) (map (fn (x) (dl-apply-subst x subst)) w) w)))) + +;; Ground? — true iff no free variables remain after walking. +(define + dl-ground? + (fn + (term subst) + (let + ((w (dl-walk term subst))) + (cond + ((dl-var? w) false) + ((list? w) (dl-ground-list? w subst 0)) + (else true))))) + +(define + dl-ground-list? + (fn + (xs subst i) + (cond + ((>= i (len xs)) true) + ((not (dl-ground? (nth xs i) subst)) false) + (else (dl-ground-list? xs subst (+ i 1)))))) + +;; Return the list of variable names appearing in a term (deduped, in +;; left-to-right order). Useful for safety analysis later. +(define + dl-vars-of + (fn (term) (let ((seen (list))) (do (dl-vars-of-aux term seen) seen)))) + +(define + dl-vars-of-aux + (fn + (term acc) + (cond + ((dl-var? term) + (let + ((name (symbol->string term))) + (when (not (dl-member? name acc)) (append! acc name)))) + ((list? term) (dl-vars-of-list term acc 0)) + (else nil)))) + +(define + dl-vars-of-list + (fn + (xs acc i) + (when + (< i (len xs)) + (do + (dl-vars-of-aux (nth xs i) acc) + (dl-vars-of-list xs acc (+ i 1)))))) + +(define + dl-member? + (fn + (x xs) + (cond + ((= (len xs) 0) false) + ((= (first xs) x) true) + (else (dl-member? x (rest xs)))))) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 784cf0df..68dc785f 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -75,10 +75,12 @@ Key differences from Prolog: Conformance harness: `bash lib/datalog/conformance.sh` → 44 / 44 passing. ### Phase 2 — unification + substitution -- [ ] Share or port unification from `lib/prolog/` — term walk, occurs check off by default -- [ ] `dl-unify` `t1` `t2` `subst` → extended subst or nil (no function symbols means simpler) -- [ ] `dl-ground?` `term` → bool — all variables bound in substitution -- [ ] Tests: atom/atom, var/atom, var/var, list args +- [x] Ported (not shared) from `lib/prolog/` — term walk, no occurs check. +- [x] `dl-unify t1 t2 subst` → extended subst dict, or `nil` on failure. +- [x] `dl-walk`, `dl-bind`, `dl-apply-subst`, `dl-ground?`, `dl-vars-of`. +- [x] Substitutions are immutable dicts keyed by variable name (string). + Lists/tuples unify element-wise (used for arithmetic compounds too). +- [x] Tests in `lib/datalog/tests/unify.sx` (28). 72 / 72 conformance. ### Phase 3 — extensional DB + naive evaluation - [ ] EDB: `{:relation-name → set-of-ground-tuples}` using SX sets (Phase 18 of primitives) @@ -147,6 +149,12 @@ _(none yet)_ _Newest first._ +- 2026-05-07 — Phase 2 done. `lib/datalog/unify.sx` (~140 LOC): + `dl-var?` (case + underscore), `dl-walk`, `dl-bind`, `dl-unify` (returns + extended dict subst or `nil`), `dl-apply-subst`, `dl-ground?`, `dl-vars-of`. + Substitutions are immutable dicts; `assoc` builds extended copies. 28 + unify tests; conformance now 72 / 72. + - 2026-05-07 — Phase 1 done. `lib/datalog/tokenizer.sx` (~190 LOC) emits `{:type :value :pos}` tokens; `lib/datalog/parser.sx` (~150 LOC) produces `{:head … :body …}` / `{:query …}` clauses, with nested compounds