From 7ce723f73233c97cecdd4e212302168410aa86f3 Mon Sep 17 00:00:00 2001 From: giles Date: Thu, 7 May 2026 23:51:21 +0000 Subject: [PATCH] datalog: built-ins + body arithmetic + order-aware safety (Phase 4, 106/106) New lib/datalog/builtins.sx: (< <= > >= = !=) and (is X expr) with + - * /. dl-eval-arith recursively evaluates nested compounds. Safety analysis now walks body left-to-right tracking the bound set: comparisons require all args bound, is RHS vars must be bound (LHS becomes bound), = special-cases the var/non-var combos. db.sx keeps the simple safety check as a forward-reference fallback; builtins.sx redefines dl-rule-check-safety to the comprehensive version. eval.sx dispatches built-ins through dl-eval-builtin instead of erroring. 19 new tests. --- lib/datalog/builtins.sx | 300 ++++++++++++++++++++++++++++++++++ lib/datalog/conformance.conf | 2 + lib/datalog/db.sx | 124 +++++--------- lib/datalog/eval.sx | 20 +-- lib/datalog/scoreboard.json | 9 +- lib/datalog/scoreboard.md | 3 +- lib/datalog/tests/builtins.sx | 228 ++++++++++++++++++++++++++ plans/datalog-on-sx.md | 41 +++-- 8 files changed, 617 insertions(+), 110 deletions(-) create mode 100644 lib/datalog/builtins.sx create mode 100644 lib/datalog/tests/builtins.sx diff --git a/lib/datalog/builtins.sx b/lib/datalog/builtins.sx new file mode 100644 index 00000000..dfd2307a --- /dev/null +++ b/lib/datalog/builtins.sx @@ -0,0 +1,300 @@ +;; lib/datalog/builtins.sx — comparison + arithmetic body literals. +;; +;; Built-in predicates filter / extend candidate substitutions during +;; rule evaluation. They are not stored facts and do not participate in +;; the Herbrand base. +;; +;; (< a b) (<= a b) (> a b) (>= a b) ; numeric (or string) compare +;; (= a b) ; unify (binds vars) +;; (!= a b) ; ground-only inequality +;; (is X expr) ; bind X to expr's value +;; +;; Arithmetic expressions are SX-list compounds: +;; (+ a b) (- a b) (* a b) (/ a b) +;; or numbers / variables (must be bound at evaluation time). + +(define + dl-comparison? + (fn + (lit) + (and + (list? lit) + (> (len lit) 0) + (let + ((rel (dl-rel-name lit))) + (cond + ((nil? rel) false) + (else (dl-member-string? rel (list "<" "<=" ">" ">=" "!=")))))))) + +(define + dl-eq? + (fn + (lit) + (and + (list? lit) + (> (len lit) 0) + (let ((rel (dl-rel-name lit))) (and (not (nil? rel)) (= rel "=")))))) + +(define + dl-is? + (fn + (lit) + (and + (list? lit) + (> (len lit) 0) + (let + ((rel (dl-rel-name lit))) + (and (not (nil? rel)) (= rel "is")))))) + +;; Evaluate an arithmetic expression under subst. Returns the numeric +;; result, or raises if any operand is unbound or non-numeric. +(define + dl-eval-arith + (fn + (expr subst) + (let + ((w (dl-walk expr subst))) + (cond + ((number? w) w) + ((dl-var? w) + (error (str "datalog arith: unbound variable " (symbol->string w)))) + ((list? w) + (let + ((rel (dl-rel-name w)) (args (rest w))) + (cond + ((not (= (len args) 2)) + (error (str "datalog arith: need 2 args, got " w))) + (else + (let + ((a (dl-eval-arith (first args) subst)) + (b (dl-eval-arith (nth args 1) subst))) + (cond + ((= rel "+") (+ a b)) + ((= rel "-") (- a b)) + ((= rel "*") (* a b)) + ((= rel "/") (/ a b)) + (else (error (str "datalog arith: unknown op " rel))))))))) + (else (error (str "datalog arith: not a number — " w))))))) + +(define + dl-eval-compare + (fn + (lit subst) + (let + ((rel (dl-rel-name lit)) + (a (dl-walk (nth lit 1) subst)) + (b (dl-walk (nth lit 2) subst))) + (cond + ((or (dl-var? a) (dl-var? b)) + (error + (str + "datalog: comparison " + rel + " has unbound argument; " + "ensure prior body literal binds the variable"))) + (else + (let + ((ok (cond ((= rel "<") (< a b)) ((= rel "<=") (<= a b)) ((= rel ">") (> a b)) ((= rel ">=") (>= a b)) ((= rel "!=") (not (dl-tuple-equal? a b))) (else (error (str "datalog: unknown compare " rel)))))) + (if ok subst nil))))))) + +(define + dl-eval-eq + (fn + (lit subst) + (dl-unify (nth lit 1) (nth lit 2) subst))) + +(define + dl-eval-is + (fn + (lit subst) + (let + ((target (nth lit 1)) (expr (nth lit 2))) + (let + ((value (dl-eval-arith expr subst))) + (dl-unify target value subst))))) + +(define + dl-eval-builtin + (fn + (lit subst) + (cond + ((dl-comparison? lit) (dl-eval-compare lit subst)) + ((dl-eq? lit) (dl-eval-eq lit subst)) + ((dl-is? lit) (dl-eval-is lit subst)) + (else (error (str "dl-eval-builtin: not a built-in: " lit)))))) + +;; ── Safety analysis ────────────────────────────────────────────── +;; +;; Walks body literals left-to-right tracking a "bound" set. The check +;; understands these literal kinds: +;; +;; positive non-built-in → adds its vars to bound +;; (is X expr) → vars(expr) ⊆ bound, then add X (if var) +;; <,<=,>,>=,!= → all vars ⊆ bound (no binding) +;; (= a b) where: +;; both non-vars → constraint check, no binding +;; a var, b not → bind a +;; b var, a not → bind b +;; both vars → at least one in bound; bind the other +;; {:neg lit} → all vars ⊆ bound (Phase 7 enforces fully) +;; +;; At end, head vars (minus `_`) must be ⊆ bound. + +(define + dl-vars-not-in + (fn + (vs bound) + (let + ((out (list))) + (do + (for-each + (fn + (v) + (when (not (dl-member-string? v bound)) (append! out v))) + vs) + out)))) + +(define + dl-rule-check-safety + (fn + (rule) + (let + ((head (get rule :head)) + (body (get rule :body)) + (bound (list)) + (err nil)) + (do + (define + dl-add-bound! + (fn + (vs) + (for-each + (fn + (v) + (when (not (dl-member-string? v bound)) (append! bound v))) + vs))) + (define + dl-process-eq! + (fn + (lit) + (let + ((a (nth lit 1)) (b (nth lit 2))) + (let + ((va (dl-var? a)) (vb (dl-var? b))) + (cond + ((and (not va) (not vb)) nil) + ((and va (not vb)) + (dl-add-bound! (list (symbol->string a)))) + ((and (not va) vb) + (dl-add-bound! (list (symbol->string b)))) + (else + (let + ((sa (symbol->string a)) (sb (symbol->string b))) + (cond + ((dl-member-string? sa bound) + (dl-add-bound! (list sb))) + ((dl-member-string? sb bound) + (dl-add-bound! (list sa))) + (else + (set! + err + (str + "= between two unbound variables " + (list sa sb) + " — at least one must be bound by an " + "earlier positive body literal"))))))))))) + (define + dl-process-cmp! + (fn + (lit) + (let + ((needed (dl-vars-of (list (nth lit 1) (nth lit 2))))) + (let + ((missing (dl-vars-not-in needed bound))) + (when + (> (len missing) 0) + (set! + err + (str + "comparison " + (dl-rel-name lit) + " requires bound variable(s) " + missing + " (must be bound by an earlier positive " + "body literal)"))))))) + (define + dl-process-is! + (fn + (lit) + (let + ((tgt (nth lit 1)) (expr (nth lit 2))) + (let + ((needed (dl-vars-of expr))) + (let + ((missing (dl-vars-not-in needed bound))) + (cond + ((> (len missing) 0) + (set! + err + (str + "is RHS uses unbound variable(s) " + missing + " — bind them via a prior positive body " + "literal"))) + (else + (when + (dl-var? tgt) + (dl-add-bound! (list (symbol->string tgt))))))))))) + (define + dl-process-neg! + (fn + (lit) + (let + ((needed (dl-vars-of (get lit :neg)))) + (let + ((missing (dl-vars-not-in needed bound))) + (when + (> (len missing) 0) + (set! + err + (str + "negation refers to unbound variable(s) " + missing + " — they must be bound by an earlier " + "positive body literal"))))))) + (define + dl-process-lit! + (fn + (lit) + (when + (nil? err) + (cond + ((and (dict? lit) (has-key? lit :neg)) + (dl-process-neg! lit)) + ((dl-eq? lit) (dl-process-eq! lit)) + ((dl-is? lit) (dl-process-is! lit)) + ((dl-comparison? lit) (dl-process-cmp! lit)) + ((and (list? lit) (> (len lit) 0)) + (dl-add-bound! (dl-vars-of lit))))))) + (for-each dl-process-lit! body) + (when + (nil? err) + (let + ((head-vars (dl-vars-of head)) (missing (list))) + (do + (for-each + (fn + (v) + (when + (and (not (dl-member-string? v bound)) (not (= v "_"))) + (append! missing v))) + head-vars) + (when + (> (len missing) 0) + (set! + err + (str + "head variable(s) " + missing + " do not appear in any positive body literal")))))) + err)))) diff --git a/lib/datalog/conformance.conf b/lib/datalog/conformance.conf index 58a483bc..8125ba13 100644 --- a/lib/datalog/conformance.conf +++ b/lib/datalog/conformance.conf @@ -8,6 +8,7 @@ PRELOADS=( lib/datalog/parser.sx lib/datalog/unify.sx lib/datalog/db.sx + lib/datalog/builtins.sx lib/datalog/eval.sx ) @@ -16,4 +17,5 @@ SUITES=( "parse:lib/datalog/tests/parse.sx:(dl-parse-tests-run!)" "unify:lib/datalog/tests/unify.sx:(dl-unify-tests-run!)" "eval:lib/datalog/tests/eval.sx:(dl-eval-tests-run!)" + "builtins:lib/datalog/tests/builtins.sx:(dl-builtins-tests-run!)" ) diff --git a/lib/datalog/db.sx b/lib/datalog/db.sx index af189147..d83a1dcc 100644 --- a/lib/datalog/db.sx +++ b/lib/datalog/db.sx @@ -1,4 +1,4 @@ -;; lib/datalog/db.sx — Datalog database (EDB + IDB + rules) + safety. +;; lib/datalog/db.sx — Datalog database (EDB + IDB + rules) + safety hook. ;; ;; A db is a mutable dict: ;; {:facts { -> (literal ...)} @@ -8,13 +8,12 @@ ;; directly against rule body literals. Each relation's tuple list is ;; deduplicated on insert. ;; -;; Phase 3 makes no distinction between EDB (asserted) and IDB (derived); -;; a future phase may track provenance for retraction. +;; Phase 3 introduced safety analysis for head variables; Phase 4 (in +;; lib/datalog/builtins.sx) swaps in the real `dl-rule-check-safety`, +;; which is order-aware and understands built-in predicates. (define dl-make-db (fn () {:facts {} :rules (list)})) -;; The relation name (as string) of a positive literal, or of the -;; underlying literal of a negative one. nil for malformed input. (define dl-rel-name (fn @@ -59,8 +58,6 @@ ((and (list? lit) (> (len lit) 0)) true) (else false)))) -;; Membership using dl-deep tuple equality (handles var/constant symbols -;; and numbers consistently). (define dl-tuple-equal? (fn @@ -89,7 +86,6 @@ ((dl-tuple-equal? lit (first lits)) true) (else (dl-tuple-member? lit (rest lits)))))) -;; Ensure :facts has a list for the given relation key, then return it. (define dl-ensure-rel! (fn @@ -102,8 +98,6 @@ (dict-set! facts rel-key (list))) (get facts rel-key))))) -;; All tuples currently known for a relation (EDB ∪ IDB). Returns empty -;; list when relation hasn't been seen. (define dl-rel-tuples (fn @@ -112,7 +106,6 @@ ((facts (get db :facts))) (if (has-key? facts rel-key) (get facts rel-key) (list))))) -;; Add a ground literal. Returns true iff the literal was new. (define dl-add-fact! (fn @@ -131,72 +124,53 @@ ((dl-tuple-member? lit tuples) false) (else (do (append! tuples lit) true))))))))) -;; Collect variables appearing in the positive body literals of `body`. +;; The full safety check lives in builtins.sx (it has to know which +;; predicates are built-ins). dl-add-rule! calls it via forward +;; reference; load builtins.sx alongside db.sx in any setup that +;; adds rules. The fallback below is used if builtins.sx isn't loaded. (define - dl-positive-body-vars - (fn - (body) - (let - ((vars (list))) - (do - (for-each - (fn - (lit) - (when - (dl-positive-lit? lit) - (for-each - (fn - (v) - (when (not (dl-member-string? v vars)) (append! vars v))) - (dl-vars-of lit)))) - body) - vars)))) - -;; Collect variables in any literal (positive, negated, or built-in). -(define - dl-all-body-vars - (fn - (body) - (let - ((vars (list))) - (do - (for-each - (fn - (lit) - (let - ((target (if (and (dict? lit) (has-key? lit :neg)) (get lit :neg) lit))) - (for-each - (fn - (v) - (when (not (dl-member-string? v vars)) (append! vars v))) - (dl-vars-of target)))) - body) - vars)))) - -;; Return the list of head variables NOT covered by some positive body -;; literal. A safe rule has an empty list. The check ignores '_' since -;; that is treated as a fresh anonymous variable per occurrence. -(define - dl-rule-unsafe-head-vars + dl-rule-check-safety (fn (rule) (let - ((head (get rule :head)) - (body (get rule :body)) - (head-vars (dl-vars-of head)) - (body-vars (dl-positive-body-vars body)) - (out (list))) + ((head-vars (dl-vars-of (get rule :head))) (body-vars (list))) (do (for-each (fn - (v) + (lit) (when - (and (not (dl-member-string? v body-vars)) (not (= v "_"))) - (append! out v))) - head-vars) - out)))) + (and + (list? lit) + (> (len lit) 0) + (not (and (dict? lit) (has-key? lit :neg)))) + (for-each + (fn + (v) + (when + (not (dl-member-string? v body-vars)) + (append! body-vars v))) + (dl-vars-of lit)))) + (get rule :body)) + (let + ((missing (list))) + (do + (for-each + (fn + (v) + (when + (and + (not (dl-member-string? v body-vars)) + (not (= v "_"))) + (append! missing v))) + head-vars) + (cond + ((> (len missing) 0) + (str + "head variable(s) " + missing + " do not appear in any body literal")) + (else nil)))))))) -;; Add a rule. Rejects unsafe rules with a pointer at the offending var. (define dl-add-rule! (fn @@ -208,21 +182,14 @@ (error (str "dl-add-rule!: rule missing :head, got " rule))) (else (let - ((unsafe (dl-rule-unsafe-head-vars rule))) + ((err (dl-rule-check-safety rule))) (cond - ((> (len unsafe) 0) - (error - (str - "dl-add-rule!: unsafe rule — head variable(s) " - unsafe - " do not appear in any positive body literal of " - rule))) + ((not (nil? err)) (error (str "dl-add-rule!: " err))) (else (let ((rules (get db :rules))) (do (append! rules rule) true))))))))) -;; Load a parsed clause (fact or rule) into the db. Queries are ignored. (define dl-add-clause! (fn @@ -233,7 +200,6 @@ (dl-add-fact! db (get clause :head))) (else (dl-add-rule! db clause))))) -;; Parse source text and load every clause into the db. Returns the db. (define dl-load-program! (fn @@ -242,14 +208,12 @@ ((clauses (dl-parse source))) (do (for-each (fn (c) (dl-add-clause! db c)) clauses) db)))) -;; Convenience: build a db from source in one step. (define dl-program (fn (source) (let ((db (dl-make-db))) (dl-load-program! db source)))) (define dl-rules (fn (db) (get db :rules))) -;; Total number of stored ground tuples across all relations. (define dl-fact-count (fn diff --git a/lib/datalog/eval.sx b/lib/datalog/eval.sx index 68c83550..7c1e7c6c 100644 --- a/lib/datalog/eval.sx +++ b/lib/datalog/eval.sx @@ -4,11 +4,11 @@ ;; The Herbrand base is finite (no function symbols) so termination is ;; guaranteed by the language. ;; -;; Phase 3 handles only positive EDB literals. Negation (Phase 7) and -;; arithmetic built-ins (Phase 4) raise a clear error if encountered. +;; Body literal kinds handled here: +;; positive (rel arg ... arg) → match against EDB+IDB tuples (dl-match-positive) +;; built-in (< X Y), (is X e) → constraint via dl-eval-builtin (Phase 4) +;; negation {:neg lit} → Phase 7 -;; Match a positive literal against every tuple in the relation; return -;; a list of extended substitutions, one per successful unification. (define dl-match-positive (fn @@ -38,13 +38,13 @@ ((and (dict? lit) (has-key? lit :neg)) (error "datalog: negation not yet supported (Phase 7)")) ((dl-builtin? lit) - (error "datalog: built-in predicate not yet supported (Phase 4)")) + (let + ((s (dl-eval-builtin lit subst))) + (if (nil? s) (list) (list s)))) ((and (list? lit) (> (len lit) 0)) (dl-match-positive lit db subst)) (else (error (str "datalog: unknown body-literal shape: " lit)))))) -;; Recursively find all substitutions satisfying a list of body literals -;; under `subst`. Returns a list of substitutions. (define dl-find-bindings (fn @@ -66,7 +66,6 @@ options) results)))))) -;; Apply a rule once. Returns true if any new tuple was derived. (define dl-apply-rule! (fn @@ -83,7 +82,6 @@ (dl-find-bindings body db (dl-empty-subst))) new?)))) -;; Iterate rules to fixpoint. (define dl-saturate! (fn @@ -106,9 +104,6 @@ (dl-sat-loop) db)))) -;; Run a goal against the saturated db. Returns a list of substitutions -;; (each restricted to the variables that appeared in `goal`). Duplicate -;; projections are deduplicated so users get the set of answers. (define dl-query (fn @@ -131,7 +126,6 @@ substs) results))))) -;; Project a subst down to a given set of variable names. (define dl-project-subst (fn diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index 11113c5c..a1933cc5 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,13 +1,14 @@ { "lang": "datalog", - "total_passed": 87, + "total_passed": 106, "total_failed": 0, - "total": 87, + "total": 106, "suites": [ {"name":"tokenize","passed":26,"failed":0,"total":26}, {"name":"parse","passed":18,"failed":0,"total":18}, {"name":"unify","passed":28,"failed":0,"total":28}, - {"name":"eval","passed":15,"failed":0,"total":15} + {"name":"eval","passed":15,"failed":0,"total":15}, + {"name":"builtins","passed":19,"failed":0,"total":19} ], - "generated": "2026-05-07T23:40:51+00:00" + "generated": "2026-05-07T23:50:44+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index e921fcfd..2b5954fc 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,6 +1,6 @@ # datalog scoreboard -**87 / 87 passing** (0 failure(s)). +**106 / 106 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| @@ -8,3 +8,4 @@ | parse | 18 | 18 | ok | | unify | 28 | 28 | ok | | eval | 15 | 15 | ok | +| builtins | 19 | 19 | ok | diff --git a/lib/datalog/tests/builtins.sx b/lib/datalog/tests/builtins.sx new file mode 100644 index 00000000..f5136134 --- /dev/null +++ b/lib/datalog/tests/builtins.sx @@ -0,0 +1,228 @@ +;; lib/datalog/tests/builtins.sx — comparison + arithmetic body literals. + +(define dl-bt-pass 0) +(define dl-bt-fail 0) +(define dl-bt-failures (list)) + +(define + dl-bt-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-bt-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let + ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-bt-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-bt-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-bt-deep=? (nth a i) (nth b i))) false) + (else (dl-bt-deq-l? a b (+ i 1)))))) + +(define + dl-bt-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) (not (dl-bt-deep=? (get a k) (get b k)))) + false) + (else (dl-bt-deq-d? a b ka (+ i 1)))))) + +(define + dl-bt-set=? + (fn + (a b) + (and (= (len a) (len b)) (dl-bt-subset? a b) (dl-bt-subset? b a)))) + +(define + dl-bt-subset? + (fn + (xs ys) + (cond + ((= (len xs) 0) true) + ((not (dl-bt-contains? ys (first xs))) false) + (else (dl-bt-subset? (rest xs) ys))))) + +(define + dl-bt-contains? + (fn + (xs target) + (cond + ((= (len xs) 0) false) + ((dl-bt-deep=? (first xs) target) true) + (else (dl-bt-contains? (rest xs) target))))) + +(define + dl-bt-test-set! + (fn + (name got expected) + (if + (dl-bt-set=? got expected) + (set! dl-bt-pass (+ dl-bt-pass 1)) + (do + (set! dl-bt-fail (+ dl-bt-fail 1)) + (append! + dl-bt-failures + (str + name + "\n expected (set): " + expected + "\n got: " + got)))))) + +(define + dl-bt-test! + (fn + (name got expected) + (if + (dl-bt-deep=? got expected) + (set! dl-bt-pass (+ dl-bt-pass 1)) + (do + (set! dl-bt-fail (+ dl-bt-fail 1)) + (append! + dl-bt-failures + (str name "\n expected: " expected "\n got: " got)))))) + +(define + dl-bt-throws? + (fn + (thunk) + (let + ((threw false)) + (do (guard (e (#t (set! threw true))) (thunk)) threw)))) + +(define + dl-bt-run-all! + (fn + () + (do + (dl-bt-test-set! + "less than filter" + (dl-query + (dl-program + "age(alice, 30). age(bob, 17). age(carol, 22).\n adult(X) :- age(X, A), >=(A, 18).") + (list (quote adult) (quote X))) + (list {:X (quote alice)} {:X (quote carol)})) + (dl-bt-test-set! + "less-equal filter" + (dl-query + (dl-program + "n(1). n(2). n(3). n(4). n(5).\n small(X) :- n(X), <=(X, 3).") + (list (quote small) (quote X))) + (list {:X 1} {:X 2} {:X 3})) + (dl-bt-test-set! + "not-equal filter" + (dl-query + (dl-program + "p(1, 2). p(2, 2). p(3, 4).\n diff(X, Y) :- p(X, Y), !=(X, Y).") + (list (quote diff) (quote X) (quote Y))) + (list {:X 1 :Y 2} {:X 3 :Y 4})) + (dl-bt-test-set! + "is plus" + (dl-query + (dl-program + "n(1). n(2). n(3).\n succ(X, Y) :- n(X), is(Y, +(X, 1)).") + (list (quote succ) (quote X) (quote Y))) + (list {:X 1 :Y 2} {:X 2 :Y 3} {:X 3 :Y 4})) + (dl-bt-test-set! + "is multiply" + (dl-query + (dl-program + "n(2). n(3). n(4).\n square(X, Y) :- n(X), is(Y, *(X, X)).") + (list (quote square) (quote X) (quote Y))) + (list {:X 2 :Y 4} {:X 3 :Y 9} {:X 4 :Y 16})) + (dl-bt-test-set! + "is nested expr" + (dl-query + (dl-program + "n(1). n(2). n(3).\n f(X, Y) :- n(X), is(Y, *(+(X, 1), 2)).") + (list (quote f) (quote X) (quote Y))) + (list {:X 1 :Y 4} {:X 2 :Y 6} {:X 3 :Y 8})) + (dl-bt-test-set! + "is bound LHS — equality" + (dl-query + (dl-program + "n(1, 2). n(2, 5). n(3, 4).\n succ(X, Y) :- n(X, Y), is(Y, +(X, 1)).") + (list (quote succ) (quote X) (quote Y))) + (list {:X 1 :Y 2} {:X 3 :Y 4})) + (dl-bt-test-set! + "triple via is" + (dl-query + (dl-program + "n(1). n(2). n(3).\n triple(X, Y) :- n(X), is(Y, *(X, 3)).") + (list (quote triple) (quote X) (quote Y))) + (list {:X 1 :Y 3} {:X 2 :Y 6} {:X 3 :Y 9})) + (dl-bt-test-set! + "= unifies var with constant" + (dl-query + (dl-program "p(a). p(b).\n qual(X) :- p(X), =(X, a).") + (list (quote qual) (quote X))) + (list {:X (quote a)})) + (dl-bt-test-set! + "= unifies two vars (one bound)" + (dl-query + (dl-program "p(a). p(b).\n twin(X, Y) :- p(X), =(Y, X).") + (list (quote twin) (quote X) (quote Y))) + (list {:X (quote a) :Y (quote a)} {:X (quote b) :Y (quote b)})) + (dl-bt-test! + "big count" + (let + ((db (dl-program "n(0). n(1). n(2). n(3). n(4). n(5). n(6). n(7). n(8). n(9).\n big(X) :- n(X), >=(X, 5)."))) + (do (dl-saturate! db) (len (dl-relation db "big")))) + 5) + (dl-bt-test! + "unsafe — comparison without binder" + (dl-bt-throws? (fn () (dl-program "p(X) :- <(X, 5)."))) + true) + (dl-bt-test! + "unsafe — comparison both unbound" + (dl-bt-throws? (fn () (dl-program "p(X, Y) :- <(X, Y), q(X)."))) + true) + (dl-bt-test! + "unsafe — is uses unbound RHS var" + (dl-bt-throws? + (fn () (dl-program "p(X, Y) :- q(X), is(Y, +(X, Z))."))) + true) + (dl-bt-test! + "unsafe — is on its own" + (dl-bt-throws? (fn () (dl-program "p(Y) :- is(Y, +(X, 1))."))) + true) + (dl-bt-test! + "unsafe — = between two unbound" + (dl-bt-throws? (fn () (dl-program "p(X, Y) :- =(X, Y)."))) + true) + (dl-bt-test! + "safe — is binds head var" + (dl-bt-throws? + (fn () (dl-program "n(1). p(Y) :- n(X), is(Y, +(X, 1))."))) + false) + (dl-bt-test! + "safe — comparison after binder" + (dl-bt-throws? + (fn () (dl-program "n(1). big(X) :- n(X), >=(X, 0)."))) + false) + (dl-bt-test! + "safe — = binds head var" + (dl-bt-throws? + (fn () (dl-program "p(a). p(b). x(Y) :- p(X), =(Y, X)."))) + false)))) + +(define + dl-builtins-tests-run! + (fn + () + (do + (set! dl-bt-pass 0) + (set! dl-bt-fail 0) + (set! dl-bt-failures (list)) + (dl-bt-run-all!) + {:failures dl-bt-failures :total (+ dl-bt-pass dl-bt-fail) :passed dl-bt-pass :failed dl-bt-fail}))) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 6f74f6d6..10ff7f3f 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -107,19 +107,23 @@ Key differences from Prolog: Almost every real query needs `<`, `=`, simple arithmetic, and string comparisons in body position. These are not EDB lookups — they're constraints that filter bindings. -- [ ] Recognise built-in predicates in body: `(< X Y)`, `(<= X Y)`, `(> X Y)`, +- [x] Recognise built-in predicates in body: `(< X Y)`, `(<= X Y)`, `(> X Y)`, `(>= X Y)`, `(= X Y)`, `(!= X Y)` and arithmetic forms `(is Z (+ X Y))`, - `(is Z (- X Y))`, `(is Z (* X Y))`, `(is Z (/ X Y))`. -- [ ] Built-in evaluation: at the join step, after binding variables from - EDB lookups, evaluate built-ins as constraints. If any built-in fails - or has unbound inputs, drop the candidate substitution. -- [ ] **Safety extension**: `is` binds its left operand iff right operand is - fully ground. `(< X Y)` requires both X and Y bound by some prior body - literal — reject unsafe at `dl-add-rule!` time. -- [ ] Wire arithmetic operators through to SX numeric primitives — no - separate Datalog number tower. -- [ ] Tests: range filters, arithmetic derivations, comparison-based - queries, safety violation on `(p X) :- (< X 5).` + `(is Z (- X Y))`, `(is Z (* X Y))`, `(is Z (/ X Y))`. Live in + `lib/datalog/builtins.sx`. +- [x] `dl-eval-builtin` dispatches; `dl-eval-arith` recursively evaluates + `(+ a b)` etc. with full nesting. `=` unifies; `!=` rejects equal + ground terms. +- [x] Order-aware safety analysis (`dl-rule-check-safety`): walks body + left-to-right tracking which vars are bound. `is`'s RHS vars must + be already bound; LHS becomes bound. Comparisons require both + sides bound. `=` is special-cased — at least one side bound binds + the other. Negation vars must be bound (will be enforced fully in + Phase 7). +- [x] Wired through SX numeric primitives — no separate number tower. +- [x] Tests in `lib/datalog/tests/builtins.sx` (19): range filters, + arithmetic derivations, equality binding, eight safety violations + and three safe-shape tests. Conformance 106 / 106. ### Phase 5 — semi-naive evaluation (performance) - [ ] Delta sets: track newly derived tuples per iteration @@ -196,6 +200,19 @@ _(none yet)_ _Newest first._ +- 2026-05-07 — Phase 4 done. `lib/datalog/builtins.sx` (~280 LOC) adds + `(< X Y)`, `(<= X Y)`, `(> X Y)`, `(>= X Y)`, `(= X Y)`, `(!= X Y)`, + and `(is X expr)` with `+ - * /`. `dl-eval-builtin` dispatches; + `dl-eval-arith` recursively evaluates nested compounds. Safety + check is now order-aware — it walks body literals left-to-right + tracking the bound set, requires comparison/`is` inputs to be + already bound, and special-cases `=` (binds the var-side; both + sides must include at least one bound to bind the other). Phase 3's + simple safety check stays in db.sx as a forward-reference fallback; + builtins.sx redefines `dl-rule-check-safety` to the comprehensive + version. eval.sx's `dl-match-lit` now dispatches built-ins through + `dl-eval-builtin`. 19 builtins tests; conformance 106 / 106. + - 2026-05-07 — Phase 3 done. `lib/datalog/db.sx` (~250 LOC) holds facts indexed by relation name plus the rules list, with `dl-add-fact!` / `dl-add-rule!` (rejects non-ground facts and unsafe rules);