diff --git a/lib/datalog/aggregates.sx b/lib/datalog/aggregates.sx new file mode 100644 index 00000000..0e62e536 --- /dev/null +++ b/lib/datalog/aggregates.sx @@ -0,0 +1,157 @@ +;; lib/datalog/aggregates.sx — count / sum / min / max / findall. +;; +;; Surface form (always 3-arg after the relation name): +;; +;; (count Result Var GoalLit) +;; (sum Result Var GoalLit) +;; (min Result Var GoalLit) +;; (max Result Var GoalLit) +;; (findall List Var GoalLit) +;; +;; Parsed naturally because arg-position compounds are already allowed +;; (Phase 4 needs them for arithmetic). At evaluation time the aggregator +;; runs `dl-find-bindings` on `GoalLit` under the current subst, collects +;; the distinct values of `Var`, and binds `Result`. +;; +;; Aggregation is non-monotonic — `count(C, X, p(X))` shrinks as p loses +;; tuples. The stratifier (lib/datalog/strata.sx) treats every aggregate's +;; goal relation as a negation-like edge so the inner relation is fully +;; derived before the aggregate fires. +;; +;; Empty input: count → 0, sum → 0, min/max → no binding (rule fails). + +(define dl-aggregate-rels (list "count" "sum" "min" "max" "findall")) + +(define + dl-aggregate? + (fn + (lit) + (and + (list? lit) + (>= (len lit) 4) + (let ((rel (dl-rel-name lit))) + (cond + ((nil? rel) false) + (else (dl-member-string? rel dl-aggregate-rels))))))) + +;; Apply aggregation operator to a list of (already-distinct) numeric or +;; symbolic values. Returns the aggregated value, or :empty if min/max +;; has no input. +(define + dl-do-aggregate + (fn + (op vals) + (cond + ((= op "count") (len vals)) + ((= op "sum") (dl-sum-vals vals 0)) + ((= op "findall") vals) + ((= op "min") + (cond + ((= (len vals) 0) :empty) + (else (dl-min-vals vals 1 (first vals))))) + ((= op "max") + (cond + ((= (len vals) 0) :empty) + (else (dl-max-vals vals 1 (first vals))))) + (else (error (str "datalog: unknown aggregate " op)))))) + +(define + dl-sum-vals + (fn + (vals acc) + (cond + ((= (len vals) 0) acc) + (else (dl-sum-vals (rest vals) (+ acc (first vals))))))) + +(define + dl-min-vals + (fn + (vals i cur) + (cond + ((>= i (len vals)) cur) + (else + (let ((v (nth vals i))) + (dl-min-vals vals (+ i 1) (if (< v cur) v cur))))))) + +(define + dl-max-vals + (fn + (vals i cur) + (cond + ((>= i (len vals)) cur) + (else + (let ((v (nth vals i))) + (dl-max-vals vals (+ i 1) (if (> v cur) v cur))))))) + +;; Membership check by deep equality (so 30 == 30.0 etc). +(define + dl-val-member? + (fn + (v xs) + (cond + ((= (len xs) 0) false) + ((dl-tuple-equal? v (first xs)) true) + (else (dl-val-member? v (rest xs)))))) + +;; Evaluate an aggregate body lit under `subst`. Returns the list of +;; extended substitutions (0 or 1 element). +(define + dl-eval-aggregate + (fn + (lit db subst) + (let + ((op (dl-rel-name lit)) + (result-var (nth lit 1)) + (agg-var (nth lit 2)) + (goal (nth lit 3))) + (cond + ((not (dl-var? agg-var)) + (error (str "datalog aggregate (" op + "): second arg must be a variable, got " agg-var))) + ((not (and (list? goal) (> (len goal) 0) + (symbol? (first goal)))) + (error (str "datalog aggregate (" op + "): third arg must be a positive literal, got " + goal))) + ((not (dl-member-string? + (symbol->string agg-var) + (dl-vars-of goal))) + (error (str "datalog aggregate (" op + "): aggregation variable " agg-var + " does not appear in the goal " goal + " — without it every match contributes the same " + "(unbound) value and the result is meaningless"))) + (else + (let ((vals (list))) + (do + (for-each + (fn + (s) + (let ((v (dl-apply-subst agg-var s))) + (when (not (dl-val-member? v vals)) + (append! vals v)))) + (dl-find-bindings (list goal) db subst)) + (let ((agg-val (dl-do-aggregate op vals))) + (cond + ((= agg-val :empty) (list)) + (else + (let ((s2 (dl-unify result-var agg-val subst))) + (if (nil? s2) (list) (list s2))))))))))))) + + +;; Stratification edges from aggregates: like negation, the goal's +;; relation must be in a strictly lower stratum so that the aggregate +;; fires only after the underlying tuples are settled. +(define + dl-aggregate-dep-edge + (fn + (lit) + (cond + ((dl-aggregate? lit) + (let ((goal (nth lit 3))) + (cond + ((and (list? goal) (> (len goal) 0)) + (let ((rel (dl-rel-name goal))) + (if (nil? rel) nil {:rel rel :neg true}))) + (else nil)))) + (else nil)))) diff --git a/lib/datalog/api.sx b/lib/datalog/api.sx new file mode 100644 index 00000000..ed014f1a --- /dev/null +++ b/lib/datalog/api.sx @@ -0,0 +1,303 @@ +;; lib/datalog/api.sx — SX-data embedding API. +;; +;; Where Phase 1's `dl-program` takes a Datalog source string, +;; this module exposes a parser-free API that consumes SX data +;; directly. Two rule shapes are accepted: +;; +;; - dict: {:head :body ( ...)} +;; - list: ( <- ...) +;; — `<-` is an SX symbol used as the rule arrow. +;; +;; Examples: +;; +;; (dl-program-data +;; '((parent tom bob) (parent tom liz) (parent bob ann)) +;; '((ancestor X Y <- (parent X Y)) +;; (ancestor X Z <- (parent X Y) (ancestor Y Z)))) +;; +;; (dl-query db '(ancestor tom X)) ; same query API as before +;; +;; Variables follow the parser convention: SX symbols whose first +;; character is uppercase or `_` are variables. + +(define + dl-rule + (fn (head body) {:head head :body body})) + +(define + dl-rule-arrow? + (fn + (x) + (and (symbol? x) (= (symbol->string x) "<-")))) + +(define + dl-find-arrow + (fn + (rl i n) + (cond + ((>= i n) nil) + ((dl-rule-arrow? (nth rl i)) i) + (else (dl-find-arrow rl (+ i 1) n))))) + +;; Given a list of the form (head-elt ... <- body-lit ...) returns +;; {:head (head-elt ...) :body (body-lit ...)}. If no arrow is +;; present, the whole list is treated as the head and the body is +;; empty (i.e. a fact written rule-style). +(define + dl-rule-from-list + (fn + (rl) + (let ((n (len rl))) + (let ((idx (dl-find-arrow rl 0 n))) + (cond + ((nil? idx) {:head rl :body (list)}) + (else + (let + ((head (slice rl 0 idx)) + (body (slice rl (+ idx 1) n))) + {:head head :body body}))))))) + +;; Coerce a rule given as either a dict or a list-with-arrow to a dict. +(define + dl-coerce-rule + (fn + (r) + (cond + ((dict? r) r) + ((list? r) (dl-rule-from-list r)) + (else (error (str "dl-coerce-rule: expected dict or list, got " r)))))) + +;; Build a db from SX data lists. +(define + dl-program-data + (fn + (facts rules) + (let ((db (dl-make-db))) + (do + (for-each (fn (lit) (dl-add-fact! db lit)) facts) + (for-each + (fn (r) (dl-add-rule! db (dl-coerce-rule r))) + rules) + db)))) + +;; Add a single fact at runtime, then re-saturate the db so derived +;; tuples reflect the change. Returns the db. +(define + dl-assert! + (fn + (db lit) + (do + (dl-add-fact! db lit) + (dl-saturate! db) + db))) + +;; Remove a fact and re-saturate. Mixed relations (which have BOTH +;; user-asserted facts AND rules) are supported via :edb-keys provenance +;; — explicit facts are marked at dl-add-fact! time, the saturator uses +;; dl-add-derived! which doesn't mark them, so the retract pass can +;; safely wipe IDB-derived tuples while preserving the user's EDB. +;; +;; Effect: +;; - remove tuples matching `lit` from :facts and :edb-keys +;; - for every relation that has a rule (i.e. potentially IDB or +;; mixed), drop the IDB-derived portion (anything not in :edb-keys) +;; so the saturator can re-derive cleanly +;; - re-saturate +(define + dl-retract! + (fn + (db lit) + (let + ((rel-key (dl-rel-name lit))) + (do + ;; Drop the matching tuple from its relation list, its facts-keys, + ;; its first-arg index, AND from :edb-keys (if present). + (when + (has-key? (get db :facts) rel-key) + (let + ((existing (get (get db :facts) rel-key)) + (kept (list)) + (kept-keys {}) + (kept-index {}) + (edb-rel (cond + ((has-key? (get db :edb-keys) rel-key) + (get (get db :edb-keys) rel-key)) + (else nil))) + (kept-edb {})) + (do + (for-each + (fn + (t) + (when + (not (dl-tuple-equal? t lit)) + (do + (append! kept t) + (let ((tk (dl-tuple-key t))) + (do + (dict-set! kept-keys tk true) + (when + (and (not (nil? edb-rel)) + (has-key? edb-rel tk)) + (dict-set! kept-edb tk true)))) + (when + (>= (len t) 2) + (let ((k (dl-arg-key (nth t 1)))) + (do + (when + (not (has-key? kept-index k)) + (dict-set! kept-index k (list))) + (append! (get kept-index k) t))))))) + existing) + (dict-set! (get db :facts) rel-key kept) + (dict-set! (get db :facts-keys) rel-key kept-keys) + (dict-set! (get db :facts-index) rel-key kept-index) + (when + (not (nil? edb-rel)) + (dict-set! (get db :edb-keys) rel-key kept-edb))))) + ;; For each rule-head relation, strip the IDB-derived tuples + ;; (anything not marked in :edb-keys) so the saturator can + ;; cleanly re-derive without leaving stale tuples that depended + ;; on the now-removed fact. + (let ((rule-heads (dl-rule-head-rels db))) + (for-each + (fn + (k) + (when + (has-key? (get db :facts) k) + (let + ((existing (get (get db :facts) k)) + (kept (list)) + (kept-keys {}) + (kept-index {}) + (edb-rel (cond + ((has-key? (get db :edb-keys) k) + (get (get db :edb-keys) k)) + (else {})))) + (do + (for-each + (fn + (t) + (let ((tk (dl-tuple-key t))) + (when + (has-key? edb-rel tk) + (do + (append! kept t) + (dict-set! kept-keys tk true) + (when + (>= (len t) 2) + (let ((kk (dl-arg-key (nth t 1)))) + (do + (when + (not (has-key? kept-index kk)) + (dict-set! kept-index kk (list))) + (append! (get kept-index kk) t)))))))) + existing) + (dict-set! (get db :facts) k kept) + (dict-set! (get db :facts-keys) k kept-keys) + (dict-set! (get db :facts-index) k kept-index))))) + rule-heads)) + (dl-saturate! db) + db)))) + +;; ── Convenience: single-call source + query ─────────────────── +;; (dl-eval source query-source) parses both, builds a db, saturates, +;; runs the query, returns the substitution list. The query source +;; should be `?- goal[, goal ...].` — the parser produces a clause +;; with :query containing a list of literals which is fed straight +;; to dl-query. +(define + dl-eval + (fn + (source query-source) + (let + ((db (dl-program source)) + (queries (dl-parse query-source))) + (cond + ((= (len queries) 0) (error "dl-eval: query string is empty")) + ((not (has-key? (first queries) :query)) + (error "dl-eval: second arg must be a `?- ...` query clause")) + (else + (dl-query db (get (first queries) :query))))))) + +;; (dl-eval-magic source query-source) — like dl-eval but routes a +;; single-positive-literal query through `dl-magic-query` for goal- +;; directed evaluation. Multi-literal query bodies fall back to the +;; standard dl-query path (magic-sets is currently only wired for +;; single-positive goals). The caller's source is parsed afresh +;; each call so successive invocations are independent. +(define + dl-eval-magic + (fn + (source query-source) + (let + ((db (dl-program source)) + (queries (dl-parse query-source))) + (cond + ((= (len queries) 0) (error "dl-eval-magic: query string is empty")) + ((not (has-key? (first queries) :query)) + (error + "dl-eval-magic: second arg must be a `?- ...` query clause")) + (else + (let + ((qbody (get (first queries) :query))) + (cond + ((and (= (len qbody) 1) + (list? (first qbody)) + (> (len (first qbody)) 0) + (symbol? (first (first qbody)))) + (dl-magic-query db (first qbody))) + (else (dl-query db qbody))))))))) + +;; List rules whose head's relation matches `rel-name`. Useful for +;; inspection ("show me how this relation is derived") without +;; exposing the internal `:rules` list. +(define + dl-rules-of + (fn + (db rel-name) + (let ((out (list))) + (do + (for-each + (fn + (rule) + (when + (= (dl-rel-name (get rule :head)) rel-name) + (append! out rule))) + (dl-rules db)) + out)))) + +(define + dl-rule-head-rels + (fn + (db) + (let ((seen (list))) + (do + (for-each + (fn + (rule) + (let ((h (dl-rel-name (get rule :head)))) + (when + (and (not (nil? h)) (not (dl-member-string? h seen))) + (append! seen h)))) + (dl-rules db)) + seen)))) + +;; Wipe every relation that has at least one rule (i.e. every IDB +;; relation) — leaves EDB facts and rule definitions intact. Useful +;; before a follow-up `dl-saturate!` if you want a clean restart, or +;; for inspection of the EDB-only baseline. +(define + dl-clear-idb! + (fn + (db) + (let ((rule-heads (dl-rule-head-rels db))) + (do + (for-each + (fn + (k) + (do + (dict-set! (get db :facts) k (list)) + (dict-set! (get db :facts-keys) k {}) + (dict-set! (get db :facts-index) k {}))) + rule-heads) + db)))) diff --git a/lib/datalog/builtins.sx b/lib/datalog/builtins.sx new file mode 100644 index 00000000..9989c1ad --- /dev/null +++ b/lib/datalog/builtins.sx @@ -0,0 +1,406 @@ +;; lib/datalog/builtins.sx — comparison + arithmetic body literals. +;; +;; Built-in predicates filter / extend candidate substitutions during +;; rule evaluation. They are not stored facts and do not participate in +;; the Herbrand base. +;; +;; (< a b) (<= a b) (> a b) (>= a b) ; numeric (or string) compare +;; (= a b) ; unify (binds vars) +;; (!= a b) ; ground-only inequality +;; (is X expr) ; bind X to expr's value +;; +;; Arithmetic expressions are SX-list compounds: +;; (+ a b) (- a b) (* a b) (/ a b) +;; or numbers / variables (must be bound at evaluation time). + +(define + dl-comparison? + (fn + (lit) + (and + (list? lit) + (> (len lit) 0) + (let + ((rel (dl-rel-name lit))) + (cond + ((nil? rel) false) + (else (dl-member-string? rel (list "<" "<=" ">" ">=" "!=")))))))) + +(define + dl-eq? + (fn + (lit) + (and + (list? lit) + (> (len lit) 0) + (let ((rel (dl-rel-name lit))) (and (not (nil? rel)) (= rel "=")))))) + +(define + dl-is? + (fn + (lit) + (and + (list? lit) + (> (len lit) 0) + (let + ((rel (dl-rel-name lit))) + (and (not (nil? rel)) (= rel "is")))))) + +;; Evaluate an arithmetic expression under subst. Returns the numeric +;; result, or raises if any operand is unbound or non-numeric. +(define + dl-eval-arith + (fn + (expr subst) + (let + ((w (dl-walk expr subst))) + (cond + ((number? w) w) + ((dl-var? w) + (error (str "datalog arith: unbound variable " (symbol->string w)))) + ((list? w) + (let + ((rel (dl-rel-name w)) (args (rest w))) + (cond + ((not (= (len args) 2)) + (error (str "datalog arith: need 2 args, got " w))) + (else + (let + ((a (dl-eval-arith (first args) subst)) + (b (dl-eval-arith (nth args 1) subst))) + (cond + ((= rel "+") (+ a b)) + ((= rel "-") (- a b)) + ((= rel "*") (* a b)) + ((= rel "/") + (cond + ((= b 0) + (error + (str "datalog arith: division by zero in " + w))) + (else (/ a b)))) + (else (error (str "datalog arith: unknown op " rel))))))))) + (else (error (str "datalog arith: not a number — " w))))))) + +;; Comparable types — both operands must be the same primitive type +;; (both numbers, both strings). `!=` is the exception: it's defined +;; for any pair (returns true iff not equal) since dl-tuple-equal? +;; handles type-mixed comparisons. +(define + dl-compare-typeok? + (fn + (rel a b) + (cond + ((= rel "!=") true) + ((and (number? a) (number? b)) true) + ((and (string? a) (string? b)) true) + (else false)))) + +(define + dl-eval-compare + (fn + (lit subst) + (let + ((rel (dl-rel-name lit)) + (a (dl-walk (nth lit 1) subst)) + (b (dl-walk (nth lit 2) subst))) + (cond + ((or (dl-var? a) (dl-var? b)) + (error + (str + "datalog: comparison " + rel + " has unbound argument; " + "ensure prior body literal binds the variable"))) + ((not (dl-compare-typeok? rel a b)) + (error + (str "datalog: comparison " rel " requires same-type " + "operands (both numbers or both strings), got " + a " and " b))) + (else + (let + ((ok (cond ((= rel "<") (< a b)) ((= rel "<=") (<= a b)) ((= rel ">") (> a b)) ((= rel ">=") (>= a b)) ((= rel "!=") (not (dl-tuple-equal? a b))) (else (error (str "datalog: unknown compare " rel)))))) + (if ok subst nil))))))) + +(define + dl-eval-eq + (fn + (lit subst) + (dl-unify (nth lit 1) (nth lit 2) subst))) + +(define + dl-eval-is + (fn + (lit subst) + (let + ((target (nth lit 1)) (expr (nth lit 2))) + (let + ((value (dl-eval-arith expr subst))) + (dl-unify target value subst))))) + +(define + dl-eval-builtin + (fn + (lit subst) + (cond + ((dl-comparison? lit) (dl-eval-compare lit subst)) + ((dl-eq? lit) (dl-eval-eq lit subst)) + ((dl-is? lit) (dl-eval-is lit subst)) + (else (error (str "dl-eval-builtin: not a built-in: " lit)))))) + +;; ── Safety analysis ────────────────────────────────────────────── +;; +;; Walks body literals left-to-right tracking a "bound" set. The check +;; understands these literal kinds: +;; +;; positive non-built-in → adds its vars to bound +;; (is X expr) → vars(expr) ⊆ bound, then add X (if var) +;; <,<=,>,>=,!= → all vars ⊆ bound (no binding) +;; (= a b) where: +;; both non-vars → constraint check, no binding +;; a var, b not → bind a +;; b var, a not → bind b +;; both vars → at least one in bound; bind the other +;; {:neg lit} → all vars ⊆ bound (Phase 7 enforces fully) +;; +;; At end, head vars (minus `_`) must be ⊆ bound. + +(define + dl-vars-not-in + (fn + (vs bound) + (let + ((out (list))) + (do + (for-each + (fn + (v) + (when (not (dl-member-string? v bound)) (append! out v))) + vs) + out)))) + +;; Filter a list of variable-name strings to exclude anonymous-renamed +;; vars (`_` in source → `_anon*` by dl-rename-anon-term). Used by +;; the negation safety check, where anonymous vars are existential +;; within the negated literal. +(define + dl-non-anon-vars + (fn + (vs) + (let + ((out (list))) + (do + (for-each + (fn + (v) + (when + (not (and (>= (len v) 5) + (= (slice v 0 5) "_anon"))) + (append! out v))) + vs) + out)))) + +(define + dl-rule-check-safety + (fn + (rule) + (let + ((head (get rule :head)) + (body (get rule :body)) + (bound (list)) + (err nil)) + (do + (define + dl-add-bound! + (fn + (vs) + (for-each + (fn + (v) + (when (not (dl-member-string? v bound)) (append! bound v))) + vs))) + (define + dl-process-eq! + (fn + (lit) + (let + ((a (nth lit 1)) (b (nth lit 2))) + (let + ((va (dl-var? a)) (vb (dl-var? b))) + (cond + ((and (not va) (not vb)) nil) + ((and va (not vb)) + (dl-add-bound! (list (symbol->string a)))) + ((and (not va) vb) + (dl-add-bound! (list (symbol->string b)))) + (else + (let + ((sa (symbol->string a)) (sb (symbol->string b))) + (cond + ((dl-member-string? sa bound) + (dl-add-bound! (list sb))) + ((dl-member-string? sb bound) + (dl-add-bound! (list sa))) + (else + (set! + err + (str + "= between two unbound variables " + (list sa sb) + " — at least one must be bound by an " + "earlier positive body literal"))))))))))) + (define + dl-process-cmp! + (fn + (lit) + (let + ((needed (dl-vars-of (list (nth lit 1) (nth lit 2))))) + (let + ((missing (dl-vars-not-in needed bound))) + (when + (> (len missing) 0) + (set! + err + (str + "comparison " + (dl-rel-name lit) + " requires bound variable(s) " + missing + " (must be bound by an earlier positive " + "body literal)"))))))) + (define + dl-process-is! + (fn + (lit) + (let + ((tgt (nth lit 1)) (expr (nth lit 2))) + (let + ((needed (dl-vars-of expr))) + (let + ((missing (dl-vars-not-in needed bound))) + (cond + ((> (len missing) 0) + (set! + err + (str + "is RHS uses unbound variable(s) " + missing + " — bind them via a prior positive body " + "literal"))) + (else + (when + (dl-var? tgt) + (dl-add-bound! (list (symbol->string tgt))))))))))) + (define + dl-process-neg! + (fn + (lit) + (let + ((inner (get lit :neg))) + (let + ((inner-rn + (cond + ((and (list? inner) (> (len inner) 0)) + (dl-rel-name inner)) + (else nil))) + ;; Anonymous variables (`_` in source → `_anon*` after + ;; renaming) are existentially quantified within the + ;; negated literal — they don't need to be bound by + ;; an earlier body lit, since `not p(X, _)` is a + ;; valid idiom for "no Y exists s.t. p(X, Y)". Filter + ;; them out of the safety check. + (needed (dl-non-anon-vars (dl-vars-of inner))) + (missing (dl-vars-not-in needed bound))) + (cond + ((and (not (nil? inner-rn)) (dl-reserved-rel? inner-rn)) + (set! err + (str "negated literal uses reserved name '" + inner-rn + "' — nested `not(...)` / negated built-ins are " + "not supported; introduce an intermediate " + "relation and negate that"))) + ((> (len missing) 0) + (set! err + (str "negation refers to unbound variable(s) " + missing + " — they must be bound by an earlier " + "positive body literal")))))))) + (define + dl-process-agg! + (fn + (lit) + (let + ((result-var (nth lit 1))) + ;; Aggregate goal vars are existentially quantified within + ;; the aggregate; nothing required from outer context. The + ;; result var becomes bound after the aggregate fires. + (when + (dl-var? result-var) + (dl-add-bound! (list (symbol->string result-var))))))) + + (define + dl-process-lit! + (fn + (lit) + (when + (nil? err) + (cond + ((and (dict? lit) (has-key? lit :neg)) + (dl-process-neg! lit)) + ;; A bare dict that is not a recognised negation is + ;; almost certainly a typo (e.g. `{:negs ...}` instead + ;; of `{:neg ...}`). Without this guard the dict would + ;; silently fall through every clause; the head safety + ;; check would then flag the head variables as unbound + ;; even though the real bug is the malformed body lit. + ((dict? lit) + (set! err + (str "body literal is a dict but lacks :neg — " + "the only dict-shaped body lit recognised is " + "{:neg } for stratified " + "negation, got " lit))) + ((dl-aggregate? lit) (dl-process-agg! lit)) + ((dl-eq? lit) (dl-process-eq! lit)) + ((dl-is? lit) (dl-process-is! lit)) + ((dl-comparison? lit) (dl-process-cmp! lit)) + ((and (list? lit) (> (len lit) 0)) + (let ((rn (dl-rel-name lit))) + (cond + ((and (not (nil? rn)) (dl-reserved-rel? rn)) + (set! err + (str "body literal uses reserved name '" rn + "' — built-ins / aggregates have their own " + "syntax; nested `not(...)` is not supported " + "(use stratified negation via an " + "intermediate relation)"))) + (else (dl-add-bound! (dl-vars-of lit)))))) + (else + ;; Anything that's not a dict, not a list, or an + ;; empty list. Numbers / strings / symbols as body + ;; lits don't make sense — surface the type. + (set! err + (str "body literal must be a positive lit, " + "built-in, aggregate, or {:neg ...} dict, " + "got " lit))))))) + (for-each dl-process-lit! body) + (when + (nil? err) + (let + ((head-vars (dl-vars-of head)) (missing (list))) + (do + (for-each + (fn + (v) + (when + (and (not (dl-member-string? v bound)) (not (= v "_"))) + (append! missing v))) + head-vars) + (when + (> (len missing) 0) + (set! + err + (str + "head variable(s) " + missing + " do not appear in any positive body literal")))))) + err)))) diff --git a/lib/datalog/conformance.conf b/lib/datalog/conformance.conf new file mode 100644 index 00000000..a6a0c747 --- /dev/null +++ b/lib/datalog/conformance.conf @@ -0,0 +1,32 @@ +# Datalog conformance config — sourced by lib/guest/conformance.sh. + +LANG_NAME=datalog +MODE=dict + +PRELOADS=( + lib/datalog/tokenizer.sx + lib/datalog/parser.sx + lib/datalog/unify.sx + lib/datalog/db.sx + lib/datalog/builtins.sx + lib/datalog/aggregates.sx + lib/datalog/strata.sx + lib/datalog/eval.sx + lib/datalog/api.sx + lib/datalog/magic.sx + lib/datalog/demo.sx +) + +SUITES=( + "tokenize:lib/datalog/tests/tokenize.sx:(dl-tokenize-tests-run!)" + "parse:lib/datalog/tests/parse.sx:(dl-parse-tests-run!)" + "unify:lib/datalog/tests/unify.sx:(dl-unify-tests-run!)" + "eval:lib/datalog/tests/eval.sx:(dl-eval-tests-run!)" + "builtins:lib/datalog/tests/builtins.sx:(dl-builtins-tests-run!)" + "semi_naive:lib/datalog/tests/semi_naive.sx:(dl-semi-naive-tests-run!)" + "negation:lib/datalog/tests/negation.sx:(dl-negation-tests-run!)" + "aggregates:lib/datalog/tests/aggregates.sx:(dl-aggregates-tests-run!)" + "api:lib/datalog/tests/api.sx:(dl-api-tests-run!)" + "magic:lib/datalog/tests/magic.sx:(dl-magic-tests-run!)" + "demo:lib/datalog/tests/demo.sx:(dl-demo-tests-run!)" +) diff --git a/lib/datalog/conformance.sh b/lib/datalog/conformance.sh new file mode 100755 index 00000000..0b4a0e13 --- /dev/null +++ b/lib/datalog/conformance.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +# Thin wrapper — see lib/guest/conformance.sh and lib/datalog/conformance.conf. +exec bash "$(dirname "$0")/../guest/conformance.sh" "$(dirname "$0")/conformance.conf" "$@" diff --git a/lib/datalog/datalog.sx b/lib/datalog/datalog.sx new file mode 100644 index 00000000..54e2ce7d --- /dev/null +++ b/lib/datalog/datalog.sx @@ -0,0 +1,97 @@ +;; lib/datalog/datalog.sx — public API documentation index. +;; +;; This file is reference-only — `load` is an epoch-protocol command, +;; not an SX function, so it cannot reload a list of files from inside +;; another `.sx` file. To set up a fresh sx_server session with all +;; modules in scope, issue these loads in order: +;; +;; (load "lib/datalog/tokenizer.sx") +;; (load "lib/datalog/parser.sx") +;; (load "lib/datalog/unify.sx") +;; (load "lib/datalog/db.sx") +;; (load "lib/datalog/builtins.sx") +;; (load "lib/datalog/aggregates.sx") +;; (load "lib/datalog/strata.sx") +;; (load "lib/datalog/eval.sx") +;; (load "lib/datalog/api.sx") +;; (load "lib/datalog/magic.sx") +;; (load "lib/datalog/demo.sx") +;; +;; (lib/datalog/conformance.sh runs this load list automatically.) +;; +;; ── Public API surface ───────────────────────────────────────────── +;; +;; Source / data: +;; (dl-tokenize "src") → token list +;; (dl-parse "src") → parsed clauses +;; (dl-program "src") → db built from a source string +;; (dl-program-data facts rules) → db from SX data lists; rules +;; accept either dict form or +;; list form with `<-` arrow +;; +;; Construction (mutates db): +;; (dl-make-db) empty db +;; (dl-add-fact! db lit) rejects non-ground +;; (dl-add-rule! db rule) rejects unsafe rules +;; (dl-rule head body) dict-rule constructor +;; (dl-add-clause! db clause) parser output → fact or rule +;; (dl-load-program! db src) string source +;; (dl-set-strategy! db strategy) :semi-naive default; :magic +;; is informational, use +;; dl-magic-query for actual +;; magic-sets evaluation +;; +;; Mutation: +;; (dl-assert! db lit) add + re-saturate +;; (dl-retract! db lit) drop EDB, wipe IDB, re-saturate +;; (dl-clear-idb! db) wipe rule-headed relations +;; +;; Query / inspection: +;; (dl-saturate! db) stratified semi-naive default +;; (dl-saturate-naive! db) reference (slow on chains) +;; (dl-saturate-rules! db rules) per-rule-set semi-naive worker +;; (dl-query db goal) list of substitution dicts +;; (dl-relation db rel-name) tuple list for a relation +;; (dl-rules db) rule list +;; (dl-fact-count db) total ground tuples +;; (dl-summary db) {: count} for inspection +;; +;; Single-call convenience: +;; (dl-eval source query-source) parse, run, return substs +;; (dl-eval-magic source query-source) single-goal → magic-sets +;; +;; Magic-sets (lib/datalog/magic.sx): +;; (dl-adorn-goal goal) "b/f" adornment string +;; (dl-rule-sips rule head-adn) SIPS analysis per body lit +;; (dl-magic-rewrite rules rel adn args) +;; rewritten rule list + seed +;; (dl-magic-query db query-goal) end-to-end magic-sets query +;; +;; ── Body literal kinds ───────────────────────────────────────────── +;; +;; Positive (rel arg ... arg) +;; Negation {:neg (rel arg ...)} +;; Comparison (< X Y), (<= X Y), (> X Y), (>= X Y), +;; (= X Y), (!= X Y) +;; Arithmetic (is Z (+ X Y)) and (- * /) +;; Aggregation (count R V Goal), (sum R V Goal), +;; (min R V Goal), (max R V Goal), +;; (findall L V Goal) +;; +;; ── Variable conventions ─────────────────────────────────────────── +;; +;; Variables: SX symbols whose first char is uppercase A–Z or '_'. +;; Anonymous '_' is renamed to a fresh _anon per occurrence at +;; rule/query load time so multiple '_' don't unify. +;; +;; ── Demo programs ────────────────────────────────────────────────── +;; +;; See lib/datalog/demo.sx — federation, content, permissions, and +;; the canonical "cooking posts by people I follow (transitively)" +;; example. +;; +;; ── Status ───────────────────────────────────────────────────────── +;; +;; See plans/datalog-on-sx.md — phase-by-phase progress log and +;; roadmap. Run `bash lib/datalog/conformance.sh` to refresh +;; `lib/datalog/scoreboard.{json,md}`. diff --git a/lib/datalog/db.sx b/lib/datalog/db.sx new file mode 100644 index 00000000..93544c7e --- /dev/null +++ b/lib/datalog/db.sx @@ -0,0 +1,575 @@ +;; lib/datalog/db.sx — Datalog database (EDB + IDB + rules) + safety hook. +;; +;; A db is a mutable dict: +;; {:facts { -> (literal ...)} +;; :rules ({:head literal :body (literal ...)} ...)} +;; +;; Facts are stored as full literals `(rel arg ... arg)` so they unify +;; directly against rule body literals. Each relation's tuple list is +;; deduplicated on insert. +;; +;; Phase 3 introduced safety analysis for head variables; Phase 4 (in +;; lib/datalog/builtins.sx) swaps in the real `dl-rule-check-safety`, +;; which is order-aware and understands built-in predicates. + +(define + dl-make-db + (fn () + {:facts {} + :facts-keys {} + :facts-index {} + :edb-keys {} + :rules (list) + :strategy :semi-naive})) + +;; Record (rel-key, tuple-key) as user-asserted EDB. dl-add-fact! calls +;; this when an explicit fact is added; the saturator (which uses +;; dl-add-derived!) does NOT, so derived tuples never appear here. +;; dl-retract! consults :edb-keys to know which tuples must survive +;; the wipe-and-resaturate round-trip. +(define + dl-mark-edb! + (fn + (db rel-key tk) + (let + ((edb (get db :edb-keys))) + (do + (when + (not (has-key? edb rel-key)) + (dict-set! edb rel-key {})) + (dict-set! (get edb rel-key) tk true))))) + +(define + dl-edb-fact? + (fn + (db rel-key tk) + (let + ((edb (get db :edb-keys))) + (and (has-key? edb rel-key) + (has-key? (get edb rel-key) tk))))) + +;; Evaluation strategy. Default :semi-naive (used by dl-saturate!). +;; :naive selects dl-saturate-naive! (slower but easier to reason +;; about). :magic is a marker — goal-directed magic-sets evaluation +;; is invoked separately via `dl-magic-query`; setting :magic here +;; is purely informational. Any other value is rejected so typos +;; don't silently fall back to the default. +(define + dl-strategy-values + (list :semi-naive :naive :magic)) + +(define + dl-set-strategy! + (fn + (db strategy) + (cond + ((not (dl-keyword-member? strategy dl-strategy-values)) + (error (str "dl-set-strategy!: unknown strategy " strategy + " — must be one of " dl-strategy-values))) + (else + (do + (dict-set! db :strategy strategy) + db))))) + +(define + dl-keyword-member? + (fn + (k xs) + (cond + ((= (len xs) 0) false) + ((= k (first xs)) true) + (else (dl-keyword-member? k (rest xs)))))) + +(define + dl-get-strategy + (fn + (db) + (if (has-key? db :strategy) (get db :strategy) :semi-naive))) + +(define + dl-rel-name + (fn + (lit) + (cond + ((and (dict? lit) (has-key? lit :neg)) (dl-rel-name (get lit :neg))) + ((and (list? lit) (> (len lit) 0) (symbol? (first lit))) + (symbol->string (first lit))) + (else nil)))) + +(define dl-builtin-rels (list "<" "<=" ">" ">=" "=" "!=" "is")) + +(define + dl-member-string? + (fn + (s xs) + (cond + ((= (len xs) 0) false) + ((= (first xs) s) true) + (else (dl-member-string? s (rest xs)))))) + +(define + dl-builtin? + (fn + (lit) + (and + (list? lit) + (> (len lit) 0) + (let + ((rel (dl-rel-name lit))) + (cond + ((nil? rel) false) + (else (dl-member-string? rel dl-builtin-rels))))))) + +(define + dl-positive-lit? + (fn + (lit) + (cond + ((and (dict? lit) (has-key? lit :neg)) false) + ((dl-builtin? lit) false) + ((and (list? lit) (> (len lit) 0)) true) + (else false)))) + +(define + dl-tuple-equal? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-tuple-equal-list? a b 0))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-tuple-equal-list? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-tuple-equal? (nth a i) (nth b i))) false) + (else (dl-tuple-equal-list? a b (+ i 1)))))) + +(define + dl-tuple-member? + (fn + (lit lits) + (dl-tuple-member-aux? lit lits 0 (len lits)))) + +(define + dl-tuple-member-aux? + (fn + (lit lits i n) + (cond + ((>= i n) false) + ((dl-tuple-equal? lit (nth lits i)) true) + (else (dl-tuple-member-aux? lit lits (+ i 1) n))))) + +(define + dl-ensure-rel! + (fn + (db rel-key) + (let + ((facts (get db :facts)) + (fk (get db :facts-keys)) + (fi (get db :facts-index))) + (do + (when + (not (has-key? facts rel-key)) + (dict-set! facts rel-key (list))) + (when + (not (has-key? fk rel-key)) + (dict-set! fk rel-key {})) + (when + (not (has-key? fi rel-key)) + (dict-set! fi rel-key {})) + (get facts rel-key))))) + +;; First-arg index helpers. Tuples are keyed by their first-after-rel +;; arg's `(str ...)`; when that arg is a constant, dl-match-positive +;; uses the index instead of scanning the full relation. +(define + dl-arg-key + (fn + (v) + (str v))) + +(define + dl-index-add! + (fn + (db rel-key lit) + (let + ((idx (get db :facts-index)) + (n (len lit))) + (when + (and (>= n 2) (has-key? idx rel-key)) + (let + ((rel-idx (get idx rel-key)) + (k (dl-arg-key (nth lit 1)))) + (do + (when + (not (has-key? rel-idx k)) + (dict-set! rel-idx k (list))) + (append! (get rel-idx k) lit))))))) + +(define + dl-index-lookup + (fn + (db rel-key arg-val) + (let + ((idx (get db :facts-index))) + (cond + ((not (has-key? idx rel-key)) (list)) + (else + (let ((rel-idx (get idx rel-key)) + (k (dl-arg-key arg-val))) + (if (has-key? rel-idx k) (get rel-idx k) (list)))))))) + +(define dl-tuple-key (fn (lit) (str lit))) + +(define + dl-rel-tuples + (fn + (db rel-key) + (let + ((facts (get db :facts))) + (if (has-key? facts rel-key) (get facts rel-key) (list))))) + +;; Reserved relation names: built-in / aggregate / negation / arrow. +;; Rules and facts may not have these as their head's relation, since +;; the saturator treats them specially or they are not relation names +;; at all. +(define + dl-reserved-rel-names + (list "not" "count" "sum" "min" "max" "findall" "is" + "<" "<=" ">" ">=" "=" "!=" "+" "-" "*" "/" ":-" "?-")) + +(define + dl-reserved-rel? + (fn + (name) (dl-member-string? name dl-reserved-rel-names))) + +;; Internal: append a derived tuple to :facts without the public +;; validation pass and without marking :edb-keys. Used by the saturator +;; (eval.sx) and magic-sets (magic.sx). Returns true if the tuple was +;; new, false if already present. +(define + dl-add-derived! + (fn + (db lit) + (let + ((rel-key (dl-rel-name lit))) + (let + ((tuples (dl-ensure-rel! db rel-key)) + (key-dict (get (get db :facts-keys) rel-key)) + (tk (dl-tuple-key lit))) + (cond + ((has-key? key-dict tk) false) + (else + (do + (dict-set! key-dict tk true) + (append! tuples lit) + (dl-index-add! db rel-key lit) + true))))))) + +;; A simple term — number, string, or symbol — i.e. anything legal +;; as an EDB fact arg. Compound (list) args belong only in body +;; literals where they encode arithmetic / aggregate sub-goals. +(define + dl-simple-term? + (fn + (term) + (or (number? term) (string? term) (symbol? term)))) + +(define + dl-args-simple? + (fn + (lit i n) + (cond + ((>= i n) true) + ((not (dl-simple-term? (nth lit i))) false) + (else (dl-args-simple? lit (+ i 1) n))))) + +(define + dl-add-fact! + (fn + (db lit) + (cond + ((not (and (list? lit) (> (len lit) 0))) + (error (str "dl-add-fact!: expected literal list, got " lit))) + ((dl-reserved-rel? (dl-rel-name lit)) + (error (str "dl-add-fact!: '" (dl-rel-name lit) + "' is a reserved name (built-in / aggregate / negation)"))) + ((not (dl-args-simple? lit 1 (len lit))) + (error (str "dl-add-fact!: fact args must be numbers, strings, " + "or symbols — compound args (e.g. arithmetic " + "expressions) are body-only and aren't evaluated " + "in fact position. got " lit))) + ((not (dl-ground? lit (dl-empty-subst))) + (error (str "dl-add-fact!: expected ground literal, got " lit))) + (else + (let + ((rel-key (dl-rel-name lit)) (tk (dl-tuple-key lit))) + (do + ;; Always mark EDB origin — even if the tuple key was already + ;; present (e.g. previously derived), so an explicit assert + ;; promotes it to EDB and protects it from the IDB wipe. + (dl-mark-edb! db rel-key tk) + (dl-add-derived! db lit))))))) + +;; The full safety check lives in builtins.sx (it has to know which +;; predicates are built-ins). dl-add-rule! calls it via forward +;; reference; load builtins.sx alongside db.sx in any setup that +;; adds rules. The fallback below is used if builtins.sx isn't loaded. +(define + dl-rule-check-safety + (fn + (rule) + (let + ((head-vars (dl-vars-of (get rule :head))) (body-vars (list))) + (do + (for-each + (fn + (lit) + (when + (and + (list? lit) + (> (len lit) 0) + (not (and (dict? lit) (has-key? lit :neg)))) + (for-each + (fn + (v) + (when + (not (dl-member-string? v body-vars)) + (append! body-vars v))) + (dl-vars-of lit)))) + (get rule :body)) + (let + ((missing (list))) + (do + (for-each + (fn + (v) + (when + (and + (not (dl-member-string? v body-vars)) + (not (= v "_"))) + (append! missing v))) + head-vars) + (cond + ((> (len missing) 0) + (str + "head variable(s) " + missing + " do not appear in any body literal")) + (else nil)))))))) + +(define + dl-rename-anon-term + (fn + (term next-name) + (cond + ((and (symbol? term) (= (symbol->string term) "_")) + (next-name)) + ((list? term) + (map (fn (x) (dl-rename-anon-term x next-name)) term)) + (else term)))) + +(define + dl-rename-anon-lit + (fn + (lit next-name) + (cond + ((and (dict? lit) (has-key? lit :neg)) + {:neg (dl-rename-anon-term (get lit :neg) next-name)}) + ((list? lit) (dl-rename-anon-term lit next-name)) + (else lit)))) + +(define + dl-make-anon-renamer + (fn + (start) + (let ((counter start)) + (fn () (do (set! counter (+ counter 1)) + (string->symbol (str "_anon" counter))))))) + +;; Scan a rule for variables already named `_anon` (which would +;; otherwise collide with the renamer's output). Returns the max N +;; seen, or 0 if none. The renamer then starts at that max + 1, so +;; freshly-introduced anonymous names can't shadow a user-written +;; `_anon` symbol. +(define + dl-max-anon-num + (fn + (term acc) + (cond + ((symbol? term) + (let ((s (symbol->string term))) + (cond + ((and (>= (len s) 6) (= (slice s 0 5) "_anon")) + (let ((n (dl-try-parse-int (slice s 5 (len s))))) + (cond + ((nil? n) acc) + ((> n acc) n) + (else acc)))) + (else acc)))) + ((dict? term) + (cond + ((has-key? term :neg) + (dl-max-anon-num (get term :neg) acc)) + (else acc))) + ((list? term) (dl-max-anon-num-list term acc 0)) + (else acc)))) + +(define + dl-max-anon-num-list + (fn + (xs acc i) + (cond + ((>= i (len xs)) acc) + (else + (dl-max-anon-num-list xs (dl-max-anon-num (nth xs i) acc) (+ i 1)))))) + +;; Cheap "is this string a decimal int" check. Returns the number or +;; nil. Avoids relying on host parse-number, which on non-int strings +;; might raise rather than return nil. +(define + dl-try-parse-int + (fn + (s) + (cond + ((= (len s) 0) nil) + ((not (dl-all-digits? s 0 (len s))) nil) + (else (parse-number s))))) + +(define + dl-all-digits? + (fn + (s i n) + (cond + ((>= i n) true) + ((let ((c (slice s i (+ i 1)))) + (not (and (>= c "0") (<= c "9")))) + false) + (else (dl-all-digits? s (+ i 1) n))))) + +(define + dl-rename-anon-rule + (fn + (rule) + (let + ((start (dl-max-anon-num (get rule :head) + (dl-max-anon-num-list (get rule :body) 0 0)))) + (let ((next-name (dl-make-anon-renamer start))) + {:head (dl-rename-anon-term (get rule :head) next-name) + :body (map (fn (lit) (dl-rename-anon-lit lit next-name)) + (get rule :body))})))) + +(define + dl-add-rule! + (fn + (db rule) + (cond + ((not (dict? rule)) + (error (str "dl-add-rule!: expected rule dict, got " rule))) + ((not (has-key? rule :head)) + (error (str "dl-add-rule!: rule missing :head, got " rule))) + ((not (and (list? (get rule :head)) + (> (len (get rule :head)) 0) + (symbol? (first (get rule :head))))) + (error (str "dl-add-rule!: head must be a non-empty list " + "starting with a relation-name symbol, got " + (get rule :head)))) + ((not (dl-args-simple? (get rule :head) 1 (len (get rule :head)))) + (error (str "dl-add-rule!: rule head args must be variables or " + "constants — compound terms (e.g. `(*(X, 2))`) are " + "not legal in head position; introduce an `is`-bound " + "intermediate in the body. got " (get rule :head)))) + ((not (list? (if (has-key? rule :body) (get rule :body) (list)))) + (error (str "dl-add-rule!: body must be a list of literals, got " + (get rule :body)))) + ((dl-reserved-rel? (dl-rel-name (get rule :head))) + (error (str "dl-add-rule!: '" (dl-rel-name (get rule :head)) + "' is a reserved name (built-in / aggregate / negation)"))) + (else + (let ((rule (dl-rename-anon-rule rule))) + (let + ((err (dl-rule-check-safety rule))) + (cond + ((not (nil? err)) (error (str "dl-add-rule!: " err))) + (else + (let + ((rules (get db :rules))) + (do (append! rules rule) true)))))))))) + +(define + dl-add-clause! + (fn + (db clause) + (cond + ((has-key? clause :query) false) + ((and (has-key? clause :body) (= (len (get clause :body)) 0)) + (dl-add-fact! db (get clause :head))) + (else (dl-add-rule! db clause))))) + +(define + dl-load-program! + (fn + (db source) + (let + ((clauses (dl-parse source))) + (do (for-each (fn (c) (dl-add-clause! db c)) clauses) db)))) + +(define + dl-program + (fn (source) (let ((db (dl-make-db))) (dl-load-program! db source)))) + +(define dl-rules (fn (db) (get db :rules))) + +(define + dl-fact-count + (fn + (db) + (let + ((facts (get db :facts)) (total 0)) + (do + (for-each + (fn (k) (set! total (+ total (len (get facts k))))) + (keys facts)) + total)))) + +;; Returns {: tuple-count} for debugging. Includes +;; relations with any tuples plus all rule-head relations (so empty +;; IDB shows as 0). Skips empty EDB-only entries that are placeholders +;; from internal `dl-ensure-rel!` calls. +(define + dl-summary + (fn + (db) + (let + ((facts (get db :facts)) + (out {}) + (rule-heads (list))) + (do + (for-each + (fn + (rule) + (let ((h (dl-rel-name (get rule :head)))) + (when + (and (not (nil? h)) (not (dl-member-string? h rule-heads))) + (append! rule-heads h)))) + (dl-rules db)) + (for-each + (fn + (k) + (let ((c (len (get facts k)))) + (when + (or (> c 0) (dl-member-string? k rule-heads)) + (dict-set! out k c)))) + (keys facts)) + ;; Add rule heads that have no facts (yet). + (for-each + (fn + (k) + (when (not (has-key? out k)) (dict-set! out k 0))) + rule-heads) + out)))) diff --git a/lib/datalog/demo.sx b/lib/datalog/demo.sx new file mode 100644 index 00000000..657c64c1 --- /dev/null +++ b/lib/datalog/demo.sx @@ -0,0 +1,162 @@ +;; lib/datalog/demo.sx — example programs over rose-ash-shaped data. +;; +;; Phase 10 prototypes Datalog as a rose-ash query language. Wiring +;; the EDB to actual PostgreSQL is out of scope for this loop (it +;; would touch service code outside lib/datalog/), but the programs +;; below show the shape of queries we want, and the test suite runs +;; them against synthetic in-memory tuples loaded via dl-program-data. +;; +;; Seven thematic demos: +;; +;; 1. Federation — follow graph, transitive reach, mutuals, FOAF. +;; 2. Content — posts, tags, likes, popularity, "for you" feed. +;; 3. Permissions — group membership and resource access. +;; 4. Cooking-posts — canonical "posts about cooking by people I +;; follow (transitively)" multi-domain query. +;; 5. Tag co-occurrence — distinct (T1, T2) pairs with counts. +;; 6. Shortest path — weighted-DAG path enumeration + min agg. +;; 7. Org chart — transitive subordinate + headcount per mgr. + +;; ── Demo 1: federation follow graph ───────────────────────────── +;; EDB: (follows ACTOR-A ACTOR-B) — A follows B. +;; IDB: +;; (mutual A B) — A follows B and B follows A +;; (reachable A B) — transitive follow closure +;; (foaf A C) — friend of a friend (mutual filter) +(define + dl-demo-federation-rules + (quote + ((mutual A B <- (follows A B) (follows B A)) + (reachable A B <- (follows A B)) + (reachable A C <- (follows A B) (reachable B C)) + (foaf A C <- (follows A B) (follows B C) (!= A C))))) + +;; ── Demo 2: content recommendation ────────────────────────────── +;; EDB: +;; (authored ACTOR POST) +;; (tagged POST TAG) +;; (liked ACTOR POST) +;; IDB: +;; (post-likes POST N) — count of likes per post +;; (popular POST) — posts with >= 3 likes +;; (tagged-by-mutual ACTOR POST) — post tagged TOPIC by someone +;; A's mutuals follow. +(define + dl-demo-content-rules + (quote + ((post-likes P N <- (authored Author P) (count N L (liked L P))) + (popular P <- (authored Author P) (post-likes P N) (>= N 3)) + (interesting Me P + <- + (follows Me Buddy) + (authored Buddy P) + (popular P))))) + +;; ── Demo 3: role-based permissions ────────────────────────────── +;; EDB: +;; (member ACTOR GROUP) +;; (subgroup CHILD PARENT) +;; (allowed GROUP RESOURCE) +;; IDB: +;; (in-group ACTOR GROUP) — direct or via subgroup chain +;; (can-access ACTOR RESOURCE) — actor inherits group permission +(define + dl-demo-perm-rules + (quote + ((in-group A G <- (member A G)) + (in-group A G <- (member A H) (subgroup-trans H G)) + (subgroup-trans X Y <- (subgroup X Y)) + (subgroup-trans X Z <- (subgroup X Y) (subgroup-trans Y Z)) + (can-access A R <- (in-group A G) (allowed G R))))) + +;; ── Demo 4: cooking-posts (the canonical Phase 10 query) ──────── +;; "Posts about cooking by people I follow (transitively)." +;; Combines federation (follows + transitive reach), authoring, +;; tagging — the rose-ash multi-domain join. +;; +;; EDB: +;; (follows ACTOR-A ACTOR-B) +;; (authored ACTOR POST) +;; (tagged POST TAG) +(define + dl-demo-cooking-rules + (quote + ((reach Me Them <- (follows Me Them)) + (reach Me Them <- (follows Me X) (reach X Them)) + (cooking-post-by-network Me P + <- + (reach Me Author) + (authored Author P) + (tagged P cooking))))) + +;; ── Demo 5: tag co-occurrence ─────────────────────────────────── +;; "Posts tagged with both T1 AND T2." Useful for narrowed-down +;; recommendations like "vegetarian cooking" posts. +;; +;; EDB: +;; (tagged POST TAG) +;; IDB: +;; (cotagged POST T1 T2) — post has both T1 and T2 (T1 != T2) +;; (popular-pair T1 T2 N) — count of posts cotagged (T1, T2) +(define + dl-demo-tag-cooccur-rules + (quote + ((cotagged P T1 T2 <- (tagged P T1) (tagged P T2) (!= T1 T2)) + ;; Distinct (T1, T2) pairs that occur somewhere. + (tag-pair T1 T2 <- (cotagged P T1 T2)) + (tag-pair-count T1 T2 N + <- + (tag-pair T1 T2) + (count N P (cotagged P T1 T2)))))) + +;; ── Demo 6: weighted-DAG shortest path ───────────────────────── +;; "What's the cheapest way from X to Y?" Edge weights with `is` +;; arithmetic to sum costs, then `min` aggregation to pick the +;; shortest. Termination requires the graph to be a DAG (cycles +;; would produce infinite distances without a bound; programs +;; built on this should add a depth filter `(<, D, MAX)` if cycles +;; are possible). +;; +;; EDB: +;; (edge FROM TO COST) +;; IDB: +;; (path FROM TO COST) — any path +;; (shortest FROM TO COST) — minimum cost path +(define + dl-demo-shortest-path-rules + (quote + ((path X Y W <- (edge X Y W)) + (path X Z W + <- + (edge X Y W1) + (path Y Z W2) + (is W (+ W1 W2))) + (shortest X Y W <- (path X Y _) (min W C (path X Y C)))))) + +;; ── Demo 7: org chart + transitive headcount ─────────────────── +;; Manager graph: each employee has a single manager. Compute the +;; transitive subordinate set and headcount per manager. +;; +;; EDB: +;; (manager EMP MGR) — EMP reports directly to MGR +;; IDB: +;; (subordinate MGR EMP) — EMP is in MGR's subtree +;; (headcount MGR N) — number of subordinates under MGR +(define + dl-demo-org-rules + (quote + ((subordinate Mgr Emp <- (manager Emp Mgr)) + (subordinate Mgr Emp + <- (manager Mid Mgr) (subordinate Mid Emp)) + (headcount Mgr N + <- (subordinate Mgr Anyone) (count N E (subordinate Mgr E)))))) + +;; ── Loader stub ────────────────────────────────────────────────── +;; Wiring to PostgreSQL would replace these helpers with calls into +;; rose-ash's internal HTTP RPC (fetch_data → /internal/data/...). +;; The shape returned by dl-load-from-edb! is the same in either case. +(define + dl-demo-make + (fn + (facts rules) + (dl-program-data facts rules))) diff --git a/lib/datalog/eval.sx b/lib/datalog/eval.sx new file mode 100644 index 00000000..abf8e458 --- /dev/null +++ b/lib/datalog/eval.sx @@ -0,0 +1,512 @@ +;; lib/datalog/eval.sx — fixpoint evaluator (naive + semi-naive). +;; +;; Two saturators are exposed: +;; `dl-saturate-naive!` — re-joins each rule against the full DB every +;; iteration. Reference implementation; useful for differential tests. +;; `dl-saturate!` — semi-naive default. Tracks per-relation delta +;; sets and substitutes one positive body literal per rule with the +;; delta of its relation, joining the rest against the previous- +;; iteration DB. Same fixpoint, dramatically less work on recursive +;; rules. +;; +;; Body literal kinds: +;; positive (rel arg ... arg) → match against EDB+IDB tuples +;; built-in (< X Y), (is X e) → constraint via dl-eval-builtin +;; negation {:neg lit} → Phase 7 + +(define + dl-match-positive + (fn + (lit db subst) + (let + ((rel (dl-rel-name lit)) (results (list))) + (cond + ((nil? rel) (error (str "dl-match-positive: bad literal " lit))) + (else + (let + ;; If the first argument walks to a non-variable (constant + ;; or already-bound var), use the first-arg index for + ;; this relation. Otherwise scan the full tuple list. + ((tuples + (cond + ((>= (len lit) 2) + (let ((walked (dl-walk (nth lit 1) subst))) + (cond + ((dl-var? walked) (dl-rel-tuples db rel)) + (else (dl-index-lookup db rel walked))))) + (else (dl-rel-tuples db rel))))) + (do + (for-each + (fn + (tuple) + (let + ((s (dl-unify lit tuple subst))) + (when (not (nil? s)) (append! results s)))) + tuples) + results))))))) + +;; Match a positive literal against the delta set for its relation only. +(define + dl-match-positive-delta + (fn + (lit delta subst) + (let + ((rel (dl-rel-name lit)) (results (list))) + (let + ((tuples (if (has-key? delta rel) (get delta rel) (list)))) + (do + (for-each + (fn + (tuple) + (let + ((s (dl-unify lit tuple subst))) + (when (not (nil? s)) (append! results s)))) + tuples) + results))))) + +;; Naive matcher (for dl-saturate-naive! and dl-query post-saturation). +(define + dl-match-negation + (fn + (inner db subst) + (let + ((walked (dl-apply-subst inner subst)) + (matches (dl-match-positive inner db subst))) + (cond + ((= (len matches) 0) (list subst)) + (else (list)))))) + +(define + dl-match-lit + (fn + (lit db subst) + (cond + ((and (dict? lit) (has-key? lit :neg)) + (dl-match-negation (get lit :neg) db subst)) + ((dl-aggregate? lit) (dl-eval-aggregate lit db subst)) + ((dl-builtin? lit) + (let + ((s (dl-eval-builtin lit subst))) + (if (nil? s) (list) (list s)))) + ((and (list? lit) (> (len lit) 0)) + (dl-match-positive lit db subst)) + (else (error (str "datalog: unknown body-literal shape: " lit)))))) + +(define + dl-find-bindings + (fn (lits db subst) (dl-fb-aux lits db subst 0 (len lits)))) + +(define + dl-fb-aux + (fn + (lits db subst i n) + (cond + ((nil? subst) (list)) + ((>= i n) (list subst)) + (else + (let + ((options (dl-match-lit (nth lits i) db subst)) + (results (list))) + (do + (for-each + (fn + (s) + (for-each + (fn (s2) (append! results s2)) + (dl-fb-aux lits db s (+ i 1) n))) + options) + results)))))) + +;; Naive: apply each rule against full DB until no new tuples. +(define + dl-apply-rule! + (fn + (db rule) + (let + ((head (get rule :head)) (body (get rule :body)) (new? false)) + (do + (for-each + (fn + (s) + (let + ((derived (dl-apply-subst head s))) + (when (dl-add-derived! db derived) (set! new? true)))) + (dl-find-bindings body db (dl-empty-subst))) + new?)))) + +;; Returns true iff one more saturation step would derive no new +;; tuples (i.e. the db is at fixpoint). Useful in tests that want +;; to assert "no work left" after a saturation call. Works under +;; either saturator since both compute the same fixpoint. +(define + dl-saturated? + (fn + (db) + (let ((any-new false)) + (do + (for-each + (fn + (rule) + (when (not any-new) + (for-each + (fn + (s) + (let ((derived (dl-apply-subst (get rule :head) s))) + (when + (and (not any-new) + (not (dl-tuple-member? + derived + (dl-rel-tuples + db (dl-rel-name derived))))) + (set! any-new true)))) + (dl-find-bindings (get rule :body) db (dl-empty-subst))))) + (dl-rules db)) + (not any-new))))) + +(define + dl-saturate-naive! + (fn + (db) + (let + ((changed true)) + (do + (define + dl-snloop + (fn + () + (when + changed + (do + (set! changed false) + (for-each + (fn (r) (when (dl-apply-rule! db r) (set! changed true))) + (dl-rules db)) + (dl-snloop))))) + (dl-snloop) + db)))) + +;; ── Semi-naive ─────────────────────────────────────────────────── + +;; Take a snapshot dict {rel -> tuples} of every relation currently in +;; the DB. Used as initial delta for the first iteration. +(define + dl-snapshot-facts + (fn + (db) + (let + ((facts (get db :facts)) (out {})) + (do + (for-each + (fn (k) (dict-set! out k (dl-copy-list (get facts k)))) + (keys facts)) + out)))) + +(define + dl-copy-list + (fn + (xs) + (let + ((out (list))) + (do (for-each (fn (x) (append! out x)) xs) out)))) + +;; Does any relation in `delta` have ≥1 tuple? +(define + dl-delta-empty? + (fn + (delta) + (let + ((ks (keys delta)) (any-non-empty false)) + (do + (for-each + (fn + (k) + (when + (> (len (get delta k)) 0) + (set! any-non-empty true))) + ks) + (not any-non-empty))))) + +;; Find substitutions such that `lits` are all satisfied AND `delta-idx` +;; is matched against the per-relation delta only. The other positive +;; literals match against the snapshot DB (db.facts read at iteration +;; start). Built-ins and negations behave as in `dl-match-lit`. +(define + dl-find-bindings-semi + (fn + (lits db delta delta-idx subst) + (dl-fbs-aux lits db delta delta-idx 0 subst))) + +(define + dl-fbs-aux + (fn + (lits db delta delta-idx i subst) + (cond + ((nil? subst) (list)) + ((>= i (len lits)) (list subst)) + (else + (let + ((lit (nth lits i)) + (options + (cond + ((and (dict? lit) (has-key? lit :neg)) + (dl-match-negation (get lit :neg) db subst)) + ((dl-aggregate? lit) (dl-eval-aggregate lit db subst)) + ((dl-builtin? lit) + (let + ((s (dl-eval-builtin lit subst))) + (if (nil? s) (list) (list s)))) + ((and (list? lit) (> (len lit) 0)) + (if + (= i delta-idx) + (dl-match-positive-delta lit delta subst) + (dl-match-positive lit db subst))) + (else (error (str "datalog: unknown body-lit: " lit))))) + (results (list))) + (do + (for-each + (fn + (s) + (for-each + (fn (s2) (append! results s2)) + (dl-fbs-aux lits db delta delta-idx (+ i 1) s))) + options) + results)))))) + +;; Collect candidate head tuples from a rule using delta. Walks every +;; positive body position and unions the resulting heads. For rules +;; with no positive body literal, falls back to a naive single-pass +;; (so static facts like `(p X) :- (= X 5).` derive on iteration 1). +(define + dl-collect-rule-candidates + (fn + (rule db delta) + (let + ((head (get rule :head)) + (body (get rule :body)) + (out (list)) + (saw-pos false)) + (do + (define + dl-cri + (fn + (i) + (when + (< i (len body)) + (do + (let + ((lit (nth body i))) + (when + (dl-positive-lit? lit) + (do + (set! saw-pos true) + (for-each + (fn (s) (append! out (dl-apply-subst head s))) + (dl-find-bindings-semi + body + db + delta + i + (dl-empty-subst)))))) + (dl-cri (+ i 1)))))) + (dl-cri 0) + (when + (not saw-pos) + (for-each + (fn (s) (append! out (dl-apply-subst head s))) + (dl-find-bindings body db (dl-empty-subst)))) + out)))) + +;; Add a list of candidate tuples to db; collect newly-added ones into +;; the new-delta dict (keyed by relation name). +(define + dl-commit-candidates! + (fn + (db candidates new-delta) + (for-each + (fn + (lit) + (when + (dl-add-derived! db lit) + (let + ((rel (dl-rel-name lit))) + (do + (when + (not (has-key? new-delta rel)) + (dict-set! new-delta rel (list))) + (append! (get new-delta rel) lit))))) + candidates))) + +(define + dl-saturate-rules! + (fn + (db rules) + (let + ((delta (dl-snapshot-facts db))) + (do + (define + dl-sr-step + (fn + () + (let + ((pending (list)) (new-delta {})) + (do + (for-each + (fn + (rule) + (for-each + (fn (cand) (append! pending cand)) + (dl-collect-rule-candidates rule db delta))) + rules) + (dl-commit-candidates! db pending new-delta) + (cond + ((dl-delta-empty? new-delta) nil) + (else (do (set! delta new-delta) (dl-sr-step)))))))) + (dl-sr-step) + db)))) + +;; Stratified driver: rejects non-stratifiable programs at saturation +;; time, then iterates strata in increasing order, running semi-naive on +;; the rules whose head sits in that stratum. +(define + dl-saturate! + (fn + (db) + (let + ((err (dl-check-stratifiable db))) + (cond + ((not (nil? err)) (error (str "dl-saturate!: " err))) + (else + (let + ((strata (dl-compute-strata db))) + (let + ((grouped (dl-group-rules-by-stratum db strata))) + (let + ((groups (get grouped :groups)) + (max-s (get grouped :max))) + (do + (define + dl-strat-loop + (fn + (s) + (when + (<= s max-s) + (let + ((sk (str s))) + (do + (when + (has-key? groups sk) + (dl-saturate-rules! db (get groups sk))) + (dl-strat-loop (+ s 1))))))) + (dl-strat-loop 0) + db))))))))) + +;; ── Querying ───────────────────────────────────────────────────── + +;; Coerce a query argument to a list of body literals. A single literal +;; like `(p X)` (positive — head is a symbol) or `{:neg ...}` becomes +;; `((p X))`. A list of literals like `((p X) (q X))` is returned as-is. +(define + dl-query-coerce + (fn + (goal) + (cond + ((and (dict? goal) (has-key? goal :neg)) (list goal)) + ((and (list? goal) (> (len goal) 0) (symbol? (first goal))) + (list goal)) + ((list? goal) goal) + (else (error (str "dl-query: unrecognised goal shape: " goal)))))) + +(define + dl-query + (fn + (db goal) + (do + (dl-saturate! db) + ;; Rename anonymous '_' vars in each goal literal so multiple + ;; occurrences do not unify together. Keep the user-facing var + ;; list (taken before renaming) so projected results retain user + ;; names. + (let + ((goals (dl-query-coerce goal)) + ;; Start the renamer past any `_anon` symbols the user + ;; may have written in the query — avoids collision. + (renamer + (dl-make-anon-renamer (dl-max-anon-num-list goal 0 0)))) + (let + ((user-vars (dl-query-user-vars goals)) + (renamed (map (fn (g) (dl-rename-anon-lit g renamer)) goals))) + (let + ((substs (dl-find-bindings renamed db (dl-empty-subst))) + (results (list))) + (do + (for-each + (fn + (s) + (let + ((proj (dl-project-subst s user-vars))) + (when + (not (dl-tuple-member? proj results)) + (append! results proj)))) + substs) + results))))))) + +(define + dl-query-user-vars + (fn + (goals) + (let ((seen (list))) + (do + (for-each + (fn + (g) + (cond + ((and (dict? g) (has-key? g :neg)) + (for-each + (fn + (v) + (when + (and (not (= v "_")) (not (dl-member-string? v seen))) + (append! seen v))) + (dl-vars-of (get g :neg)))) + ((dl-aggregate? g) + ;; Only the result var (first arg of the aggregate + ;; literal) is user-facing. The aggregated var and + ;; any vars in the inner goal are internal. + (let ((r (nth g 1))) + (when + (dl-var? r) + (let ((rn (symbol->string r))) + (when + (and (not (= rn "_")) + (not (dl-member-string? rn seen))) + (append! seen rn)))))) + (else + (for-each + (fn + (v) + (when + (and (not (= v "_")) (not (dl-member-string? v seen))) + (append! seen v))) + (dl-vars-of g))))) + goals) + seen)))) + +(define + dl-project-subst + (fn + (subst names) + (let + ((out {})) + (do + (for-each + (fn + (n) + (let + ((sym (string->symbol n))) + (let + ((v (dl-walk sym subst))) + (dict-set! out n (dl-apply-subst v subst))))) + names) + out)))) + +(define dl-relation (fn (db name) (dl-rel-tuples db name))) diff --git a/lib/datalog/magic.sx b/lib/datalog/magic.sx new file mode 100644 index 00000000..448042e9 --- /dev/null +++ b/lib/datalog/magic.sx @@ -0,0 +1,464 @@ +;; lib/datalog/magic.sx — adornment analysis + sideways info passing. +;; +;; First step of the magic-sets transformation (Phase 6). Right now +;; the saturator does not consume these — they are introspection +;; helpers that future magic-set rewriting will build on top of. +;; +;; Definitions: +;; - An *adornment* of an n-ary literal is an n-character string +;; of "b" (bound — value already known at the call site) and +;; "f" (free — to be derived). +;; - SIPS (Sideways Information Passing Strategy) walks the body +;; of an adorned rule left-to-right tracking which variables +;; have been bound so far, computing each body literal's +;; adornment in turn. +;; +;; Usage: +;; +;; (dl-adorn-goal '(ancestor tom X)) +;; => "bf" +;; +;; (dl-rule-sips +;; {:head (ancestor X Z) +;; :body ((parent X Y) (ancestor Y Z))} +;; "bf") +;; => ({:lit (parent X Y) :adornment "bf"} +;; {:lit (ancestor Y Z) :adornment "bf"}) + +;; Per-arg adornment under the current bound-var name set. +(define + dl-adorn-arg + (fn + (arg bound) + (cond + ((dl-var? arg) + (if (dl-member-string? (symbol->string arg) bound) "b" "f")) + (else "b")))) + +;; Adornment for the args of a literal (after the relation name). +(define + dl-adorn-args + (fn + (args bound) + (cond + ((= (len args) 0) "") + (else + (str + (dl-adorn-arg (first args) bound) + (dl-adorn-args (rest args) bound)))))) + +;; Adornment of a top-level goal under the empty bound-var set. +(define + dl-adorn-goal + (fn (goal) (dl-adorn-args (rest goal) (list)))) + +;; Adornment of a literal under an explicit bound set. +(define + dl-adorn-lit + (fn (lit bound) (dl-adorn-args (rest lit) bound))) + +;; The set of variable names made bound by walking a positive +;; literal whose adornment is known. Free positions add their +;; vars to the bound set. +(define + dl-vars-bound-by-lit + (fn + (lit bound) + (let ((args (rest lit)) (out (list))) + (do + (for-each + (fn (a) + (when + (and (dl-var? a) + (not (dl-member-string? (symbol->string a) bound)) + (not (dl-member-string? (symbol->string a) out))) + (append! out (symbol->string a)))) + args) + out)))) + +;; Walk the rule body left-to-right tracking bound vars seeded by the +;; head adornment. Returns a list of {:lit :adornment} entries. +;; +;; Negation, comparison, and built-ins are passed through with their +;; adornment computed from the current bound set; they don't add new +;; bindings (except `is`, which binds its left arg if a var). Aggregates +;; are treated like is — the result var becomes bound. +(define + dl-init-head-bound + (fn + (head adornment) + (let ((args (rest head)) (out (list))) + (do + (define + dl-ihb-loop + (fn + (i) + (when + (< i (len args)) + (do + (let + ((c (slice adornment i (+ i 1))) + (a (nth args i))) + (when + (and (= c "b") (dl-var? a)) + (let ((n (symbol->string a))) + (when + (not (dl-member-string? n out)) + (append! out n))))) + (dl-ihb-loop (+ i 1)))))) + (dl-ihb-loop 0) + out)))) + +(define + dl-rule-sips + (fn + (rule head-adornment) + (let + ((bound (dl-init-head-bound (get rule :head) head-adornment)) + (out (list))) + (do + (for-each + (fn + (lit) + (cond + ((and (dict? lit) (has-key? lit :neg)) + (let ((target (get lit :neg))) + (append! + out + {:lit lit :adornment (dl-adorn-lit target bound)}))) + ((dl-builtin? lit) + (let ((adn (dl-adorn-lit lit bound))) + (do + (append! out {:lit lit :adornment adn}) + ;; `is` binds its left arg (if var) once RHS is ground. + (when + (and (= (dl-rel-name lit) "is") (dl-var? (nth lit 1))) + (let ((n (symbol->string (nth lit 1)))) + (when + (not (dl-member-string? n bound)) + (append! bound n))))))) + ((and (list? lit) (dl-aggregate? lit)) + (let ((adn (dl-adorn-lit lit bound))) + (do + (append! out {:lit lit :adornment adn}) + ;; Result var (first arg) becomes bound. + (when (dl-var? (nth lit 1)) + (let ((n (symbol->string (nth lit 1)))) + (when + (not (dl-member-string? n bound)) + (append! bound n))))))) + ((and (list? lit) (> (len lit) 0)) + (let ((adn (dl-adorn-lit lit bound))) + (do + (append! out {:lit lit :adornment adn}) + (for-each + (fn (n) + (when (not (dl-member-string? n bound)) + (append! bound n))) + (dl-vars-bound-by-lit lit bound))))))) + (get rule :body)) + out)))) + +;; ── Magic predicate naming + bound-args extraction ───────────── +;; These are building blocks for the magic-sets *transformation* +;; itself. The transformation (which generates rewritten rules +;; with magic_^ filters) is future work — for now +;; these helpers can be used to inspect what such a transformation +;; would produce. + +;; "magic_p^bf" given relation "p" and adornment "bf". +(define + dl-magic-rel-name + (fn (rel adornment) (str "magic_" rel "^" adornment))) + +;; A magic predicate literal: +;; (magic_^ arg1 arg2 ...) +(define + dl-magic-lit + (fn + (rel adornment bound-args) + (cons (string->symbol (dl-magic-rel-name rel adornment)) bound-args))) + +;; Extract bound args (those at "b" positions in `adornment`) from a +;; literal `(rel arg1 arg2 ... argN)`. Returns the list of arg values. +(define + dl-bound-args + (fn + (lit adornment) + (let ((args (rest lit)) (out (list))) + (do + (define + dl-ba-loop + (fn + (i) + (when + (< i (len args)) + (do + (when + (= (slice adornment i (+ i 1)) "b") + (append! out (nth args i))) + (dl-ba-loop (+ i 1)))))) + (dl-ba-loop 0) + out)))) + +;; ── Magic-sets rewriter ───────────────────────────────────────── +;; +;; Given the original rule list and a query (rel, adornment) pair, +;; generates the magic-rewritten program: a list of rules that +;; (a) gate each original rule with a `magic_^` filter and +;; (b) propagate the magic relation through SIPS so that only +;; query-relevant tuples are derived. Seed facts are returned +;; separately and must be added to the db at evaluation time. +;; +;; Output: {:rules :seed } +;; +;; The rewriter only rewrites IDB rules; EDB facts pass through. +;; Built-in predicates and negation in body literals are kept in +;; place but do not generate propagation rules of their own. + +(define + dl-magic-pair-key + (fn (rel adornment) (str rel "^" adornment))) + +(define + dl-magic-rewrite + (fn + (rules query-rel query-adornment query-args) + (let + ((seen (list)) + (queue (list)) + (out (list))) + (do + (define + dl-mq-mark! + (fn + (rel adornment) + (let ((k (dl-magic-pair-key rel adornment))) + (when + (not (dl-member-string? k seen)) + (do + (append! seen k) + (append! queue {:rel rel :adn adornment})))))) + + (define + dl-mq-rewrite-rule! + (fn + (rule adn) + (let + ((head (get rule :head)) + (body (get rule :body)) + (sips (dl-rule-sips rule adn))) + (let + ((magic-filter + (dl-magic-lit + (dl-rel-name head) + adn + (dl-bound-args head adn)))) + (do + ;; Adorned rule: head :- magic-filter, body... + (let ((new-body (list))) + (do + (append! new-body magic-filter) + (for-each + (fn (lit) (append! new-body lit)) + body) + (append! out {:head head :body new-body}))) + ;; Propagation rules for each positive non-builtin + ;; body literal at position i. + (define + dl-mq-prop-loop + (fn + (i) + (when + (< i (len body)) + (do + (let + ((lit (nth body i)) + (sip-entry (nth sips i))) + (when + (and (list? lit) + (> (len lit) 0) + (not (and (dict? lit) (has-key? lit :neg))) + (not (dl-builtin? lit)) + (not (dl-aggregate? lit))) + (let + ((lit-adn (get sip-entry :adornment)) + (lit-rel (dl-rel-name lit))) + (let + ((prop-head + (dl-magic-lit + lit-rel + lit-adn + (dl-bound-args lit lit-adn)))) + (let ((prop-body (list))) + (do + (append! prop-body magic-filter) + (define + dl-mq-prefix-loop + (fn + (j) + (when + (< j i) + (do + (append! + prop-body + (nth body j)) + (dl-mq-prefix-loop (+ j 1)))))) + (dl-mq-prefix-loop 0) + (append! + out + {:head prop-head :body prop-body}) + (dl-mq-mark! lit-rel lit-adn))))))) + (dl-mq-prop-loop (+ i 1)))))) + (dl-mq-prop-loop 0)))))) + + (dl-mq-mark! query-rel query-adornment) + + (let ((idx 0)) + (define + dl-mq-process + (fn + () + (when + (< idx (len queue)) + (let ((item (nth queue idx))) + (do + (set! idx (+ idx 1)) + (let + ((rel (get item :rel)) (adn (get item :adn))) + (for-each + (fn + (rule) + (when + (= (dl-rel-name (get rule :head)) rel) + (dl-mq-rewrite-rule! rule adn))) + rules)) + (dl-mq-process)))))) + (dl-mq-process)) + + {:rules out + :seed + (dl-magic-lit + query-rel + query-adornment + query-args)})))) + +;; ── Top-level magic-sets driver ───────────────────────────────── +;; +;; (dl-magic-query db query-goal) — run `query-goal` under magic-sets +;; evaluation. Builds a fresh internal db with: +;; - the caller's EDB facts (relations not headed by any rule), +;; - the magic seed fact, and +;; - the rewritten rules. +;; Saturates and queries, returning the substitution list. The +;; caller's db is untouched. +;; +;; Useful primarily as a perf alternative for queries that only +;; need a small slice of a recursive relation. Equivalent to +;; dl-query for any single fully-stratifiable program. + +(define + dl-magic-rule-heads + (fn + (rules) + (let ((seen (list))) + (do + (for-each + (fn + (r) + (let ((h (dl-rel-name (get r :head)))) + (when + (and (not (nil? h)) (not (dl-member-string? h seen))) + (append! seen h)))) + rules) + seen)))) + +;; True iff any rule's body contains a literal kind that the magic +;; rewriter doesn't propagate magic to — i.e. an aggregate or a +;; negation. Used by dl-magic-query to decide whether to pre-saturate +;; the source db (for correctness on stratified programs) or skip +;; that step (preserving full magic-sets efficiency for pure +;; positive programs). +(define + dl-rule-has-nonprop-lit? + (fn + (body i n) + (cond + ((>= i n) false) + ((let ((lit (nth body i))) + (or (and (dict? lit) (has-key? lit :neg)) + (dl-aggregate? lit))) + true) + (else (dl-rule-has-nonprop-lit? body (+ i 1) n))))) + +(define + dl-rules-need-presaturation? + (fn + (rules) + (cond + ((= (len rules) 0) false) + ((let ((body (get (first rules) :body))) + (dl-rule-has-nonprop-lit? body 0 (len body))) + true) + (else (dl-rules-need-presaturation? (rest rules)))))) + +(define + dl-magic-query + (fn + (db query-goal) + ;; Magic-sets only applies to positive non-builtin / non-aggregate + ;; literals against rule-defined relations. For other goal shapes + ;; (built-ins, aggregates, EDB-only relations) the seed is either + ;; non-ground or unused; fall back to dl-query. + (cond + ((not (and (list? query-goal) + (> (len query-goal) 0) + (symbol? (first query-goal)))) + (error (str "dl-magic-query: goal must be a positive literal " + "(non-empty list with a symbol head), got " query-goal))) + ((or (dl-builtin? query-goal) + (dl-aggregate? query-goal) + (and (dict? query-goal) (has-key? query-goal :neg))) + (dl-query db query-goal)) + (else + (do + ;; If the rule set has aggregates or negation, pre-saturate + ;; the source db before copying facts. The magic rewriter + ;; passes aggregate body lits and negated lits through + ;; unchanged (no magic propagation generated for them) — so + ;; if their inner-goal relation is IDB, it would be empty in + ;; the magic db. Pre-saturating ensures equivalence with + ;; `dl-query` for every stratified program. Pure positive + ;; programs skip this and keep the full magic-sets perf win + ;; from goal-directed re-derivation. + (when + (dl-rules-need-presaturation? (dl-rules db)) + (dl-saturate! db)) + (let + ((query-rel (dl-rel-name query-goal)) + (query-adn (dl-adorn-goal query-goal))) + (let + ((query-args (dl-bound-args query-goal query-adn)) + (rules (dl-rules db))) + (let + ((rewritten (dl-magic-rewrite rules query-rel query-adn query-args)) + (mdb (dl-make-db)) + (rule-heads (dl-magic-rule-heads rules))) + (do + ;; Copy ALL existing facts. EDB-only relations bring their + ;; tuples; mixed EDB+IDB relations bring both their EDB + ;; portion and any pre-saturated IDB tuples (which the + ;; rewritten rules would re-derive anyway). Skipping facts + ;; for rule-headed relations would leave the magic run + ;; without the EDB portion of mixed relations. + (for-each + (fn + (rel) + (for-each + (fn (t) (dl-add-fact! mdb t)) + (dl-rel-tuples db rel))) + (keys (get db :facts))) + ;; Seed + rewritten rules. + (dl-add-fact! mdb (get rewritten :seed)) + (for-each (fn (r) (dl-add-rule! mdb r)) (get rewritten :rules)) + (dl-query mdb query-goal)))))))))) diff --git a/lib/datalog/parser.sx b/lib/datalog/parser.sx new file mode 100644 index 00000000..16701597 --- /dev/null +++ b/lib/datalog/parser.sx @@ -0,0 +1,252 @@ +;; lib/datalog/parser.sx — Datalog tokens → AST +;; +;; Output shapes: +;; Literal (positive) := (relname arg ... arg) — SX list +;; Literal (negative) := {:neg (relname arg ... arg)} — dict +;; Argument := var-symbol | atom-symbol | number | string +;; | (op-name arg ... arg) — arithmetic compound +;; Fact := {:head literal :body ()} +;; Rule := {:head literal :body (lit ... lit)} +;; Query := {:query (lit ... lit)} +;; Program := list of facts / rules / queries +;; +;; Variables and constants are both SX symbols; the evaluator dispatches +;; on first-char case ('A'..'Z' or '_' = variable, otherwise constant). +;; +;; The parser permits nested compounds in arg position to support +;; arithmetic (e.g. (is Z (+ X Y))). Safety analysis at rule-load time +;; rejects compounds whose head is not an arithmetic operator. + +(define + dl-pp-peek + (fn + (st) + (let + ((i (get st :idx)) (tokens (get st :tokens))) + (if (< i (len tokens)) (nth tokens i) {:type "eof" :value nil :pos 0})))) + +(define + dl-pp-peek2 + (fn + (st) + (let + ((i (+ (get st :idx) 1)) (tokens (get st :tokens))) + (if (< i (len tokens)) (nth tokens i) {:type "eof" :value nil :pos 0})))) + +(define + dl-pp-advance! + (fn (st) (dict-set! st :idx (+ (get st :idx) 1)))) + +(define + dl-pp-at? + (fn + (st type value) + (let + ((t (dl-pp-peek st))) + (and + (= (get t :type) type) + (or (= value nil) (= (get t :value) value)))))) + +(define + dl-pp-error + (fn + (st msg) + (let + ((t (dl-pp-peek st))) + (error + (str + "Parse error at pos " + (get t :pos) + ": " + msg + " (got " + (get t :type) + " '" + (if (= (get t :value) nil) "" (get t :value)) + "')"))))) + +(define + dl-pp-expect! + (fn + (st type value) + (let + ((t (dl-pp-peek st))) + (if + (dl-pp-at? st type value) + (do (dl-pp-advance! st) t) + (dl-pp-error + st + (str "expected " type (if (= value nil) "" (str " '" value "'")))))))) + +;; Argument: variable, atom, number, string, or compound (relname/op + parens). +(define + dl-pp-parse-arg + (fn + (st) + (let + ((t (dl-pp-peek st))) + (let + ((ty (get t :type)) (vv (get t :value))) + (cond + ((= ty "number") (do (dl-pp-advance! st) vv)) + ((= ty "string") (do (dl-pp-advance! st) vv)) + ((= ty "var") (do (dl-pp-advance! st) (string->symbol vv))) + ;; Negative numeric literal: `-` op directly followed by a + ;; number (no `(`) is parsed as a single negative number. + ;; This keeps `(-X Y)` (compound) and `-N` (literal) distinct. + ((and (= ty "op") (= vv "-") + (= (get (dl-pp-peek2 st) :type) "number")) + (do + (dl-pp-advance! st) + (let + ((n (get (dl-pp-peek st) :value))) + (do (dl-pp-advance! st) (- 0 n))))) + ((or (= ty "atom") (= ty "op")) + (do + (dl-pp-advance! st) + (if + (dl-pp-at? st "punct" "(") + (do + (dl-pp-advance! st) + (let + ((args (dl-pp-parse-arg-list st))) + (do + (dl-pp-expect! st "punct" ")") + (cons (string->symbol vv) args)))) + (string->symbol vv)))) + (else (dl-pp-error st "expected term"))))))) + +;; Comma-separated args inside parens. +(define + dl-pp-parse-arg-list + (fn + (st) + (let + ((args (list))) + (do + (append! args (dl-pp-parse-arg st)) + (define + dl-pp-arg-loop + (fn + () + (when + (dl-pp-at? st "punct" ",") + (do + (dl-pp-advance! st) + (append! args (dl-pp-parse-arg st)) + (dl-pp-arg-loop))))) + (dl-pp-arg-loop) + args)))) + +;; A positive literal: relname (atom or op) followed by optional (args). +(define + dl-pp-parse-positive + (fn + (st) + (let + ((t (dl-pp-peek st))) + (let + ((ty (get t :type)) (vv (get t :value))) + (if + (or (= ty "atom") (= ty "op")) + (do + (dl-pp-advance! st) + (if + (dl-pp-at? st "punct" "(") + (do + (dl-pp-advance! st) + (let + ((args (dl-pp-parse-arg-list st))) + (do + (dl-pp-expect! st "punct" ")") + (cons (string->symbol vv) args)))) + (list (string->symbol vv)))) + (dl-pp-error st "expected literal head")))))) + +;; A body literal: positive, or not(positive). +(define + dl-pp-parse-body-lit + (fn + (st) + (let + ((t1 (dl-pp-peek st)) (t2 (dl-pp-peek2 st))) + (if + (and + (= (get t1 :type) "atom") + (= (get t1 :value) "not") + (= (get t2 :type) "punct") + (= (get t2 :value) "(")) + (do + (dl-pp-advance! st) + (dl-pp-advance! st) + (let + ((inner (dl-pp-parse-positive st))) + (do (dl-pp-expect! st "punct" ")") {:neg inner}))) + (dl-pp-parse-positive st))))) + +;; Comma-separated body literals. +(define + dl-pp-parse-body + (fn + (st) + (let + ((lits (list))) + (do + (append! lits (dl-pp-parse-body-lit st)) + (define + dl-pp-body-loop + (fn + () + (when + (dl-pp-at? st "punct" ",") + (do + (dl-pp-advance! st) + (append! lits (dl-pp-parse-body-lit st)) + (dl-pp-body-loop))))) + (dl-pp-body-loop) + lits)))) + +;; Single clause: fact, rule, or query. Consumes trailing dot. +(define + dl-pp-parse-clause + (fn + (st) + (cond + ((dl-pp-at? st "op" "?-") + (do + (dl-pp-advance! st) + (let + ((body (dl-pp-parse-body st))) + (do (dl-pp-expect! st "punct" ".") {:query body})))) + (else + (let + ((head (dl-pp-parse-positive st))) + (cond + ((dl-pp-at? st "op" ":-") + (do + (dl-pp-advance! st) + (let + ((body (dl-pp-parse-body st))) + (do (dl-pp-expect! st "punct" ".") {:body body :head head})))) + (else (do (dl-pp-expect! st "punct" ".") {:body (list) :head head})))))))) + +(define + dl-parse-program + (fn + (tokens) + (let + ((st {:tokens tokens :idx 0}) (clauses (list))) + (do + (define + dl-pp-prog-loop + (fn + () + (when + (not (dl-pp-at? st "eof" nil)) + (do + (append! clauses (dl-pp-parse-clause st)) + (dl-pp-prog-loop))))) + (dl-pp-prog-loop) + clauses)))) + +(define dl-parse (fn (src) (dl-parse-program (dl-tokenize src)))) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json new file mode 100644 index 00000000..4dd9e4b1 --- /dev/null +++ b/lib/datalog/scoreboard.json @@ -0,0 +1,20 @@ +{ + "lang": "datalog", + "total_passed": 276, + "total_failed": 0, + "total": 276, + "suites": [ + {"name":"tokenize","passed":31,"failed":0,"total":31}, + {"name":"parse","passed":23,"failed":0,"total":23}, + {"name":"unify","passed":29,"failed":0,"total":29}, + {"name":"eval","passed":44,"failed":0,"total":44}, + {"name":"builtins","passed":26,"failed":0,"total":26}, + {"name":"semi_naive","passed":8,"failed":0,"total":8}, + {"name":"negation","passed":12,"failed":0,"total":12}, + {"name":"aggregates","passed":23,"failed":0,"total":23}, + {"name":"api","passed":22,"failed":0,"total":22}, + {"name":"magic","passed":37,"failed":0,"total":37}, + {"name":"demo","passed":21,"failed":0,"total":21} + ], + "generated": "2026-05-11T09:40:12+00:00" +} diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md new file mode 100644 index 00000000..c06a2f24 --- /dev/null +++ b/lib/datalog/scoreboard.md @@ -0,0 +1,17 @@ +# datalog scoreboard + +**276 / 276 passing** (0 failure(s)). + +| Suite | Passed | Total | Status | +|-------|--------|-------|--------| +| tokenize | 31 | 31 | ok | +| parse | 23 | 23 | ok | +| unify | 29 | 29 | ok | +| eval | 44 | 44 | ok | +| builtins | 26 | 26 | ok | +| semi_naive | 8 | 8 | ok | +| negation | 12 | 12 | ok | +| aggregates | 23 | 23 | ok | +| api | 22 | 22 | ok | +| magic | 37 | 37 | ok | +| demo | 21 | 21 | ok | diff --git a/lib/datalog/strata.sx b/lib/datalog/strata.sx new file mode 100644 index 00000000..feaad46f --- /dev/null +++ b/lib/datalog/strata.sx @@ -0,0 +1,323 @@ +;; lib/datalog/strata.sx — dependency graph, SCC analysis, stratum assignment. +;; +;; A program is stratifiable iff no cycle in its dependency graph passes +;; through a negative edge. The stratum of relation R is the depth at which +;; R can first be evaluated: +;; +;; stratum(R) = max over edges (R → S) of: +;; stratum(S) if the edge is positive +;; stratum(S) + 1 if the edge is negative +;; +;; All relations in the same SCC share a stratum (and the SCC must have only +;; positive internal edges, else the program is non-stratifiable). + +;; Build dep graph: dict {head-rel-name -> ({:rel str :neg bool} ...)}. +(define + dl-build-dep-graph + (fn + (db) + (let ((g {})) + (do + (for-each + (fn + (rule) + (let + ((head-rel (dl-rel-name (get rule :head)))) + (when + (not (nil? head-rel)) + (do + (when + (not (has-key? g head-rel)) + (dict-set! g head-rel (list))) + (let ((existing (get g head-rel))) + (for-each + (fn + (lit) + (cond + ((dl-aggregate? lit) + (let + ((edge (dl-aggregate-dep-edge lit))) + (when + (not (nil? edge)) + (append! existing edge)))) + (else + (let + ((target + (cond + ((and (dict? lit) (has-key? lit :neg)) + (dl-rel-name (get lit :neg))) + ((dl-builtin? lit) nil) + ((and (list? lit) (> (len lit) 0)) + (dl-rel-name lit)) + (else nil))) + (neg? + (and (dict? lit) (has-key? lit :neg)))) + (when + (not (nil? target)) + (append! + existing + {:rel target :neg neg?})))))) + (get rule :body))))))) + (dl-rules db)) + g)))) + +;; All relations referenced — heads of rules + EDB names + body relations. +(define + dl-all-relations + (fn + (db) + (let ((seen (list))) + (do + (for-each + (fn + (k) + (when (not (dl-member-string? k seen)) (append! seen k))) + (keys (get db :facts))) + (for-each + (fn + (rule) + (do + (let ((h (dl-rel-name (get rule :head)))) + (when + (and (not (nil? h)) (not (dl-member-string? h seen))) + (append! seen h))) + (for-each + (fn + (lit) + (let + ((t + (cond + ((dl-aggregate? lit) + (let ((edge (dl-aggregate-dep-edge lit))) + (if (nil? edge) nil (get edge :rel)))) + ((and (dict? lit) (has-key? lit :neg)) + (dl-rel-name (get lit :neg))) + ((dl-builtin? lit) nil) + ((and (list? lit) (> (len lit) 0)) + (dl-rel-name lit)) + (else nil)))) + (when + (and (not (nil? t)) (not (dl-member-string? t seen))) + (append! seen t)))) + (get rule :body)))) + (dl-rules db)) + seen)))) + +;; reach: dict {from: dict {to: edge-info}} where edge-info is +;; {:any bool :neg bool} +;; meaning "any path from `from` to `to`" and "exists a negative-passing +;; path from `from` to `to`". +;; +;; Floyd-Warshall over the dep graph. The 'neg' flag propagates through +;; concatenation: if any edge along the path is negative, the path's +;; flag is true. +(define + dl-build-reach + (fn + (graph nodes) + (let ((reach {})) + (do + (for-each + (fn (n) (dict-set! reach n {})) + nodes) + (for-each + (fn + (head) + (when + (has-key? graph head) + (for-each + (fn + (edge) + (let + ((target (get edge :rel)) (n (get edge :neg))) + (let ((row (get reach head))) + (cond + ((has-key? row target) + (let ((cur (get row target))) + (dict-set! + row + target + {:any true :neg (or n (get cur :neg))}))) + (else + (dict-set! row target {:any true :neg n})))))) + (get graph head)))) + nodes) + (for-each + (fn + (k) + (for-each + (fn + (i) + (let ((row-i (get reach i))) + (when + (has-key? row-i k) + (let ((ik (get row-i k)) (row-k (get reach k))) + (for-each + (fn + (j) + (when + (has-key? row-k j) + (let ((kj (get row-k j))) + (let + ((combined-neg (or (get ik :neg) (get kj :neg)))) + (cond + ((has-key? row-i j) + (let ((cur (get row-i j))) + (dict-set! + row-i + j + {:any true + :neg (or combined-neg (get cur :neg))}))) + (else + (dict-set! + row-i + j + {:any true :neg combined-neg}))))))) + nodes))))) + nodes)) + nodes) + reach)))) + +;; Returns nil on success, or error message string on failure. +(define + dl-check-stratifiable + (fn + (db) + (let + ((graph (dl-build-dep-graph db)) + (nodes (dl-all-relations db))) + (let ((reach (dl-build-reach graph nodes)) (err nil)) + (do + (for-each + (fn + (rule) + (when + (nil? err) + (let ((head-rel (dl-rel-name (get rule :head)))) + (for-each + (fn + (lit) + (cond + ((and (dict? lit) (has-key? lit :neg)) + (let ((tgt (dl-rel-name (get lit :neg)))) + (when + (and (not (nil? tgt)) + (dl-reach-cycle? reach head-rel tgt)) + (set! + err + (str "non-stratifiable: relation " head-rel + " transitively depends through negation on " + tgt + " which depends back on " head-rel))))) + ((dl-aggregate? lit) + (let ((edge (dl-aggregate-dep-edge lit))) + (when + (not (nil? edge)) + (let ((tgt (get edge :rel))) + (when + (and (not (nil? tgt)) + (dl-reach-cycle? reach head-rel tgt)) + (set! + err + (str "non-stratifiable: relation " + head-rel + " aggregates over " tgt + " which depends back on " + head-rel))))))))) + (get rule :body))))) + (dl-rules db)) + err))))) + +(define + dl-reach-cycle? + (fn + (reach a b) + (and + (dl-reach-row-has? reach b a) + (dl-reach-row-has? reach a b)))) + +(define + dl-reach-row-has? + (fn + (reach from to) + (let ((row (get reach from))) + (and (not (nil? row)) (has-key? row to))))) + +;; Compute stratum per relation. Iteratively propagate from EDB roots. +;; Uses the per-relation max-stratum-of-deps formula. Stops when stable. +(define + dl-compute-strata + (fn + (db) + (let + ((graph (dl-build-dep-graph db)) + (nodes (dl-all-relations db)) + (strata {})) + (do + (for-each (fn (n) (dict-set! strata n 0)) nodes) + (let ((changed true)) + (do + (define + dl-cs-loop + (fn + () + (when + changed + (do + (set! changed false) + (for-each + (fn + (head) + (when + (has-key? graph head) + (for-each + (fn + (edge) + (let + ((tgt (get edge :rel)) + (n (get edge :neg))) + (let + ((tgt-strat + (if (has-key? strata tgt) + (get strata tgt) 0)) + (cur (get strata head))) + (let + ((needed + (if n (+ tgt-strat 1) tgt-strat))) + (when + (> needed cur) + (do + (dict-set! strata head needed) + (set! changed true))))))) + (get graph head)))) + nodes) + (dl-cs-loop))))) + (dl-cs-loop))) + strata)))) + +;; Group rules by their head's stratum. Returns dict {stratum-int -> rules}. +(define + dl-group-rules-by-stratum + (fn + (db strata) + (let ((groups {}) (max-s 0)) + (do + (for-each + (fn + (rule) + (let + ((head-rel (dl-rel-name (get rule :head)))) + (let + ((s (if (has-key? strata head-rel) + (get strata head-rel) 0))) + (do + (when (> s max-s) (set! max-s s)) + (let + ((sk (str s))) + (do + (when + (not (has-key? groups sk)) + (dict-set! groups sk (list))) + (append! (get groups sk) rule))))))) + (dl-rules db)) + {:groups groups :max max-s})))) diff --git a/lib/datalog/tests/aggregates.sx b/lib/datalog/tests/aggregates.sx new file mode 100644 index 00000000..42c687a6 --- /dev/null +++ b/lib/datalog/tests/aggregates.sx @@ -0,0 +1,357 @@ +;; lib/datalog/tests/aggregates.sx — count / sum / min / max. + +(define dl-at-pass 0) +(define dl-at-fail 0) +(define dl-at-failures (list)) + +(define + dl-at-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-at-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-at-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-at-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-at-deep=? (nth a i) (nth b i))) false) + (else (dl-at-deq-l? a b (+ i 1)))))) + +(define + dl-at-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) + (not (dl-at-deep=? (get a k) (get b k)))) + false) + (else (dl-at-deq-d? a b ka (+ i 1)))))) + +(define + dl-at-set=? + (fn + (a b) + (and + (= (len a) (len b)) + (dl-at-subset? a b) + (dl-at-subset? b a)))) + +(define + dl-at-subset? + (fn + (xs ys) + (cond + ((= (len xs) 0) true) + ((not (dl-at-contains? ys (first xs))) false) + (else (dl-at-subset? (rest xs) ys))))) + +(define + dl-at-contains? + (fn + (xs target) + (cond + ((= (len xs) 0) false) + ((dl-at-deep=? (first xs) target) true) + (else (dl-at-contains? (rest xs) target))))) + +(define + dl-at-test! + (fn + (name got expected) + (if + (dl-at-deep=? got expected) + (set! dl-at-pass (+ dl-at-pass 1)) + (do + (set! dl-at-fail (+ dl-at-fail 1)) + (append! + dl-at-failures + (str + name + "\n expected: " expected + "\n got: " got)))))) + +(define + dl-at-test-set! + (fn + (name got expected) + (if + (dl-at-set=? got expected) + (set! dl-at-pass (+ dl-at-pass 1)) + (do + (set! dl-at-fail (+ dl-at-fail 1)) + (append! + dl-at-failures + (str + name + "\n expected (set): " expected + "\n got: " got)))))) + +(define + dl-at-throws? + (fn + (thunk) + (let + ((threw false)) + (do + (guard + (e (#t (set! threw true))) + (thunk)) + threw)))) + +(define + dl-at-run-all! + (fn + () + (do + ;; count + (dl-at-test-set! "count siblings" + (dl-query + (dl-program + "parent(p, bob). parent(p, alice). parent(p, charlie). + sibling(X, Y) :- parent(P, X), parent(P, Y), !=(X, Y). + sib_count(N) :- count(N, S, sibling(bob, S)).") + (list (quote sib_count) (quote N))) + (list {:N 2})) + + ;; sum + (dl-at-test-set! "sum prices" + (dl-query + (dl-program + "price(apple, 5). price(pear, 7). price(plum, 3). + total(T) :- sum(T, X, price(F, X)).") + (list (quote total) (quote T))) + (list {:T 15})) + + ;; min + (dl-at-test-set! "min score" + (dl-query + (dl-program + "score(alice, 80). score(bob, 65). score(carol, 92). + lo(M) :- min(M, S, score(P, S)).") + (list (quote lo) (quote M))) + (list {:M 65})) + + ;; max + (dl-at-test-set! "max score" + (dl-query + (dl-program + "score(alice, 80). score(bob, 65). score(carol, 92). + hi(M) :- max(M, S, score(P, S)).") + (list (quote hi) (quote M))) + (list {:M 92})) + + ;; count over derived relation (stratification needed). + (dl-at-test-set! "count over derived" + (dl-query + (dl-program + "parent(a, b). parent(a, c). parent(b, d). parent(c, e). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z). + num_ancestors(N) :- count(N, X, ancestor(a, X)).") + (list (quote num_ancestors) (quote N))) + (list {:N 4})) + + ;; count with no matches → 0. + (dl-at-test-set! "count empty" + (dl-query + (dl-program + "p(1). p(2). + zero(N) :- count(N, X, q(X)).") + (list (quote zero) (quote N))) + (list {:N 0})) + + ;; sum with no matches → 0. + (dl-at-test-set! "sum empty" + (dl-query + (dl-program + "p(1). p(2). + total(T) :- sum(T, X, q(X)).") + (list (quote total) (quote T))) + (list {:T 0})) + + ;; min with no matches → rule does not fire. + (dl-at-test-set! "min empty" + (dl-query + (dl-program + "p(1). p(2). + lo(M) :- min(M, X, q(X)).") + (list (quote lo) (quote M))) + (list)) + + ;; Aggregate with comparison filter on result. + (dl-at-test-set! "popularity threshold" + (dl-query + (dl-program + "post(p1). post(p2). + liked(u1, p1). liked(u2, p1). liked(u3, p1). + liked(u1, p2). liked(u2, p2). + popular(P) :- post(P), count(N, U, liked(U, P)), >=(N, 3).") + (list (quote popular) (quote P))) + (list {:P (quote p1)})) + + ;; findall: collect distinct values into a list. + (dl-at-test-set! "findall over EDB" + (dl-query + (dl-program + "p(a). p(b). p(c). + all_p(L) :- findall(L, X, p(X)).") + (list (quote all_p) (quote L))) + (list {:L (list (quote a) (quote b) (quote c))})) + + (dl-at-test-set! "findall over derived" + (dl-query + (dl-program + "parent(a, b). parent(b, c). parent(c, d). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z). + desc(L) :- findall(L, X, ancestor(a, X)).") + (list (quote desc) (quote L))) + (list {:L (list (quote b) (quote c) (quote d))})) + + (dl-at-test-set! "findall empty" + (dl-query + (dl-program + "p(1). + all_q(L) :- findall(L, X, q(X)).") + (list (quote all_q) (quote L))) + (list {:L (list)})) + + ;; Aggregate vs single distinct. + ;; Group-by via aggregate-in-rule-body. Per-user friend count + ;; over a friends relation. The U var is bound by the prior + ;; positive lit u(U) so the aggregate counts only U-rooted + ;; friends per group. + (dl-at-test-set! "group-by per-user friend count" + (dl-query + (dl-program + "u(alice). u(bob). u(carol). + f(alice, x). f(alice, y). f(bob, x). + counts(U, N) :- u(U), count(N, X, f(U, X)).") + (list (quote counts) (quote U) (quote N))) + (list + {:U (quote alice) :N 2} + {:U (quote bob) :N 1} + {:U (quote carol) :N 0})) + + ;; Stratification: recursion through aggregation is rejected. + ;; Aggregate validates that second arg is a variable. + (dl-at-test! "agg second arg must be var" + (dl-at-throws? + (fn () (dl-eval "p(1). q(N) :- count(N, 5, p(X))." "?- q(N)."))) + true) + + ;; Aggregate validates that third arg is a positive literal. + (dl-at-test! "agg third arg must be a literal" + (dl-at-throws? + (fn () (dl-eval "p(1). q(N) :- count(N, X, 42)." "?- q(N)."))) + true) + + ;; Aggregate validates that the agg-var (2nd arg) appears in the + ;; goal. Without it every match contributes the same unbound + ;; symbol — count silently returns 1, sum raises a confusing + ;; "expected number" error, etc. Catch the mistake at safety + ;; check time instead. + (dl-at-test! "agg-var must appear in goal" + (dl-at-throws? + (fn () + (dl-eval + "p(1). p(2). c(N) :- count(N, Y, p(X))." + "?- c(N)."))) + true) + + ;; Indirect recursion through aggregation also rejected. + ;; q -> r (via positive lit), r -> q (via aggregate body). + ;; The aggregate edge counts as negation for stratification. + (dl-at-test! "indirect agg cycle rejected" + (dl-at-throws? + (fn () + (let ((db (dl-make-db))) + (do + (dl-add-rule! db + {:head (list (quote q) (quote N)) + :body (list (list (quote r) (quote N)))}) + (dl-add-rule! db + {:head (list (quote r) (quote N)) + :body (list (list (quote count) (quote N) (quote X) + (list (quote q) (quote X))))}) + (dl-saturate! db))))) + true) + + (dl-at-test! "agg recursion rejected" + (dl-at-throws? + (fn () + (let ((db (dl-make-db))) + (do + (dl-add-rule! db + {:head (list (quote q) (quote N)) + :body (list (list (quote count) (quote N) (quote X) + (list (quote q) (quote X))))}) + (dl-saturate! db))))) + true) + + ;; Negation + aggregation in the same body — different strata. + (dl-at-test-set! "neg + agg coexist" + (dl-query + (dl-program + "u(a). u(b). u(c). banned(b). + active(X) :- u(X), not(banned(X)). + cnt(N) :- count(N, X, active(X)).") + (list (quote cnt) (quote N))) + (list {:N 2})) + + ;; Min over a derived empty relation: no result. + (dl-at-test-set! "min over empty derived" + (dl-query + (dl-program + "s(50). s(60). + score(N) :- s(N), >(N, 100). + low(M) :- min(M, X, score(X)).") + (list (quote low) (quote M))) + (list)) + + ;; Aggregates as the top-level query goal (regression for + ;; dl-match-lit aggregate dispatch and projection cleanup). + (dl-at-test-set! "count as query goal" + (dl-query + (dl-program "p(1). p(2). p(3). p(4).") + (list (quote count) (quote N) (quote X) (list (quote p) (quote X)))) + (list {:N 4})) + + (dl-at-test-set! "findall as query goal" + (dl-query + (dl-program "p(1). p(2). p(3).") + (list (quote findall) (quote L) (quote X) + (list (quote p) (quote X)))) + (list {:L (list 1 2 3)})) + + (dl-at-test-set! "distinct counted once" + (dl-query + (dl-program + "rated(alice, x). rated(alice, y). rated(bob, x). + rater_count(N) :- count(N, U, rated(U, F)).") + (list (quote rater_count) (quote N))) + (list {:N 2}))))) + +(define + dl-aggregates-tests-run! + (fn + () + (do + (set! dl-at-pass 0) + (set! dl-at-fail 0) + (set! dl-at-failures (list)) + (dl-at-run-all!) + {:passed dl-at-pass + :failed dl-at-fail + :total (+ dl-at-pass dl-at-fail) + :failures dl-at-failures}))) diff --git a/lib/datalog/tests/api.sx b/lib/datalog/tests/api.sx new file mode 100644 index 00000000..df430d30 --- /dev/null +++ b/lib/datalog/tests/api.sx @@ -0,0 +1,350 @@ +;; lib/datalog/tests/api.sx — SX-data embedding API. + +(define dl-api-pass 0) +(define dl-api-fail 0) +(define dl-api-failures (list)) + +(define + dl-api-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-api-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-api-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-api-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-api-deep=? (nth a i) (nth b i))) false) + (else (dl-api-deq-l? a b (+ i 1)))))) + +(define + dl-api-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) + (not (dl-api-deep=? (get a k) (get b k)))) + false) + (else (dl-api-deq-d? a b ka (+ i 1)))))) + +(define + dl-api-set=? + (fn + (a b) + (and + (= (len a) (len b)) + (dl-api-subset? a b) + (dl-api-subset? b a)))) + +(define + dl-api-subset? + (fn + (xs ys) + (cond + ((= (len xs) 0) true) + ((not (dl-api-contains? ys (first xs))) false) + (else (dl-api-subset? (rest xs) ys))))) + +(define + dl-api-contains? + (fn + (xs target) + (cond + ((= (len xs) 0) false) + ((dl-api-deep=? (first xs) target) true) + (else (dl-api-contains? (rest xs) target))))) + +(define + dl-api-test! + (fn + (name got expected) + (if + (dl-api-deep=? got expected) + (set! dl-api-pass (+ dl-api-pass 1)) + (do + (set! dl-api-fail (+ dl-api-fail 1)) + (append! + dl-api-failures + (str + name + "\n expected: " expected + "\n got: " got)))))) + +(define + dl-api-test-set! + (fn + (name got expected) + (if + (dl-api-set=? got expected) + (set! dl-api-pass (+ dl-api-pass 1)) + (do + (set! dl-api-fail (+ dl-api-fail 1)) + (append! + dl-api-failures + (str + name + "\n expected (set): " expected + "\n got: " got)))))) + +(define + dl-api-run-all! + (fn + () + (do + ;; dl-program-data with arrow form. + (dl-api-test-set! "data API ancestor closure" + (dl-query + (dl-program-data + (quote ((parent tom bob) (parent bob ann) (parent ann pat))) + (quote + ((ancestor X Y <- (parent X Y)) + (ancestor X Z <- (parent X Y) (ancestor Y Z))))) + (quote (ancestor tom X))) + (list {:X (quote bob)} {:X (quote ann)} {:X (quote pat)})) + + ;; dl-program-data with dict rules. + (dl-api-test-set! "data API with dict rules" + (dl-query + (dl-program-data + (quote ((p a) (p b) (p c))) + (list + {:head (quote (q X)) :body (quote ((p X)))})) + (quote (q X))) + (list {:X (quote a)} {:X (quote b)} {:X (quote c)})) + + ;; dl-rule helper. + (dl-api-test-set! "dl-rule constructor" + (dl-query + (dl-program-data + (quote ((p 1) (p 2))) + (list (dl-rule (quote (q X)) (quote ((p X)))))) + (quote (q X))) + (list {:X 1} {:X 2})) + + ;; dl-assert! adds and re-derives. + (dl-api-test-set! "dl-assert! incremental" + (let + ((db (dl-program-data + (quote ((parent tom bob) (parent bob ann))) + (quote + ((ancestor X Y <- (parent X Y)) + (ancestor X Z <- (parent X Y) (ancestor Y Z))))))) + (do + (dl-saturate! db) + (dl-assert! db (quote (parent ann pat))) + (dl-query db (quote (ancestor tom X))))) + (list {:X (quote bob)} {:X (quote ann)} {:X (quote pat)})) + + ;; dl-retract! removes a fact and recomputes IDB. + (dl-api-test-set! "dl-retract! removes derived" + (let + ((db (dl-program-data + (quote ((parent tom bob) (parent bob ann) (parent ann pat))) + (quote + ((ancestor X Y <- (parent X Y)) + (ancestor X Z <- (parent X Y) (ancestor Y Z))))))) + (do + (dl-saturate! db) + (dl-retract! db (quote (parent bob ann))) + (dl-query db (quote (ancestor tom X))))) + (list {:X (quote bob)})) + + ;; dl-retract! on a relation with BOTH explicit facts AND a rule + ;; (a "mixed" relation) used to wipe the EDB portion when the IDB + ;; was re-derived, even when the retract didn't match anything. + ;; :edb-keys provenance now preserves user-asserted facts. + (dl-api-test-set! "dl-retract! preserves EDB in mixed relation" + (let + ((db (dl-program-data + (quote ((p a) (p b) (q c))) + (quote ((p X <- (q X))))))) + (do + (dl-saturate! db) + ;; Retract a non-existent tuple — should be a no-op. + (dl-retract! db (quote (p z))) + (dl-query db (quote (p X))))) + (list {:X (quote a)} {:X (quote b)} {:X (quote c)})) + + ;; And retracting an actual EDB fact in a mixed relation drops + ;; only that fact; the derived portion stays. + (dl-api-test-set! "dl-retract! mixed: drop EDB, keep IDB" + (let + ((db (dl-program-data + (quote ((p a) (p b) (q c))) + (quote ((p X <- (q X))))))) + (do + (dl-saturate! db) + (dl-retract! db (quote (p a))) + (dl-query db (quote (p X))))) + (list {:X (quote b)} {:X (quote c)})) + + ;; dl-program-data + dl-query with constants in head. + (dl-api-test-set! "constant-in-head data" + (dl-query + (dl-program-data + (quote ((edge a b) (edge b c) (edge c a))) + (quote + ((reach X Y <- (edge X Y)) + (reach X Z <- (edge X Y) (reach Y Z))))) + (quote (reach a X))) + (list {:X (quote a)} {:X (quote b)} {:X (quote c)})) + + ;; Assert into empty db. + (dl-api-test-set! "assert into empty" + (let + ((db (dl-program-data (list) (list)))) + (do + (dl-assert! db (quote (p 1))) + (dl-assert! db (quote (p 2))) + (dl-query db (quote (p X))))) + (list {:X 1} {:X 2})) + + ;; Multi-goal query: pass list of literals. + (dl-api-test-set! "multi-goal query" + (dl-query + (dl-program-data + (quote ((p 1) (p 2) (p 3) (q 2) (q 3))) + (list)) + (list (quote (p X)) (quote (q X)))) + (list {:X 2} {:X 3})) + + ;; Multi-goal with comparison. + (dl-api-test-set! "multi-goal with comparison" + (dl-query + (dl-program-data + (quote ((n 1) (n 2) (n 3) (n 4) (n 5))) + (list)) + (list (quote (n X)) (list (string->symbol ">") (quote X) 2))) + (list {:X 3} {:X 4} {:X 5})) + + ;; dl-eval: single-call source + query. + (dl-api-test-set! "dl-eval ancestor" + (dl-eval + "parent(a, b). parent(b, c). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)." + "?- ancestor(a, X).") + (list {:X (quote b)} {:X (quote c)})) + + (dl-api-test-set! "dl-eval multi-goal" + (dl-eval + "p(1). p(2). p(3). q(2). q(3)." + "?- p(X), q(X).") + (list {:X 2} {:X 3})) + + ;; dl-rules-of: rules with head matching a relation name. + (dl-api-test! "dl-rules-of count" + (let + ((db (dl-program + "p(1). q(X) :- p(X). r(X) :- p(X). q(2)."))) + (len (dl-rules-of db "q"))) + 1) + + (dl-api-test! "dl-rules-of empty" + (let + ((db (dl-program "p(1). p(2)."))) + (len (dl-rules-of db "q"))) + 0) + + ;; dl-clear-idb!: wipe rule-headed relations. + (dl-api-test! "dl-clear-idb! wipes IDB" + (let + ((db (dl-program + "parent(a, b). parent(b, c). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (do + (dl-saturate! db) + (dl-clear-idb! db) + (len (dl-relation db "ancestor")))) + 0) + + (dl-api-test! "dl-clear-idb! preserves EDB" + (let + ((db (dl-program + "parent(a, b). parent(b, c). + ancestor(X, Y) :- parent(X, Y)."))) + (do + (dl-saturate! db) + (dl-clear-idb! db) + (len (dl-relation db "parent")))) + 2) + + ;; dl-eval-magic — routes single-goal queries through + ;; magic-sets evaluation. + (dl-api-test-set! "dl-eval-magic ancestor" + (dl-eval-magic + "parent(a, b). parent(b, c). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)." + "?- ancestor(a, X).") + (list {:X (quote b)} {:X (quote c)})) + + ;; Equivalence: dl-eval and dl-eval-magic produce the same + ;; answers for any well-formed query (magic-sets is a perf + ;; alternative, not a semantic change). + (dl-api-test! "dl-eval ≡ dl-eval-magic on ancestor" + (let + ((source "parent(a, b). parent(b, c). parent(c, d). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).")) + (let + ((semi (dl-eval source "?- ancestor(a, X).")) + (magic (dl-eval-magic source "?- ancestor(a, X)."))) + (= (len semi) (len magic)))) + true) + + ;; Comprehensive integration: recursion + stratified negation + ;; + aggregation + comparison composed in a single program. + ;; (Uses _Anything as a regular var instead of `_` so the + ;; outer rule binds via the reach lit.) + (dl-api-test-set! "integration" + (dl-eval + (str + "edge(a, b). edge(b, c). edge(c, d). edge(a, d). " + "banned(c). " + "reach(X, Y) :- edge(X, Y). " + "reach(X, Z) :- edge(X, Y), reach(Y, Z). " + "safe(X, Y) :- reach(X, Y), not(banned(Y)). " + "reach_count(X, N) :- reach(X, Z), count(N, Y, safe(X, Y)). " + "popular(X) :- reach_count(X, N), >=(N, 2).") + "?- popular(X).") + (list {:X (quote a)})) + + ;; dl-rule-from-list with no arrow → fact-style. + (dl-api-test-set! "no arrow → fact-like rule" + (let + ((rule (dl-rule-from-list (quote (foo X Y))))) + (list rule)) + (list {:head (quote (foo X Y)) :body (list)})) + + ;; dl-coerce-rule on dict passes through. + (dl-api-test-set! "coerce dict rule" + (let + ((d {:head (quote (h X)) :body (quote ((b X)))})) + (list (dl-coerce-rule d))) + (list {:head (quote (h X)) :body (quote ((b X)))}))))) + +(define + dl-api-tests-run! + (fn + () + (do + (set! dl-api-pass 0) + (set! dl-api-fail 0) + (set! dl-api-failures (list)) + (dl-api-run-all!) + {:passed dl-api-pass + :failed dl-api-fail + :total (+ dl-api-pass dl-api-fail) + :failures dl-api-failures}))) diff --git a/lib/datalog/tests/builtins.sx b/lib/datalog/tests/builtins.sx new file mode 100644 index 00000000..13bb2709 --- /dev/null +++ b/lib/datalog/tests/builtins.sx @@ -0,0 +1,285 @@ +;; lib/datalog/tests/builtins.sx — comparison + arithmetic body literals. + +(define dl-bt-pass 0) +(define dl-bt-fail 0) +(define dl-bt-failures (list)) + +(define + dl-bt-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-bt-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let + ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-bt-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-bt-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-bt-deep=? (nth a i) (nth b i))) false) + (else (dl-bt-deq-l? a b (+ i 1)))))) + +(define + dl-bt-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) (not (dl-bt-deep=? (get a k) (get b k)))) + false) + (else (dl-bt-deq-d? a b ka (+ i 1)))))) + +(define + dl-bt-set=? + (fn + (a b) + (and (= (len a) (len b)) (dl-bt-subset? a b) (dl-bt-subset? b a)))) + +(define + dl-bt-subset? + (fn + (xs ys) + (cond + ((= (len xs) 0) true) + ((not (dl-bt-contains? ys (first xs))) false) + (else (dl-bt-subset? (rest xs) ys))))) + +(define + dl-bt-contains? + (fn + (xs target) + (cond + ((= (len xs) 0) false) + ((dl-bt-deep=? (first xs) target) true) + (else (dl-bt-contains? (rest xs) target))))) + +(define + dl-bt-test-set! + (fn + (name got expected) + (if + (dl-bt-set=? got expected) + (set! dl-bt-pass (+ dl-bt-pass 1)) + (do + (set! dl-bt-fail (+ dl-bt-fail 1)) + (append! + dl-bt-failures + (str + name + "\n expected (set): " + expected + "\n got: " + got)))))) + +(define + dl-bt-test! + (fn + (name got expected) + (if + (dl-bt-deep=? got expected) + (set! dl-bt-pass (+ dl-bt-pass 1)) + (do + (set! dl-bt-fail (+ dl-bt-fail 1)) + (append! + dl-bt-failures + (str name "\n expected: " expected "\n got: " got)))))) + +(define + dl-bt-throws? + (fn + (thunk) + (let + ((threw false)) + (do (guard (e (#t (set! threw true))) (thunk)) threw)))) + +(define + dl-bt-run-all! + (fn + () + (do + (dl-bt-test-set! + "less than filter" + (dl-query + (dl-program + "age(alice, 30). age(bob, 17). age(carol, 22).\n adult(X) :- age(X, A), >=(A, 18).") + (list (quote adult) (quote X))) + (list {:X (quote alice)} {:X (quote carol)})) + (dl-bt-test-set! + "less-equal filter" + (dl-query + (dl-program + "n(1). n(2). n(3). n(4). n(5).\n small(X) :- n(X), <=(X, 3).") + (list (quote small) (quote X))) + (list {:X 1} {:X 2} {:X 3})) + (dl-bt-test-set! + "not-equal filter" + (dl-query + (dl-program + "p(1, 2). p(2, 2). p(3, 4).\n diff(X, Y) :- p(X, Y), !=(X, Y).") + (list (quote diff) (quote X) (quote Y))) + (list {:X 1 :Y 2} {:X 3 :Y 4})) + (dl-bt-test-set! + "is plus" + (dl-query + (dl-program + "n(1). n(2). n(3).\n succ(X, Y) :- n(X), is(Y, +(X, 1)).") + (list (quote succ) (quote X) (quote Y))) + (list {:X 1 :Y 2} {:X 2 :Y 3} {:X 3 :Y 4})) + (dl-bt-test-set! + "is multiply" + (dl-query + (dl-program + "n(2). n(3). n(4).\n square(X, Y) :- n(X), is(Y, *(X, X)).") + (list (quote square) (quote X) (quote Y))) + (list {:X 2 :Y 4} {:X 3 :Y 9} {:X 4 :Y 16})) + (dl-bt-test-set! + "is nested expr" + (dl-query + (dl-program + "n(1). n(2). n(3).\n f(X, Y) :- n(X), is(Y, *(+(X, 1), 2)).") + (list (quote f) (quote X) (quote Y))) + (list {:X 1 :Y 4} {:X 2 :Y 6} {:X 3 :Y 8})) + (dl-bt-test-set! + "is bound LHS — equality" + (dl-query + (dl-program + "n(1, 2). n(2, 5). n(3, 4).\n succ(X, Y) :- n(X, Y), is(Y, +(X, 1)).") + (list (quote succ) (quote X) (quote Y))) + (list {:X 1 :Y 2} {:X 3 :Y 4})) + (dl-bt-test-set! + "triple via is" + (dl-query + (dl-program + "n(1). n(2). n(3).\n triple(X, Y) :- n(X), is(Y, *(X, 3)).") + (list (quote triple) (quote X) (quote Y))) + (list {:X 1 :Y 3} {:X 2 :Y 6} {:X 3 :Y 9})) + (dl-bt-test-set! + "= unifies var with constant" + (dl-query + (dl-program "p(a). p(b).\n qual(X) :- p(X), =(X, a).") + (list (quote qual) (quote X))) + (list {:X (quote a)})) + (dl-bt-test-set! + "= unifies two vars (one bound)" + (dl-query + (dl-program "p(a). p(b).\n twin(X, Y) :- p(X), =(Y, X).") + (list (quote twin) (quote X) (quote Y))) + (list {:X (quote a) :Y (quote a)} {:X (quote b) :Y (quote b)})) + (dl-bt-test! + "big count" + (let + ((db (dl-program "n(0). n(1). n(2). n(3). n(4). n(5). n(6). n(7). n(8). n(9).\n big(X) :- n(X), >=(X, 5)."))) + (do (dl-saturate! db) (len (dl-relation db "big")))) + 5) + ;; Built-in / arithmetic literals work as standalone query goals + ;; (without needing a wrapper rule). + (dl-bt-test-set! "comparison-only goal true" + (dl-eval "" "?- <(1, 2).") + (list {})) + + (dl-bt-test-set! "comparison-only goal false" + (dl-eval "" "?- <(2, 1).") + (list)) + + (dl-bt-test-set! "is goal binds" + (dl-eval "" "?- is(N, +(2, 3)).") + (list {:N 5})) + + ;; Bounded successor: a recursive rule with a comparison + ;; guard terminates because the Herbrand base is effectively + ;; bounded. + (dl-bt-test-set! "bounded successor" + (dl-query + (dl-program + "nat(0). + nat(Y) :- nat(X), is(Y, +(X, 1)), <(Y, 5).") + (list (quote nat) (quote X))) + (list {:X 0} {:X 1} {:X 2} {:X 3} {:X 4})) + + (dl-bt-test! + "unsafe — comparison without binder" + (dl-bt-throws? (fn () (dl-program "p(X) :- <(X, 5)."))) + true) + (dl-bt-test! + "unsafe — comparison both unbound" + (dl-bt-throws? (fn () (dl-program "p(X, Y) :- <(X, Y), q(X)."))) + true) + (dl-bt-test! + "unsafe — is uses unbound RHS var" + (dl-bt-throws? + (fn () (dl-program "p(X, Y) :- q(X), is(Y, +(X, Z))."))) + true) + (dl-bt-test! + "unsafe — is on its own" + (dl-bt-throws? (fn () (dl-program "p(Y) :- is(Y, +(X, 1))."))) + true) + (dl-bt-test! + "unsafe — = between two unbound" + (dl-bt-throws? (fn () (dl-program "p(X, Y) :- =(X, Y)."))) + true) + (dl-bt-test! + "safe — is binds head var" + (dl-bt-throws? + (fn () (dl-program "n(1). p(Y) :- n(X), is(Y, +(X, 1))."))) + false) + (dl-bt-test! + "safe — comparison after binder" + (dl-bt-throws? + (fn () (dl-program "n(1). big(X) :- n(X), >=(X, 0)."))) + false) + (dl-bt-test! + "safe — = binds head var" + (dl-bt-throws? + (fn () (dl-program "p(a). p(b). x(Y) :- p(X), =(Y, X)."))) + false) + + ;; Division by zero raises with a clear error. Without this guard + ;; SX's `/` returned IEEE infinity, which then silently flowed + ;; through comparisons and aggregations. + (dl-bt-test! + "is — division by zero raises" + (dl-bt-throws? + (fn () + (dl-eval "p(10). q(R) :- p(X), is(R, /(X, 0))." "?- q(R)."))) + true) + + ;; Comparison ops `<`, `<=`, `>`, `>=` require both operands to + ;; have the same primitive type. Cross-type comparisons used to + ;; silently return false (for some pairs) or raise a confusing + ;; host-level error (for others) — now they all raise with a + ;; message that names the offending values. + (dl-bt-test! + "comparison — string vs number raises" + (dl-bt-throws? + (fn () + (dl-eval "p(\"hello\"). q(X) :- p(X), <(X, 5)." "?- q(X)."))) + true) + + ;; `!=` is the exception — it's a polymorphic inequality test + ;; (uses dl-tuple-equal? underneath) so cross-type pairs are + ;; legitimate (and trivially unequal). + (dl-bt-test-set! "!= works across types" + (dl-query + (dl-program + "p(1). p(\"1\"). q(X) :- p(X), !=(X, 1).") + (quote (q X))) + (list {:X "1"}))))) + +(define + dl-builtins-tests-run! + (fn + () + (do + (set! dl-bt-pass 0) + (set! dl-bt-fail 0) + (set! dl-bt-failures (list)) + (dl-bt-run-all!) + {:failures dl-bt-failures :total (+ dl-bt-pass dl-bt-fail) :passed dl-bt-pass :failed dl-bt-fail}))) diff --git a/lib/datalog/tests/demo.sx b/lib/datalog/tests/demo.sx new file mode 100644 index 00000000..c893ade3 --- /dev/null +++ b/lib/datalog/tests/demo.sx @@ -0,0 +1,321 @@ +;; lib/datalog/tests/demo.sx — Phase 10 demo programs. + +(define dl-demo-pass 0) +(define dl-demo-fail 0) +(define dl-demo-failures (list)) + +(define + dl-demo-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-demo-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-demo-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-demo-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-demo-deep=? (nth a i) (nth b i))) false) + (else (dl-demo-deq-l? a b (+ i 1)))))) + +(define + dl-demo-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) + (not (dl-demo-deep=? (get a k) (get b k)))) + false) + (else (dl-demo-deq-d? a b ka (+ i 1)))))) + +(define + dl-demo-set=? + (fn + (a b) + (and + (= (len a) (len b)) + (dl-demo-subset? a b) + (dl-demo-subset? b a)))) + +(define + dl-demo-subset? + (fn + (xs ys) + (cond + ((= (len xs) 0) true) + ((not (dl-demo-contains? ys (first xs))) false) + (else (dl-demo-subset? (rest xs) ys))))) + +(define + dl-demo-contains? + (fn + (xs target) + (cond + ((= (len xs) 0) false) + ((dl-demo-deep=? (first xs) target) true) + (else (dl-demo-contains? (rest xs) target))))) + +(define + dl-demo-test-set! + (fn + (name got expected) + (if + (dl-demo-set=? got expected) + (set! dl-demo-pass (+ dl-demo-pass 1)) + (do + (set! dl-demo-fail (+ dl-demo-fail 1)) + (append! + dl-demo-failures + (str + name + "\n expected (set): " expected + "\n got: " got)))))) + +(define + dl-demo-run-all! + (fn + () + (do + ;; ── Federation ────────────────────────────────────────── + (dl-demo-test-set! "mutuals" + (dl-query + (dl-demo-make + (quote ((follows alice bob) (follows bob alice) + (follows bob carol) (follows carol dave))) + dl-demo-federation-rules) + (quote (mutual alice X))) + (list {:X (quote bob)})) + + (dl-demo-test-set! "reachable transitive" + (dl-query + (dl-demo-make + (quote ((follows alice bob) (follows bob carol) (follows carol dave))) + dl-demo-federation-rules) + (quote (reachable alice X))) + (list {:X (quote bob)} {:X (quote carol)} {:X (quote dave)})) + + (dl-demo-test-set! "foaf" + (dl-query + (dl-demo-make + (quote ((follows alice bob) (follows bob carol) (follows alice dave))) + dl-demo-federation-rules) + (quote (foaf alice X))) + (list {:X (quote carol)})) + + ;; ── Content ───────────────────────────────────────────── + (dl-demo-test-set! "popular posts" + (dl-query + (dl-demo-make + (quote + ((authored alice p1) (authored bob p2) (authored carol p3) + (liked u1 p1) (liked u2 p1) (liked u3 p1) + (liked u1 p2))) + dl-demo-content-rules) + (quote (popular P))) + (list {:P (quote p1)})) + + (dl-demo-test-set! "interesting feed" + (dl-query + (dl-demo-make + (quote + ((follows me alice) (follows me bob) + (authored alice p1) (authored bob p2) + (liked u1 p1) (liked u2 p1) (liked u3 p1) + (liked u4 p2))) + dl-demo-content-rules) + (quote (interesting me P))) + (list {:P (quote p1)})) + + (dl-demo-test-set! "post likes count" + (dl-query + (dl-demo-make + (quote + ((authored alice p1) + (liked u1 p1) (liked u2 p1) (liked u3 p1))) + dl-demo-content-rules) + (quote (post-likes p1 N))) + (list {:N 3})) + + ;; ── Permissions ───────────────────────────────────────── + (dl-demo-test-set! "direct group access" + (dl-query + (dl-demo-make + (quote + ((member alice editors) + (allowed editors blog))) + dl-demo-perm-rules) + (quote (can-access X blog))) + (list {:X (quote alice)})) + + (dl-demo-test-set! "subgroup access" + (dl-query + (dl-demo-make + (quote + ((member bob writers) + (subgroup writers editors) + (allowed editors blog))) + dl-demo-perm-rules) + (quote (can-access X blog))) + (list {:X (quote bob)})) + + (dl-demo-test-set! "transitive subgroup" + (dl-query + (dl-demo-make + (quote + ((member carol drafters) + (subgroup drafters writers) + (subgroup writers editors) + (allowed editors blog))) + dl-demo-perm-rules) + (quote (can-access X blog))) + (list {:X (quote carol)})) + + ;; ── Cooking posts (canonical Phase 10 example) ───────── + (dl-demo-test-set! "cooking posts by network" + (dl-query + (dl-demo-make + (quote + ((follows me alice) (follows alice bob) (follows alice carol) + (authored alice p1) (authored bob p2) + (authored carol p3) (authored carol p4) + (tagged p1 travel) (tagged p2 cooking) + (tagged p3 cooking) (tagged p4 books))) + dl-demo-cooking-rules) + (quote (cooking-post-by-network me P))) + (list {:P (quote p2)} {:P (quote p3)})) + + (dl-demo-test-set! "cooking — direct follow only" + (dl-query + (dl-demo-make + (quote + ((follows me bob) + (authored bob p1) (authored bob p2) + (tagged p1 cooking) (tagged p2 books))) + dl-demo-cooking-rules) + (quote (cooking-post-by-network me P))) + (list {:P (quote p1)})) + + (dl-demo-test-set! "cooking — none in network" + (dl-query + (dl-demo-make + (quote + ((follows me bob) + (authored bob p1) (tagged p1 books))) + dl-demo-cooking-rules) + (quote (cooking-post-by-network me P))) + (list)) + + ;; ── Tag co-occurrence ────────────────────────────────── + (dl-demo-test-set! "cotagged posts" + (dl-query + (dl-demo-make + (quote + ((tagged p1 cooking) (tagged p1 vegetarian) + (tagged p2 cooking) (tagged p2 quick) + (tagged p3 vegetarian))) + dl-demo-tag-cooccur-rules) + (quote (cotagged P cooking vegetarian))) + (list {:P (quote p1)})) + + (dl-demo-test-set! "tag pair count" + (dl-query + (dl-demo-make + (quote + ((tagged p1 cooking) (tagged p1 vegetarian) + (tagged p2 cooking) (tagged p2 quick) + (tagged p3 cooking) (tagged p3 vegetarian))) + dl-demo-tag-cooccur-rules) + (quote (tag-pair-count cooking vegetarian N))) + (list {:N 2})) + + ;; ── Shortest path on a weighted DAG ────────────────── + (dl-demo-test-set! "shortest a→d via DAG" + (dl-query + (dl-demo-make + (quote ((edge a b 5) (edge b c 3) (edge a c 10) (edge c d 2))) + dl-demo-shortest-path-rules) + (quote (shortest a d W))) + (list {:W 10})) + + (dl-demo-test-set! "shortest a→c picks 2-hop" + (dl-query + (dl-demo-make + (quote ((edge a b 5) (edge b c 3) (edge a c 10))) + dl-demo-shortest-path-rules) + (quote (shortest a c W))) + (list {:W 8})) + + (dl-demo-test-set! "shortest unreachable empty" + (dl-query + (dl-demo-make + (quote ((edge a b 5) (edge b c 3))) + dl-demo-shortest-path-rules) + (quote (shortest a d W))) + (list)) + + ;; ── Org chart + headcount ───────────────────────────── + (dl-demo-test-set! "ceo subordinate transitive" + (dl-query + (dl-demo-make + (quote + ((manager ic1 mgr1) (manager ic2 mgr1) + (manager mgr1 vp1) (manager ic3 vp1) + (manager vp1 ceo))) + dl-demo-org-rules) + (quote (subordinate ceo X))) + (list + {:X (quote vp1)} {:X (quote mgr1)} {:X (quote ic1)} + {:X (quote ic2)} {:X (quote ic3)})) + + (dl-demo-test-set! "ceo headcount = 5" + (dl-query + (dl-demo-make + (quote + ((manager ic1 mgr1) (manager ic2 mgr1) + (manager mgr1 vp1) (manager ic3 vp1) + (manager vp1 ceo))) + dl-demo-org-rules) + (quote (headcount ceo N))) + (list {:N 5})) + + (dl-demo-test-set! "mgr1 headcount = 2" + (dl-query + (dl-demo-make + (quote + ((manager ic1 mgr1) (manager ic2 mgr1) + (manager mgr1 vp1) (manager ic3 vp1) + (manager vp1 ceo))) + dl-demo-org-rules) + (quote (headcount mgr1 N))) + (list {:N 2})) + + (dl-demo-test-set! "no access without grant" + (dl-query + (dl-demo-make + (quote ((member dave outsiders) (allowed editors blog))) + dl-demo-perm-rules) + (quote (can-access X blog))) + (list))))) + +(define + dl-demo-tests-run! + (fn + () + (do + (set! dl-demo-pass 0) + (set! dl-demo-fail 0) + (set! dl-demo-failures (list)) + (dl-demo-run-all!) + {:passed dl-demo-pass + :failed dl-demo-fail + :total (+ dl-demo-pass dl-demo-fail) + :failures dl-demo-failures}))) diff --git a/lib/datalog/tests/eval.sx b/lib/datalog/tests/eval.sx new file mode 100644 index 00000000..2625f04e --- /dev/null +++ b/lib/datalog/tests/eval.sx @@ -0,0 +1,463 @@ +;; lib/datalog/tests/eval.sx — naive evaluation + safety analysis tests. + +(define dl-et-pass 0) +(define dl-et-fail 0) +(define dl-et-failures (list)) + +;; Same deep-equal helper used in other suites. +(define + dl-et-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-et-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let + ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-et-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-et-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-et-deep=? (nth a i) (nth b i))) false) + (else (dl-et-deq-l? a b (+ i 1)))))) + +(define + dl-et-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) (not (dl-et-deep=? (get a k) (get b k)))) + false) + (else (dl-et-deq-d? a b ka (+ i 1)))))) + +;; Set-equality on lists (order-independent, uses dl-et-deep=?). +(define + dl-et-set=? + (fn + (a b) + (and (= (len a) (len b)) (dl-et-subset? a b) (dl-et-subset? b a)))) + +(define + dl-et-subset? + (fn + (xs ys) + (cond + ((= (len xs) 0) true) + ((not (dl-et-contains? ys (first xs))) false) + (else (dl-et-subset? (rest xs) ys))))) + +(define + dl-et-contains? + (fn + (xs target) + (cond + ((= (len xs) 0) false) + ((dl-et-deep=? (first xs) target) true) + (else (dl-et-contains? (rest xs) target))))) + +(define + dl-et-test! + (fn + (name got expected) + (if + (dl-et-deep=? got expected) + (set! dl-et-pass (+ dl-et-pass 1)) + (do + (set! dl-et-fail (+ dl-et-fail 1)) + (append! + dl-et-failures + (str name "\n expected: " expected "\n got: " got)))))) + +(define + dl-et-test-set! + (fn + (name got expected) + (if + (dl-et-set=? got expected) + (set! dl-et-pass (+ dl-et-pass 1)) + (do + (set! dl-et-fail (+ dl-et-fail 1)) + (append! + dl-et-failures + (str + name + "\n expected (set): " + expected + "\n got: " + got)))))) + +(define + dl-et-throws? + (fn + (thunk) + (let + ((threw false)) + (do (guard (e (#t (set! threw true))) (thunk)) threw)))) + +(define + dl-et-run-all! + (fn + () + (do + (dl-et-test-set! + "fact lookup any" + (dl-query + (dl-program "parent(tom, bob). parent(bob, ann).") + (list (quote parent) (quote X) (quote Y))) + (list {:X (quote tom) :Y (quote bob)} {:X (quote bob) :Y (quote ann)})) + (dl-et-test-set! + "fact lookup constant arg" + (dl-query + (dl-program "parent(tom, bob). parent(tom, liz). parent(bob, ann).") + (list (quote parent) (quote tom) (quote Y))) + (list {:Y (quote bob)} {:Y (quote liz)})) + (dl-et-test-set! + "no match" + (dl-query + (dl-program "parent(tom, bob).") + (list (quote parent) (quote nobody) (quote X))) + (list)) + (dl-et-test-set! + "ancestor closure" + (dl-query + (dl-program + "parent(tom, bob). parent(bob, ann). parent(ann, pat).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).") + (list (quote ancestor) (quote tom) (quote X))) + (list {:X (quote bob)} {:X (quote ann)} {:X (quote pat)})) + (dl-et-test-set! + "sibling" + (dl-query + (dl-program + "parent(tom, bob). parent(tom, liz). parent(jane, bob). parent(jane, liz).\n sibling(X, Y) :- parent(P, X), parent(P, Y).") + (list (quote sibling) (quote bob) (quote Y))) + (list {:Y (quote bob)} {:Y (quote liz)})) + (dl-et-test-set! + "same-generation" + (dl-query + (dl-program + "parent(tom, bob). parent(tom, liz). parent(bob, ann). parent(liz, joe).\n person(tom). person(bob). person(liz). person(ann). person(joe).\n sg(X, X) :- person(X).\n sg(X, Y) :- parent(P1, X), sg(P1, P2), parent(P2, Y).") + (list (quote sg) (quote ann) (quote X))) + (list {:X (quote ann)} {:X (quote joe)})) + (dl-et-test! + "ancestor count" + (let + ((db (dl-program "parent(a, b). parent(b, c). parent(c, d).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (do (dl-saturate! db) (len (dl-relation db "ancestor")))) + 6) + (dl-et-test-set! + "grandparent" + (dl-query + (dl-program + "parent(a, b). parent(b, c). parent(c, d).\n grandparent(X, Z) :- parent(X, Y), parent(Y, Z).") + (list (quote grandparent) (quote X) (quote Y))) + (list {:X (quote a) :Y (quote c)} {:X (quote b) :Y (quote d)})) + (dl-et-test! + "no recursion infinite loop" + (let + ((db (dl-program "edge(1, 2). edge(2, 3). edge(3, 1).\n reach(X, Y) :- edge(X, Y).\n reach(X, Z) :- edge(X, Y), reach(Y, Z)."))) + (do (dl-saturate! db) (len (dl-relation db "reach")))) + 9) + ;; Rule-shape sanity: empty-list head and non-list body raise + ;; clear errors rather than crashing inside the saturator. + (dl-et-test! "empty head rejected" + (dl-et-throws? + (fn () + (dl-add-rule! (dl-make-db) + {:head (list) :body (list)}))) + true) + + (dl-et-test! "non-list body rejected" + (dl-et-throws? + (fn () + (dl-add-rule! (dl-make-db) + {:head (list (quote p) (quote X)) :body 42}))) + true) + + ;; Reserved relation names rejected as rule/fact heads. + (dl-et-test! + "reserved name `not` as head rejected" + (dl-et-throws? (fn () (dl-program "not(X) :- p(X)."))) + true) + + (dl-et-test! + "reserved name `count` as head rejected" + (dl-et-throws? + (fn () (dl-program "count(N, X, p(X)) :- p(X)."))) + true) + + (dl-et-test! + "reserved name `<` as head rejected" + (dl-et-throws? (fn () (dl-program "<(X, 5) :- p(X)."))) + true) + + (dl-et-test! + "reserved name `is` as head rejected" + (dl-et-throws? (fn () (dl-program "is(N, +(1, 2)) :- p(N)."))) + true) + + ;; Body literal with a reserved-name positive head is rejected. + ;; The parser only treats outer-level `not(P)` as negation; nested + ;; `not(not(P))` would otherwise silently parse as a positive call + ;; to a relation named `not` and succeed vacuously. The safety + ;; checker now flags this so the user gets a clear error. + ;; Body literal with a reserved-name positive head is rejected. + ;; The parser only treats outer-level `not(P)` as negation; nested + ;; `not(not(P))` would otherwise silently parse as a positive call + ;; to a relation named `not` and succeed vacuously — so the safety + ;; checker now flags this to give the user a clear error. + (dl-et-test! + "nested not(not(...)) rejected" + (dl-et-throws? + (fn () + (dl-program + "banned(a). u(a). vip(X) :- u(X), not(not(banned(X)))."))) + true) + + ;; A dict body literal that isn't `{:neg ...}` is almost always a + ;; typo — it would otherwise silently fall through to a confusing + ;; head-var-unbound safety error. Now caught with a clear message. + (dl-et-test! + "dict body lit without :neg rejected" + (dl-et-throws? + (fn () + (let ((db (dl-make-db))) + (dl-add-rule! db + {:head (list (quote p) (quote X)) + :body (list {:weird "stuff"})})))) + true) + + ;; Facts may only have simple-term args (number / string / symbol). + ;; A compound arg like `+(1, 2)` would otherwise be silently + ;; stored as the unreduced expression `(+ 1 2)` because dl-ground? + ;; sees no free variables. + (dl-et-test! + "compound arg in fact rejected" + (dl-et-throws? (fn () (dl-program "p(+(1, 2))."))) + true) + + ;; Rule heads may only have variable or constant args — no + ;; compounds. Compound heads would be saturated as unreduced + ;; tuples rather than the arithmetic result the user expected. + (dl-et-test! + "compound arg in rule head rejected" + (dl-et-throws? + (fn () (dl-program "n(3). double(*(X, 2)) :- n(X)."))) + true) + + ;; The anonymous-variable renamer used to start at `_anon1` + ;; unconditionally; a rule that wrote `q(_anon1) :- p(_anon1, _)` + ;; (the user picking the same name the renamer would generate) + ;; would see the `_` renamed to `_anon1` too, collapsing the + ;; two positions in `p(_anon1, _)` to a single var. Now the + ;; renamer scans the rule for the max `_anon` and starts past + ;; it, so user-written names of that form are preserved. + (dl-et-test-set! "anonymous-rename avoids user `_anon` collision" + (dl-query + (dl-program + "p(a, b). p(c, d). q(_anon1) :- p(_anon1, _).") + (quote (q X))) + (list {:X (quote a)} {:X (quote c)})) + + (dl-et-test! + "unsafe head var" + (dl-et-throws? (fn () (dl-program "p(X, Y) :- q(X)."))) + true) + (dl-et-test! + "unsafe — empty body" + (dl-et-throws? (fn () (dl-program "p(X) :- ."))) + true) + ;; Underscore in head is unsafe — it's a fresh existential per + ;; occurrence after Phase 5d's anonymous-var renaming, and there's + ;; nothing in the body to bind it. (Old behavior accepted this by + ;; treating '_' as a literal name to skip; the renaming made it an + ;; ordinary unbound variable.) + (dl-et-test! + "underscore in head — unsafe" + (dl-et-throws? (fn () (dl-program "p(X, _) :- q(X)."))) + true) + (dl-et-test! + "underscore in body only — safe" + (dl-et-throws? (fn () (dl-program "p(X) :- q(X, _)."))) + false) + (dl-et-test! + "var only in head — unsafe" + (dl-et-throws? (fn () (dl-program "p(X, Y) :- q(Z)."))) + true) + (dl-et-test! + "head var bound by body" + (dl-et-throws? (fn () (dl-program "p(X) :- q(X)."))) + false) + (dl-et-test! + "head subset of body" + (dl-et-throws? + (fn + () + (dl-program + "edge(a,b). edge(b,c). reach(X, Z) :- edge(X, Y), edge(Y, Z)."))) + false) + + ;; Anonymous variables: each occurrence must be independent. + (dl-et-test-set! "anon vars in rule are independent" + (dl-query + (dl-program + "p(a, b). p(c, d). q(X) :- p(X, _), p(_, Y).") + (list (quote q) (quote X))) + (list {:X (quote a)} {:X (quote c)})) + + (dl-et-test-set! "anon vars in goal are independent" + (dl-query + (dl-program "p(1, 2, 3). p(4, 5, 6).") + (list (quote p) (quote _) (quote X) (quote _))) + (list {:X 2} {:X 5})) + + ;; dl-summary: relation -> tuple-count for inspection. + (dl-et-test! "dl-summary basic" + (dl-summary + (let + ((db (dl-program "p(1). p(2). q(3)."))) + (do (dl-saturate! db) db))) + {:p 2 :q 1}) + + (dl-et-test! "dl-summary empty IDB shown" + (dl-summary + (let + ((db (dl-program "r(X) :- s(X)."))) + (do (dl-saturate! db) db))) + {:r 0}) + + (dl-et-test! "dl-summary mixed EDB and IDB" + (dl-summary + (let + ((db (dl-program + "parent(a, b). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (do (dl-saturate! db) db))) + {:parent 1 :ancestor 1}) + + (dl-et-test! "dl-summary empty db" + (dl-summary (dl-make-db)) + {}) + + ;; Strategy hook: default semi-naive; :magic accepted but + ;; falls back to semi-naive (the transformation itself is + ;; deferred — Phase 6 in plan). + (dl-et-test! "default strategy" + (dl-get-strategy (dl-make-db)) + :semi-naive) + + (dl-et-test! "set strategy" + (let ((db (dl-make-db))) + (do (dl-set-strategy! db :magic) (dl-get-strategy db))) + :magic) + + ;; Unknown strategy values are rejected so typos don't silently + ;; fall back to the default. + (dl-et-test! + "unknown strategy rejected" + (dl-et-throws? + (fn () + (let ((db (dl-make-db))) + (dl-set-strategy! db :semi_naive)))) + true) + + ;; dl-saturated?: no-work-left predicate. + (dl-et-test! "saturated? after saturation" + (let ((db (dl-program + "parent(a, b). + ancestor(X, Y) :- parent(X, Y)."))) + (do (dl-saturate! db) (dl-saturated? db))) + true) + + (dl-et-test! "saturated? before saturation" + (let ((db (dl-program + "parent(a, b). + ancestor(X, Y) :- parent(X, Y)."))) + (dl-saturated? db)) + false) + + ;; Disjunction via multiple rules — Datalog has no `;` in + ;; body, so disjunction is expressed as separate rules with + ;; the same head. Here plant_based(X) is satisfied by either + ;; vegan(X) or vegetarian(X). + (dl-et-test-set! "disjunction via multiple rules" + (dl-query + (dl-program + "vegan(alice). vegetarian(bob). meat_eater(carol). + plant_based(X) :- vegan(X). + plant_based(X) :- vegetarian(X).") + (list (quote plant_based) (quote X))) + (list {:X (quote alice)} {:X (quote bob)})) + + ;; Bipartite-style join: pair-of-friends who share a hobby. + ;; Three-relation join exercising the planner's join order. + (dl-et-test-set! "bipartite friends-with-hobby" + (dl-query + (dl-program + "hobby(alice, climb). hobby(bob, paint). + hobby(carol, climb). + friend(alice, carol). friend(bob, alice). + match(A, B, H) :- friend(A, B), hobby(A, H), hobby(B, H).") + (list (quote match) (quote A) (quote B) (quote H))) + (list {:A (quote alice) :B (quote carol) :H (quote climb)})) + + ;; Repeated variable (diagonal): p(X, X) only matches tuples + ;; whose two args are equal. The unifier handles this via the + ;; subst chain — first occurrence binds X, second occurrence + ;; checks against the binding. + (dl-et-test-set! "diagonal query" + (dl-query + (dl-program "p(1, 1). p(2, 3). p(4, 4). p(5, 5).") + (list (quote p) (quote X) (quote X))) + (list {:X 1} {:X 4} {:X 5})) + + ;; A relation can be both EDB-seeded and rule-derived; + ;; saturate combines facts + derivations. + (dl-et-test-set! "mixed EDB + IDB same relation" + (dl-query + (dl-program + "link(a, b). link(c, d). link(e, c). + via(a, e). + link(X, Y) :- via(X, M), link(M, Y).") + (list (quote link) (quote a) (quote X))) + (list {:X (quote b)} {:X (quote c)})) + + (dl-et-test! "saturated? after assert" + (let ((db (dl-program + "parent(a, b). + ancestor(X, Y) :- parent(X, Y)."))) + (do + (dl-saturate! db) + (dl-add-fact! db (list (quote parent) (quote b) (quote c))) + (dl-saturated? db))) + false) + + (dl-et-test-set! "magic-set still derives correctly" + (let + ((db (dl-program + "parent(a, b). parent(b, c). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (do + (dl-set-strategy! db :magic) + (dl-query db (list (quote ancestor) (quote a) (quote X))))) + (list {:X (quote b)} {:X (quote c)}))))) + +(define + dl-eval-tests-run! + (fn + () + (do + (set! dl-et-pass 0) + (set! dl-et-fail 0) + (set! dl-et-failures (list)) + (dl-et-run-all!) + {:failures dl-et-failures :total (+ dl-et-pass dl-et-fail) :passed dl-et-pass :failed dl-et-fail}))) diff --git a/lib/datalog/tests/magic.sx b/lib/datalog/tests/magic.sx new file mode 100644 index 00000000..b6a895ca --- /dev/null +++ b/lib/datalog/tests/magic.sx @@ -0,0 +1,528 @@ +;; lib/datalog/tests/magic.sx — adornment + SIPS analysis tests. + +(define dl-mt-pass 0) +(define dl-mt-fail 0) +(define dl-mt-failures (list)) + +(define + dl-mt-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-mt-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-mt-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-mt-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-mt-deep=? (nth a i) (nth b i))) false) + (else (dl-mt-deq-l? a b (+ i 1)))))) + +(define + dl-mt-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) + (not (dl-mt-deep=? (get a k) (get b k)))) + false) + (else (dl-mt-deq-d? a b ka (+ i 1)))))) + +(define + dl-mt-test! + (fn + (name got expected) + (if + (dl-mt-deep=? got expected) + (set! dl-mt-pass (+ dl-mt-pass 1)) + (do + (set! dl-mt-fail (+ dl-mt-fail 1)) + (append! + dl-mt-failures + (str + name + "\n expected: " expected + "\n got: " got)))))) + +(define + dl-mt-run-all! + (fn + () + (do + ;; Goal adornment. + (dl-mt-test! "adorn 0-ary" + (dl-adorn-goal (list (quote ready))) + "") + (dl-mt-test! "adorn all bound" + (dl-adorn-goal (list (quote p) 1 2 3)) + "bbb") + (dl-mt-test! "adorn all free" + (dl-adorn-goal (list (quote p) (quote X) (quote Y))) + "ff") + (dl-mt-test! "adorn mixed" + (dl-adorn-goal (list (quote ancestor) (quote tom) (quote X))) + "bf") + (dl-mt-test! "adorn const var const" + (dl-adorn-goal (list (quote p) (quote a) (quote X) (quote b))) + "bfb") + + ;; dl-adorn-lit with explicit bound set. + (dl-mt-test! "adorn lit with bound" + (dl-adorn-lit (list (quote p) (quote X) (quote Y)) (list "X")) + "bf") + + ;; Rule SIPS — chain ancestor. + (dl-mt-test! "sips chain ancestor bf" + (dl-rule-sips + {:head (list (quote ancestor) (quote X) (quote Z)) + :body (list (list (quote parent) (quote X) (quote Y)) + (list (quote ancestor) (quote Y) (quote Z)))} + "bf") + (list + {:lit (list (quote parent) (quote X) (quote Y)) :adornment "bf"} + {:lit (list (quote ancestor) (quote Y) (quote Z)) :adornment "bf"})) + + ;; SIPS — head fully bound. + (dl-mt-test! "sips head bb" + (dl-rule-sips + {:head (list (quote q) (quote X) (quote Y)) + :body (list (list (quote p) (quote X) (quote Z)) + (list (quote r) (quote Z) (quote Y)))} + "bb") + (list + {:lit (list (quote p) (quote X) (quote Z)) :adornment "bf"} + {:lit (list (quote r) (quote Z) (quote Y)) :adornment "bb"})) + + ;; SIPS — comparison; vars must be bound by prior body lit. + (dl-mt-test! "sips with comparison" + (dl-rule-sips + {:head (list (quote q) (quote X)) + :body (list (list (quote p) (quote X)) + (list (string->symbol "<") (quote X) 5))} + "f") + (list + {:lit (list (quote p) (quote X)) :adornment "f"} + {:lit (list (string->symbol "<") (quote X) 5) :adornment "bb"})) + + ;; SIPS — `is` binds its left arg. + (dl-mt-test! "sips with is" + (dl-rule-sips + {:head (list (quote q) (quote X) (quote Y)) + :body (list (list (quote p) (quote X)) + (list (quote is) (quote Y) (list (string->symbol "+") (quote X) 1)))} + "ff") + (list + {:lit (list (quote p) (quote X)) :adornment "f"} + {:lit (list (quote is) (quote Y) + (list (string->symbol "+") (quote X) 1)) + :adornment "fb"})) + + ;; Magic predicate naming. + (dl-mt-test! "magic-rel-name" + (dl-magic-rel-name "ancestor" "bf") + "magic_ancestor^bf") + + ;; Bound-args extraction. + (dl-mt-test! "bound-args bf" + (dl-bound-args (list (quote ancestor) (quote tom) (quote X)) "bf") + (list (quote tom))) + + (dl-mt-test! "bound-args mixed" + (dl-bound-args (list (quote p) 1 (quote Y) 3) "bfb") + (list 1 3)) + + (dl-mt-test! "bound-args all-free" + (dl-bound-args (list (quote p) (quote X) (quote Y)) "ff") + (list)) + + ;; Magic literal construction. + (dl-mt-test! "magic-lit" + (dl-magic-lit "ancestor" "bf" (list (quote tom))) + (list (string->symbol "magic_ancestor^bf") (quote tom))) + + ;; Magic-sets rewriter: structural sanity. + (dl-mt-test! "rewrite ancestor produces seed" + (let + ((rules + (list + {:head (list (quote ancestor) (quote X) (quote Y)) + :body (list (list (quote parent) (quote X) (quote Y)))} + {:head (list (quote ancestor) (quote X) (quote Z)) + :body + (list (list (quote parent) (quote X) (quote Y)) + (list (quote ancestor) (quote Y) (quote Z)))}))) + (get + (dl-magic-rewrite rules "ancestor" "bf" (list (quote a))) + :seed)) + (list (string->symbol "magic_ancestor^bf") (quote a))) + + ;; Equivalence: rewritten program derives same ancestor tuples. + ;; In a chain a→b→c→d, magic-rewritten run still derives all + ;; ancestor pairs reachable from any node a/b/c/d propagated via + ;; magic_ancestor^bf — i.e. the full closure (6 tuples). Magic + ;; saves work only when the EDB has irrelevant nodes outside + ;; the seed's transitive cone. + (dl-mt-test! "magic-rewritten ancestor count" + (let + ((rules + (list + {:head (list (quote ancestor) (quote X) (quote Y)) + :body (list (list (quote parent) (quote X) (quote Y)))} + {:head (list (quote ancestor) (quote X) (quote Z)) + :body + (list (list (quote parent) (quote X) (quote Y)) + (list (quote ancestor) (quote Y) (quote Z)))})) + (edb (list + (list (quote parent) (quote a) (quote b)) + (list (quote parent) (quote b) (quote c)) + (list (quote parent) (quote c) (quote d))))) + (let + ((rewritten (dl-magic-rewrite rules "ancestor" "bf" (list (quote a)))) + (db (dl-make-db))) + (do + (for-each (fn (f) (dl-add-fact! db f)) edb) + (dl-add-fact! db (get rewritten :seed)) + (for-each (fn (r) (dl-add-rule! db r)) (get rewritten :rules)) + (dl-saturate! db) + (len (dl-relation db "ancestor"))))) + 6) + + ;; dl-magic-query: end-to-end driver, doesn't mutate caller's db. + ;; Magic over a rule with negated body literal — propagation + ;; rules generated only for positive lits; negated lits pass + ;; through unchanged. + (dl-mt-test! "magic over rule with negation" + (let + ((db (dl-program + "u(a). u(b). u(c). banned(b). + active(X) :- u(X), not(banned(X))."))) + (let + ((semi (dl-query db (list (quote active) (quote X)))) + (magic (dl-magic-query db (list (quote active) (quote X))))) + (= (len semi) (len magic)))) + true) + + ;; All-bound query (existence check) generates an "bb" + ;; adornment chain. Verifies the rewriter walks multiple + ;; (rel, adn) pairs through the worklist. + (dl-mt-test! "magic existence check via bb" + (let + ((db (dl-program + "parent(a, b). parent(b, c). parent(c, d). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (let + ((found (dl-magic-query + db (list (quote ancestor) (quote a) (quote c)))) + (missing (dl-magic-query + db (list (quote ancestor) (quote a) (quote z))))) + (and (= (len found) 1) (= (len missing) 0)))) + true) + + ;; Magic equivalence on the federation demo. + (dl-mt-test! "magic ≡ semi on foaf demo" + (let + ((db (dl-program-data + (quote ((follows alice bob) + (follows bob carol) + (follows alice dave))) + dl-demo-federation-rules))) + (let + ((semi (dl-query db (quote (foaf alice X)))) + (magic (dl-magic-query db (quote (foaf alice X))))) + (= (len semi) (len magic)))) + true) + + ;; Shape validation: dl-magic-query rejects non-list / non- + ;; dict goal shapes cleanly rather than crashing in `rest`. + (dl-mt-test! "magic rejects string goal" + (let ((threw false)) + (do + (guard (e (#t (set! threw true))) + (dl-magic-query (dl-make-db) "foo")) + threw)) + true) + + (dl-mt-test! "magic rejects bare dict goal" + (let ((threw false)) + (do + (guard (e (#t (set! threw true))) + (dl-magic-query (dl-make-db) {:foo "bar"})) + threw)) + true) + + ;; 3-stratum program under magic — distinct rule heads at + ;; strata 0/1/2 must all rewrite via the worklist. + (dl-mt-test! "magic 3-stratum program" + (let + ((db (dl-program + "a(1). a(2). a(3). b(2). + c(X) :- a(X), not(b(X)). + d(X) :- c(X), not(banned(X)). + banned(3)."))) + (let + ((semi (dl-query db (list (quote d) (quote X)))) + (magic (dl-magic-query db (list (quote d) (quote X))))) + (= (len semi) (len magic)))) + true) + + ;; Aggregate -> derived -> threshold chain via magic. + (dl-mt-test! "magic aggregate-derived chain" + (let + ((db (dl-program + "src(1). src(2). src(3). + cnt(N) :- count(N, X, src(X)). + active(N) :- cnt(N), >=(N, 2)."))) + (let + ((semi (dl-query db (list (quote active) (quote N)))) + (magic (dl-magic-query db (list (quote active) (quote N))))) + (= (len semi) (len magic)))) + true) + + ;; Multi-relation rewrite chain: query r4 → propagate to r3, + ;; r2, r1, a. The worklist must process all of them; an + ;; earlier bug stopped after only the head pair. + (dl-mt-test! "magic chain through 4 rule levels" + (let + ((db (dl-program + "a(1). a(2). r1(X) :- a(X). r2(X) :- r1(X). + r3(X) :- r2(X). r4(X) :- r3(X)."))) + (= 2 (len (dl-magic-query db (list (quote r4) (quote X)))))) + true) + + ;; Shortest-path demo via magic — exercises the rewriter + ;; against rules that mix recursive positive lits with an + ;; aggregate body literal. + (dl-mt-test! "magic on shortest-path demo" + (let + ((db (dl-program-data + (quote ((edge a b 5) (edge b c 3) (edge a c 10))) + dl-demo-shortest-path-rules))) + (let + ((semi (dl-query db (quote (shortest a c W)))) + (magic (dl-magic-query db (quote (shortest a c W))))) + (and (= (len semi) (len magic)) + (= (len semi) 1)))) + true) + + ;; Same relation called with different adornment patterns + ;; in different rules. The worklist must enqueue and process + ;; each (rel, adornment) pair. + (dl-mt-test! "magic with multi-adornment same relation" + (let + ((db (dl-program + "parent(p1, alice). parent(p2, bob). + parent(g, p1). parent(g, p2). + sibling(P1, P2) :- parent(G, P1), parent(G, P2), + !=(P1, P2). + cousin(X, Y) :- parent(P1, X), parent(P2, Y), + sibling(P1, P2)."))) + (let + ((semi (dl-query db (list (quote cousin) (quote alice) (quote Y)))) + (magic (dl-magic-query db (list (quote cousin) (quote alice) (quote Y))))) + (= (len semi) (len magic)))) + true) + + ;; Magic over a rule whose body contains an aggregate. + ;; The rewriter passes aggregate body lits through unchanged + ;; (no propagation generated for them), so semi-naive's count + ;; logic still fires correctly under the rewritten program. + (dl-mt-test! "magic over rule with aggregate body" + (let + ((db (dl-program + "post(p1). post(p2). post(p3). + liked(u1, p1). liked(u2, p1). liked(u3, p1). + liked(u1, p2). + rich(P) :- post(P), count(N, U, liked(U, P)), + >=(N, 2)."))) + (let + ((semi (dl-query db (list (quote rich) (quote P)))) + (magic (dl-magic-query db (list (quote rich) (quote P))))) + (= (len semi) (len magic)))) + true) + + ;; Mixed EDB + IDB: a relation can be both EDB-seeded and + ;; rule-derived. dl-magic-query must include the EDB portion + ;; even though the relation has rules. + (dl-mt-test! "magic mixed EDB+IDB" + (len + (dl-magic-query + (dl-program + "link(a, b). link(c, d). link(e, c). + via(a, e). + link(X, Y) :- via(X, M), link(M, Y).") + (list (quote link) (quote a) (quote X)))) + 2) + + ;; dl-magic-query falls back to dl-query for built-in, + ;; aggregate, and negation goals (the magic seed would + ;; otherwise be non-ground). + (dl-mt-test! "magic-query falls back on aggregate" + (let + ((r (dl-magic-query + (dl-program "p(1). p(2). p(3).") + (list (quote count) (quote N) (quote X) + (list (quote p) (quote X)))))) + (and (= (len r) 1) (= (get (first r) "N") 3))) + true) + + (dl-mt-test! "magic-query equivalent to dl-query" + (let + ((db (dl-program + "parent(a, b). parent(b, c). parent(c, d). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (let + ((semi (dl-query db (list (quote ancestor) (quote a) (quote X)))) + (magic (dl-magic-query + db (list (quote ancestor) (quote a) (quote X))))) + (= (len semi) (len magic)))) + true) + + ;; The magic rewriter passes aggregate body lits through + ;; unchanged, so an aggregate over an IDB relation would see an + ;; empty inner-goal in the magic db unless the IDB is already + ;; materialised. dl-magic-query now pre-saturates the source db + ;; to guarantee equivalence with dl-query for every stratified + ;; program. Previously this returned `({:N 0})` because `active` + ;; (IDB, derived through negation) was never derived in the + ;; magic db. + (dl-mt-test! "magic over aggregate-of-IDB matches vanilla" + (let + ((src + "u(a). u(b). u(c). u(d). banned(b). banned(d). + active(X) :- u(X), not(banned(X)). + n(N) :- count(N, X, active(X)).")) + (let + ((vanilla (dl-eval src "?- n(N).")) + (magic (dl-eval-magic src "?- n(N)."))) + (and (= (len vanilla) 1) + (= (len magic) 1) + (= (get (first vanilla) "N") + (get (first magic) "N"))))) + true) + + ;; magic-query doesn't mutate caller db. + (dl-mt-test! "magic-query preserves caller db" + (let + ((db (dl-program + "parent(a, b). parent(b, c). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (let + ((rules-before (len (dl-rules db)))) + (do + (dl-magic-query db (list (quote ancestor) (quote a) (quote X))) + (= rules-before (len (dl-rules db)))))) + true) + + ;; Magic-sets benefit: query touches only one cluster of a + ;; multi-component graph. Semi-naive derives the full closure + ;; over both clusters; magic only the seeded one. + ;; Magic-vs-semi work shape: chain of 12. Semi-naive + ;; derives the full closure (78 = 12·13/2). A magic query + ;; rooted at node 0 returns the 12 descendants only — + ;; demonstrating that magic limits derivation to the + ;; query's transitive cone. + (dl-mt-test! "magic vs semi work-shape on chain-12" + (let + ((source (str + "parent(0, 1). parent(1, 2). parent(2, 3). " + "parent(3, 4). parent(4, 5). parent(5, 6). " + "parent(6, 7). parent(7, 8). parent(8, 9). " + "parent(9, 10). parent(10, 11). parent(11, 12). " + "ancestor(X, Y) :- parent(X, Y). " + "ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (let + ((db1 (dl-make-db)) (db2 (dl-make-db))) + (do + (dl-load-program! db1 source) + (dl-saturate! db1) + (dl-load-program! db2 source) + (let + ((semi-count (len (dl-relation db1 "ancestor"))) + (magic-count + (len (dl-magic-query + db2 (list (quote ancestor) 0 (quote X)))))) + ;; Magic returns only descendants of 0 (12 of them). + (and (= semi-count 78) (= magic-count 12)))))) + true) + + ;; Magic + arithmetic: rules with `is` clauses pass through + ;; the rewriter unchanged (built-ins aren't propagated). + (dl-mt-test! "magic preserves arithmetic" + (let + ((source "n(1). n(2). n(3). + doubled(X, Y) :- n(X), is(Y, *(X, 2)).")) + (let + ((semi (dl-eval source "?- doubled(2, Y).")) + (magic (dl-eval-magic source "?- doubled(2, Y)."))) + (= (len semi) (len magic)))) + true) + + (dl-mt-test! "magic skips irrelevant clusters" + (let + ;; Two disjoint chains. Query is rooted in cluster 1. + ((db (dl-program + "parent(a, b). parent(b, c). + parent(x, y). parent(y, z). + ancestor(X, Y) :- parent(X, Y). + ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (do + (dl-saturate! db) + (let + ((semi-count (len (dl-relation db "ancestor"))) + (magic-results + (dl-magic-query + db (list (quote ancestor) (quote a) (quote X))))) + ;; Semi-naive derives 6 (3 in each cluster). Magic + ;; gives 3 query results (a's reachable: b, c). + (and (= semi-count 6) (= (len magic-results) 2))))) + true) + + (dl-mt-test! "magic-rewritten finds same answers" + (let + ((rules + (list + {:head (list (quote ancestor) (quote X) (quote Y)) + :body (list (list (quote parent) (quote X) (quote Y)))} + {:head (list (quote ancestor) (quote X) (quote Z)) + :body + (list (list (quote parent) (quote X) (quote Y)) + (list (quote ancestor) (quote Y) (quote Z)))})) + (edb (list + (list (quote parent) (quote a) (quote b)) + (list (quote parent) (quote b) (quote c))))) + (let + ((rewritten (dl-magic-rewrite rules "ancestor" "bf" (list (quote a)))) + (db (dl-make-db))) + (do + (for-each (fn (f) (dl-add-fact! db f)) edb) + (dl-add-fact! db (get rewritten :seed)) + (for-each (fn (r) (dl-add-rule! db r)) (get rewritten :rules)) + (dl-saturate! db) + (len (dl-query db (list (quote ancestor) (quote a) (quote X))))))) + 2)))) + +(define + dl-magic-tests-run! + (fn + () + (do + (set! dl-mt-pass 0) + (set! dl-mt-fail 0) + (set! dl-mt-failures (list)) + (dl-mt-run-all!) + {:passed dl-mt-pass + :failed dl-mt-fail + :total (+ dl-mt-pass dl-mt-fail) + :failures dl-mt-failures}))) diff --git a/lib/datalog/tests/negation.sx b/lib/datalog/tests/negation.sx new file mode 100644 index 00000000..9a5aae3d --- /dev/null +++ b/lib/datalog/tests/negation.sx @@ -0,0 +1,252 @@ +;; lib/datalog/tests/negation.sx — stratified negation tests. + +(define dl-nt-pass 0) +(define dl-nt-fail 0) +(define dl-nt-failures (list)) + +(define + dl-nt-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-nt-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-nt-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-nt-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-nt-deep=? (nth a i) (nth b i))) false) + (else (dl-nt-deq-l? a b (+ i 1)))))) + +(define + dl-nt-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) + (not (dl-nt-deep=? (get a k) (get b k)))) + false) + (else (dl-nt-deq-d? a b ka (+ i 1)))))) + +(define + dl-nt-set=? + (fn + (a b) + (and + (= (len a) (len b)) + (dl-nt-subset? a b) + (dl-nt-subset? b a)))) + +(define + dl-nt-subset? + (fn + (xs ys) + (cond + ((= (len xs) 0) true) + ((not (dl-nt-contains? ys (first xs))) false) + (else (dl-nt-subset? (rest xs) ys))))) + +(define + dl-nt-contains? + (fn + (xs target) + (cond + ((= (len xs) 0) false) + ((dl-nt-deep=? (first xs) target) true) + (else (dl-nt-contains? (rest xs) target))))) + +(define + dl-nt-test! + (fn + (name got expected) + (if + (dl-nt-deep=? got expected) + (set! dl-nt-pass (+ dl-nt-pass 1)) + (do + (set! dl-nt-fail (+ dl-nt-fail 1)) + (append! + dl-nt-failures + (str + name + "\n expected: " expected + "\n got: " got)))))) + +(define + dl-nt-test-set! + (fn + (name got expected) + (if + (dl-nt-set=? got expected) + (set! dl-nt-pass (+ dl-nt-pass 1)) + (do + (set! dl-nt-fail (+ dl-nt-fail 1)) + (append! + dl-nt-failures + (str + name + "\n expected (set): " expected + "\n got: " got)))))) + +(define + dl-nt-throws? + (fn + (thunk) + (let + ((threw false)) + (do + (guard + (e (#t (set! threw true))) + (thunk)) + threw)))) + +(define + dl-nt-run-all! + (fn + () + (do + ;; Negation against EDB-only relation. + (dl-nt-test-set! "not against EDB" + (dl-query + (dl-program + "p(1). p(2). p(3). r(2). + q(X) :- p(X), not(r(X)).") + (list (quote q) (quote X))) + (list {:X 1} {:X 3})) + + ;; Negation against derived relation — needs stratification. + (dl-nt-test-set! "not against derived rel" + (dl-query + (dl-program + "p(1). p(2). p(3). s(2). + r(X) :- s(X). + q(X) :- p(X), not(r(X)).") + (list (quote q) (quote X))) + (list {:X 1} {:X 3})) + + ;; Two-step strata: r derives via s; q derives via not r. + (dl-nt-test-set! "two-step strata" + (dl-query + (dl-program + "node(a). node(b). node(c). node(d). + edge(a, b). edge(b, c). edge(c, a). + reach(X, Y) :- edge(X, Y). + reach(X, Z) :- edge(X, Y), reach(Y, Z). + unreachable(X) :- node(X), not(reach(a, X)).") + (list (quote unreachable) (quote X))) + (list {:X (quote d)})) + + ;; Combine negation with arithmetic and comparison. + (dl-nt-test-set! "negation with arithmetic" + (dl-query + (dl-program + "n(1). n(2). n(3). n(4). n(5). odd(1). odd(3). odd(5). + even(X) :- n(X), not(odd(X)).") + (list (quote even) (quote X))) + (list {:X 2} {:X 4})) + + ;; Empty negation result. + (dl-nt-test-set! "negation always succeeds" + (dl-query + (dl-program + "p(1). p(2). q(X) :- p(X), not(r(X)).") + (list (quote q) (quote X))) + (list {:X 1} {:X 2})) + + ;; Negation always fails. + (dl-nt-test-set! "negation always fails" + (dl-query + (dl-program + "p(1). p(2). r(1). r(2). q(X) :- p(X), not(r(X)).") + (list (quote q) (quote X))) + (list)) + + ;; Anonymous `_` in a negated literal is existentially quantified + ;; — it doesn't need to be bound by an earlier body lit. Without + ;; this exemption the safety check would reject the common idiom + ;; `orphan(X) :- person(X), not(parent(X, _))`. + (dl-nt-test-set! "negation with anonymous var — orphan idiom" + (dl-query + (dl-program + "person(a). person(b). person(c). parent(a, b). + orphan(X) :- person(X), not(parent(X, _)).") + (list (quote orphan) (quote X))) + (list {:X (quote b)} {:X (quote c)})) + + ;; Multiple anonymous vars are each independently existential. + (dl-nt-test-set! "negation with multiple anonymous vars" + (dl-query + (dl-program + "u(a). u(b). u(c). edge(a, x). edge(b, y). + solo(X) :- u(X), not(edge(X, _)).") + (list (quote solo) (quote X))) + (list {:X (quote c)})) + + ;; Stratifiability checks. + (dl-nt-test! "non-stratifiable rejected" + (dl-nt-throws? + (fn () + (let ((db (dl-make-db))) + (do + (dl-add-rule! + db + {:head (list (quote p) (quote X)) + :body (list (list (quote q) (quote X)) + {:neg (list (quote r) (quote X))})}) + (dl-add-rule! + db + {:head (list (quote r) (quote X)) + :body (list (list (quote p) (quote X)))}) + (dl-add-fact! db (list (quote q) 1)) + (dl-saturate! db))))) + true) + + (dl-nt-test! "stratifiable accepted" + (dl-nt-throws? + (fn () + (dl-program + "p(1). p(2). r(2). + q(X) :- p(X), not(r(X))."))) + false) + + ;; Multi-stratum chain. + (dl-nt-test-set! "three-level strata" + (dl-query + (dl-program + "a(1). a(2). a(3). a(4). + b(X) :- a(X), not(c(X)). + c(X) :- d(X). + d(2). + d(4).") + (list (quote b) (quote X))) + (list {:X 1} {:X 3})) + + ;; Safety violation: negation refers to unbound var. + (dl-nt-test! "negation safety violation" + (dl-nt-throws? + (fn () + (dl-program + "p(1). q(X) :- p(X), not(r(Y))."))) + true)))) + +(define + dl-negation-tests-run! + (fn + () + (do + (set! dl-nt-pass 0) + (set! dl-nt-fail 0) + (set! dl-nt-failures (list)) + (dl-nt-run-all!) + {:passed dl-nt-pass + :failed dl-nt-fail + :total (+ dl-nt-pass dl-nt-fail) + :failures dl-nt-failures}))) diff --git a/lib/datalog/tests/parse.sx b/lib/datalog/tests/parse.sx new file mode 100644 index 00000000..6fc81b27 --- /dev/null +++ b/lib/datalog/tests/parse.sx @@ -0,0 +1,179 @@ +;; lib/datalog/tests/parse.sx — parser unit tests +;; +;; Run via: bash lib/datalog/conformance.sh +;; Or: (load "lib/datalog/tokenizer.sx") (load "lib/datalog/parser.sx") +;; (load "lib/datalog/tests/parse.sx") (dl-parse-tests-run!) + +(define dl-pt-pass 0) +(define dl-pt-fail 0) +(define dl-pt-failures (list)) + +;; Order-independent structural equality. Lists compared positionally, +;; dicts as sets of (key, value) pairs. Numbers via = (so 30.0 = 30). +(define + dl-deep-equal? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-deep-equal-list? a b 0))) + ((and (dict? a) (dict? b)) + (let + ((ka (keys a)) (kb (keys b))) + (and + (= (len ka) (len kb)) + (dl-deep-equal-dict? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-deep-equal-list? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-deep-equal? (nth a i) (nth b i))) false) + (else (dl-deep-equal-list? a b (+ i 1)))))) + +(define + dl-deep-equal-dict? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) (not (dl-deep-equal? (get a k) (get b k)))) + false) + (else (dl-deep-equal-dict? a b ka (+ i 1)))))) + +(define + dl-pt-test! + (fn + (name got expected) + (if + (dl-deep-equal? got expected) + (set! dl-pt-pass (+ dl-pt-pass 1)) + (do + (set! dl-pt-fail (+ dl-pt-fail 1)) + (append! + dl-pt-failures + (str name "\n expected: " expected "\n got: " got)))))) + +(define + dl-pt-throws? + (fn + (thunk) + (let + ((threw false)) + (do (guard (e (#t (set! threw true))) (thunk)) threw)))) + +(define + dl-pt-run-all! + (fn + () + (do + (dl-pt-test! "empty program" (dl-parse "") (list)) + (dl-pt-test! "fact" (dl-parse "parent(tom, bob).") (list {:body (list) :head (list (quote parent) (quote tom) (quote bob))})) + (dl-pt-test! + "two facts" + (dl-parse "parent(tom, bob). parent(bob, ann).") + (list {:body (list) :head (list (quote parent) (quote tom) (quote bob))} {:body (list) :head (list (quote parent) (quote bob) (quote ann))})) + (dl-pt-test! "zero-ary fact" (dl-parse "ready.") (list {:body (list) :head (list (quote ready))})) + (dl-pt-test! + "rule one body lit" + (dl-parse "ancestor(X, Y) :- parent(X, Y).") + (list {:body (list (list (quote parent) (quote X) (quote Y))) :head (list (quote ancestor) (quote X) (quote Y))})) + (dl-pt-test! + "recursive rule" + (dl-parse "ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).") + (list {:body (list (list (quote parent) (quote X) (quote Y)) (list (quote ancestor) (quote Y) (quote Z))) :head (list (quote ancestor) (quote X) (quote Z))})) + (dl-pt-test! + "query single" + (dl-parse "?- ancestor(tom, X).") + (list {:query (list (list (quote ancestor) (quote tom) (quote X)))})) + (dl-pt-test! + "query multi" + (dl-parse "?- p(X), q(X).") + (list {:query (list (list (quote p) (quote X)) (list (quote q) (quote X)))})) + (dl-pt-test! + "negation" + (dl-parse "safe(X) :- person(X), not(parent(X, _)).") + (list {:body (list (list (quote person) (quote X)) {:neg (list (quote parent) (quote X) (quote _))}) :head (list (quote safe) (quote X))})) + (dl-pt-test! + "number arg" + (dl-parse "age(alice, 30).") + (list {:body (list) :head (list (quote age) (quote alice) 30)})) + (dl-pt-test! + "string arg" + (dl-parse "label(x, \"hi\").") + (list {:body (list) :head (list (quote label) (quote x) "hi")})) + ;; Quoted 'atoms' parse as strings — a uppercase-starting name + ;; in quotes used to misclassify as a variable and reject the + ;; fact as non-ground. + (dl-pt-test! + "quoted atom arg parses as string" + (dl-parse "p('Hello World').") + (list {:body (list) :head (list (quote p) "Hello World")})) + (dl-pt-test! + "comparison literal" + (dl-parse "p(X) :- <(X, 5).") + (list {:body (list (list (string->symbol "<") (quote X) 5)) :head (list (quote p) (quote X))})) + (dl-pt-test! + "is with arith" + (dl-parse "succ(X, Y) :- nat(X), is(Y, +(X, 1)).") + (list {:body (list (list (quote nat) (quote X)) (list (quote is) (quote Y) (list (string->symbol "+") (quote X) 1))) :head (list (quote succ) (quote X) (quote Y))})) + (dl-pt-test! + "mixed program" + (dl-parse "p(a). p(b). q(X) :- p(X). ?- q(Y).") + (list {:body (list) :head (list (quote p) (quote a))} {:body (list) :head (list (quote p) (quote b))} {:body (list (list (quote p) (quote X))) :head (list (quote q) (quote X))} {:query (list (list (quote q) (quote Y)))})) + (dl-pt-test! + "comments skipped" + (dl-parse "% comment\nfoo(a).\n/* block */ bar(b).") + (list {:body (list) :head (list (quote foo) (quote a))} {:body (list) :head (list (quote bar) (quote b))})) + (dl-pt-test! + "underscore var" + (dl-parse "p(X) :- q(X, _).") + (list {:body (list (list (quote q) (quote X) (quote _))) :head (list (quote p) (quote X))})) + ;; Negative number literals parse as one negative number, + ;; while subtraction (`-(X, Y)`) compound is preserved. + (dl-pt-test! + "negative integer literal" + (dl-parse "n(-3).") + (list {:head (list (quote n) -3) :body (list)})) + + (dl-pt-test! + "subtraction compound preserved" + (dl-parse "r(X) :- is(X, -(10, 3)).") + (list + {:head (list (quote r) (quote X)) + :body (list (list (quote is) (quote X) + (list (string->symbol "-") 10 3)))})) + + (dl-pt-test! + "number as relation name raises" + (dl-pt-throws? (fn () (dl-parse "1(X) :- p(X)."))) + true) + + (dl-pt-test! + "var as relation name raises" + (dl-pt-throws? (fn () (dl-parse "P(X)."))) + true) + + (dl-pt-test! + "missing dot raises" + (dl-pt-throws? (fn () (dl-parse "p(a)"))) + true) + (dl-pt-test! + "trailing comma raises" + (dl-pt-throws? (fn () (dl-parse "p(a,)."))) + true)))) + +(define + dl-parse-tests-run! + (fn + () + (do + (set! dl-pt-pass 0) + (set! dl-pt-fail 0) + (set! dl-pt-failures (list)) + (dl-pt-run-all!) + {:failures dl-pt-failures :total (+ dl-pt-pass dl-pt-fail) :passed dl-pt-pass :failed dl-pt-fail}))) diff --git a/lib/datalog/tests/semi_naive.sx b/lib/datalog/tests/semi_naive.sx new file mode 100644 index 00000000..6df2a9e5 --- /dev/null +++ b/lib/datalog/tests/semi_naive.sx @@ -0,0 +1,153 @@ +;; lib/datalog/tests/semi_naive.sx — semi-naive correctness vs naive. +;; +;; Strategy: differential — run both saturators on each program and +;; compare the resulting per-relation tuple counts. Counting (not +;; element-wise set equality) keeps the suite fast under the bundled +;; conformance session; correctness on the inhabitants is covered by +;; eval.sx and builtins.sx (which use dl-saturate! by default — the +;; semi-naive saturator). + +(define dl-sn-pass 0) +(define dl-sn-fail 0) +(define dl-sn-failures (list)) + +(define + dl-sn-test! + (fn + (name got expected) + (if + (equal? got expected) + (set! dl-sn-pass (+ dl-sn-pass 1)) + (do + (set! dl-sn-fail (+ dl-sn-fail 1)) + (append! + dl-sn-failures + (str name "\n expected: " expected "\n got: " got)))))) + +;; Load `source` into both a semi-naive and a naive db and return a +;; list of (rel-name semi-count naive-count) triples. Both sets must +;; have the same union of relation names. +(define + dl-sn-counts + (fn + (source) + (let + ((db-s (dl-program source)) (db-n (dl-program source))) + (do + (dl-saturate! db-s) + (dl-saturate-naive! db-n) + (let + ((out (list))) + (do + (for-each + (fn + (k) + (append! + out + (list + k + (len (dl-relation db-s k)) + (len (dl-relation db-n k))))) + (keys (get db-s :facts))) + out)))))) + +(define + dl-sn-counts-agree? + (fn + (counts) + (cond + ((= (len counts) 0) true) + (else + (let + ((row (first counts))) + (and + (= (nth row 1) (nth row 2)) + (dl-sn-counts-agree? (rest counts)))))))) + +(define + dl-sn-chain-source + (fn + (n) + (let + ((parts (list ""))) + (do + (define + dl-sn-loop + (fn + (i) + (when + (< i n) + (do + (append! parts (str "parent(" i ", " (+ i 1) "). ")) + (dl-sn-loop (+ i 1)))))) + (dl-sn-loop 0) + (str + (join "" parts) + "ancestor(X, Y) :- parent(X, Y). " + "ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))))) + +(define + dl-sn-run-all! + (fn + () + (do + (dl-sn-test! + "ancestor closure counts match" + (dl-sn-counts-agree? + (dl-sn-counts + "parent(a, b). parent(b, c). parent(c, d).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).")) + true) + (dl-sn-test! + "cyclic reach counts match" + (dl-sn-counts-agree? + (dl-sn-counts + "edge(1, 2). edge(2, 3). edge(3, 1). edge(3, 4).\n reach(X, Y) :- edge(X, Y).\n reach(X, Z) :- edge(X, Y), reach(Y, Z).")) + true) + (dl-sn-test! + "same-gen counts match" + (dl-sn-counts-agree? + (dl-sn-counts + "parent(a, b). parent(a, c). parent(b, d). parent(c, e).\n person(a). person(b). person(c). person(d). person(e).\n sg(X, X) :- person(X).\n sg(X, Y) :- parent(P1, X), sg(P1, P2), parent(P2, Y).")) + true) + (dl-sn-test! + "rules with builtins counts match" + (dl-sn-counts-agree? + (dl-sn-counts + "n(1). n(2). n(3). n(4). n(5).\n small(X) :- n(X), <(X, 5).\n succ(X, Y) :- n(X), <(X, 5), is(Y, +(X, 1)).")) + true) + (dl-sn-test! + "static rule fires under semi-naive" + (dl-sn-counts-agree? + (dl-sn-counts "p(a). p(b). q(X) :- p(X), =(X, a).")) + true) + ;; Chain length 12 — multiple semi-naive iterations against + ;; the recursive ancestor rule (differential vs naive). + (dl-sn-test! + "chain-12 ancestor counts match" + (dl-sn-counts-agree? (dl-sn-counts (dl-sn-chain-source 12))) + true) + ;; Chain length 25 — semi-naive only — first-arg index makes + ;; this tractable in conformance budget. + (dl-sn-test! + "chain-25 ancestor count value (semi only)" + (let + ((db (dl-program (dl-sn-chain-source 25)))) + (do (dl-saturate! db) (len (dl-relation db "ancestor")))) + 325) + (dl-sn-test! + "query through semi saturate" + (let + ((db (dl-program "parent(a, b). parent(b, c).\n ancestor(X, Y) :- parent(X, Y).\n ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z)."))) + (len (dl-query db (list (quote ancestor) (quote a) (quote X))))) + 2)))) + +(define + dl-semi-naive-tests-run! + (fn + () + (do + (set! dl-sn-pass 0) + (set! dl-sn-fail 0) + (set! dl-sn-failures (list)) + (dl-sn-run-all!) + {:failures dl-sn-failures :total (+ dl-sn-pass dl-sn-fail) :passed dl-sn-pass :failed dl-sn-fail}))) diff --git a/lib/datalog/tests/tokenize.sx b/lib/datalog/tests/tokenize.sx new file mode 100644 index 00000000..6c7b94a9 --- /dev/null +++ b/lib/datalog/tests/tokenize.sx @@ -0,0 +1,189 @@ +;; lib/datalog/tests/tokenize.sx — tokenizer unit tests +;; +;; Run via: bash lib/datalog/conformance.sh +;; Or: (load "lib/datalog/tokenizer.sx") (load "lib/datalog/tests/tokenize.sx") +;; (dl-tokenize-tests-run!) + +(define dl-tk-pass 0) +(define dl-tk-fail 0) +(define dl-tk-failures (list)) + +(define + dl-tk-test! + (fn + (name got expected) + (if + (= got expected) + (set! dl-tk-pass (+ dl-tk-pass 1)) + (do + (set! dl-tk-fail (+ dl-tk-fail 1)) + (append! + dl-tk-failures + (str name "\n expected: " expected "\n got: " got)))))) + +(define dl-tk-types (fn (toks) (map (fn (t) (get t :type)) toks))) +(define dl-tk-values (fn (toks) (map (fn (t) (get t :value)) toks))) + +(define + dl-tk-run-all! + (fn + () + (do + (dl-tk-test! "empty" (dl-tk-types (dl-tokenize "")) (list "eof")) + (dl-tk-test! + "atom dot" + (dl-tk-types (dl-tokenize "foo.")) + (list "atom" "punct" "eof")) + (dl-tk-test! + "atom dot value" + (dl-tk-values (dl-tokenize "foo.")) + (list "foo" "." nil)) + (dl-tk-test! + "var" + (dl-tk-types (dl-tokenize "X.")) + (list "var" "punct" "eof")) + (dl-tk-test! + "underscore var" + (dl-tk-types (dl-tokenize "_x.")) + (list "var" "punct" "eof")) + (dl-tk-test! + "integer" + (dl-tk-values (dl-tokenize "42")) + (list 42 nil)) + (dl-tk-test! + "decimal" + (dl-tk-values (dl-tokenize "3.14")) + (list 3.14 nil)) + (dl-tk-test! + "string" + (dl-tk-values (dl-tokenize "\"hello\"")) + (list "hello" nil)) + ;; Quoted 'atoms' tokenize as strings — see the type-table + ;; comment in lib/datalog/tokenizer.sx for the rationale. + (dl-tk-test! + "quoted atom as string" + (dl-tk-types (dl-tokenize "'two words'")) + (list "string" "eof")) + (dl-tk-test! + "quoted atom value" + (dl-tk-values (dl-tokenize "'two words'")) + (list "two words" nil)) + ;; A quoted atom whose name would otherwise be a variable + ;; (uppercase / leading underscore) is now safely a string — + ;; this was the bug that motivated the type change. + (dl-tk-test! + "quoted Uppercase as string" + (dl-tk-types (dl-tokenize "'Hello'")) + (list "string" "eof")) + (dl-tk-test! ":-" (dl-tk-values (dl-tokenize ":-")) (list ":-" nil)) + (dl-tk-test! "?-" (dl-tk-values (dl-tokenize "?-")) (list "?-" nil)) + (dl-tk-test! "<=" (dl-tk-values (dl-tokenize "<=")) (list "<=" nil)) + (dl-tk-test! ">=" (dl-tk-values (dl-tokenize ">=")) (list ">=" nil)) + (dl-tk-test! "!=" (dl-tk-values (dl-tokenize "!=")) (list "!=" nil)) + (dl-tk-test! + "single op values" + (dl-tk-values (dl-tokenize "< > = + - * /")) + (list "<" ">" "=" "+" "-" "*" "/" nil)) + (dl-tk-test! + "single op types" + (dl-tk-types (dl-tokenize "< > = + - * /")) + (list "op" "op" "op" "op" "op" "op" "op" "eof")) + (dl-tk-test! + "punct" + (dl-tk-values (dl-tokenize "( ) , .")) + (list "(" ")" "," "." nil)) + (dl-tk-test! + "fact tokens" + (dl-tk-types (dl-tokenize "parent(tom, bob).")) + (list "atom" "punct" "atom" "punct" "atom" "punct" "punct" "eof")) + (dl-tk-test! + "rule shape" + (dl-tk-types (dl-tokenize "p(X) :- q(X).")) + (list + "atom" + "punct" + "var" + "punct" + "op" + "atom" + "punct" + "var" + "punct" + "punct" + "eof")) + (dl-tk-test! + "comparison literal" + (dl-tk-values (dl-tokenize "<(X, 5)")) + (list "<" "(" "X" "," 5 ")" nil)) + (dl-tk-test! + "is form" + (dl-tk-values (dl-tokenize "is(Y, +(X, 1))")) + (list "is" "(" "Y" "," "+" "(" "X" "," 1 ")" ")" nil)) + (dl-tk-test! + "line comment" + (dl-tk-types (dl-tokenize "% comment line\nfoo.")) + (list "atom" "punct" "eof")) + (dl-tk-test! + "block comment" + (dl-tk-types (dl-tokenize "/* a\nb */ x.")) + (list "atom" "punct" "eof")) + ;; Unexpected characters surface at tokenize time rather + ;; than being silently consumed (previously `?(X)` parsed as + ;; if the leading `?` weren't there). + (dl-tk-test! + "unexpected char raises" + (let ((threw false)) + (do + (guard (e (#t (set! threw true))) + (dl-tokenize "?(X)")) + threw)) + true) + + ;; Unterminated string / quoted-atom must raise. + (dl-tk-test! + "unterminated string raises" + (let ((threw false)) + (do + (guard (e (#t (set! threw true))) + (dl-tokenize "\"unclosed")) + threw)) + true) + + (dl-tk-test! + "unterminated quoted atom raises" + (let ((threw false)) + (do + (guard (e (#t (set! threw true))) + (dl-tokenize "'unclosed")) + threw)) + true) + + ;; Unterminated block comment must raise — previously it was + ;; silently consumed to EOF. + (dl-tk-test! + "unterminated block comment raises" + (let ((threw false)) + (do + (guard (e (#t (set! threw true))) + (dl-tokenize "/* unclosed comment")) + threw)) + true) + (dl-tk-test! + "whitespace" + (dl-tk-types (dl-tokenize " foo ,\t bar .")) + (list "atom" "punct" "atom" "punct" "eof")) + (dl-tk-test! + "positions" + (map (fn (t) (get t :pos)) (dl-tokenize "foo bar")) + (list 0 4 7))))) + +(define + dl-tokenize-tests-run! + (fn + () + (do + (set! dl-tk-pass 0) + (set! dl-tk-fail 0) + (set! dl-tk-failures (list)) + (dl-tk-run-all!) + {:failures dl-tk-failures :total (+ dl-tk-pass dl-tk-fail) :passed dl-tk-pass :failed dl-tk-fail}))) diff --git a/lib/datalog/tests/unify.sx b/lib/datalog/tests/unify.sx new file mode 100644 index 00000000..3db114ed --- /dev/null +++ b/lib/datalog/tests/unify.sx @@ -0,0 +1,194 @@ +;; lib/datalog/tests/unify.sx — unification + substitution tests. + +(define dl-ut-pass 0) +(define dl-ut-fail 0) +(define dl-ut-failures (list)) + +(define + dl-ut-deep-equal? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-ut-deq-list? a b 0))) + ((and (dict? a) (dict? b)) + (let + ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-ut-deq-dict? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-ut-deq-list? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-ut-deep-equal? (nth a i) (nth b i))) false) + (else (dl-ut-deq-list? a b (+ i 1)))))) + +(define + dl-ut-deq-dict? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) (not (dl-ut-deep-equal? (get a k) (get b k)))) + false) + (else (dl-ut-deq-dict? a b ka (+ i 1)))))) + +(define + dl-ut-test! + (fn + (name got expected) + (if + (dl-ut-deep-equal? got expected) + (set! dl-ut-pass (+ dl-ut-pass 1)) + (do + (set! dl-ut-fail (+ dl-ut-fail 1)) + (append! + dl-ut-failures + (str name "\n expected: " expected "\n got: " got)))))) + +(define + dl-ut-run-all! + (fn + () + (do + (dl-ut-test! "var? uppercase" (dl-var? (quote X)) true) + (dl-ut-test! "var? underscore" (dl-var? (quote _foo)) true) + (dl-ut-test! "var? lowercase" (dl-var? (quote tom)) false) + (dl-ut-test! "var? number" (dl-var? 5) false) + (dl-ut-test! "var? string" (dl-var? "hi") false) + (dl-ut-test! "var? list" (dl-var? (list 1)) false) + (dl-ut-test! + "atom-atom match" + (dl-unify (quote tom) (quote tom) (dl-empty-subst)) + {}) + (dl-ut-test! + "atom-atom fail" + (dl-unify (quote tom) (quote bob) (dl-empty-subst)) + nil) + (dl-ut-test! + "num-num match" + (dl-unify 5 5 (dl-empty-subst)) + {}) + (dl-ut-test! + "num-num fail" + (dl-unify 5 6 (dl-empty-subst)) + nil) + (dl-ut-test! + "string match" + (dl-unify "hi" "hi" (dl-empty-subst)) + {}) + (dl-ut-test! "string fail" (dl-unify "hi" "bye" (dl-empty-subst)) nil) + (dl-ut-test! + "var-atom binds" + (dl-unify (quote X) (quote tom) (dl-empty-subst)) + {:X (quote tom)}) + (dl-ut-test! + "atom-var binds" + (dl-unify (quote tom) (quote X) (dl-empty-subst)) + {:X (quote tom)}) + (dl-ut-test! + "var-var same" + (dl-unify (quote X) (quote X) (dl-empty-subst)) + {}) + (dl-ut-test! + "var-var bind" + (let + ((s (dl-unify (quote X) (quote Y) (dl-empty-subst)))) + (dl-walk (quote X) s)) + (quote Y)) + (dl-ut-test! + "tuple match" + (dl-unify + (list (quote parent) (quote X) (quote bob)) + (list (quote parent) (quote tom) (quote Y)) + (dl-empty-subst)) + {:X (quote tom) :Y (quote bob)}) + (dl-ut-test! + "tuple arity mismatch" + (dl-unify + (list (quote p) (quote X)) + (list (quote p) (quote a) (quote b)) + (dl-empty-subst)) + nil) + (dl-ut-test! + "tuple head mismatch" + (dl-unify + (list (quote p) (quote X)) + (list (quote q) (quote X)) + (dl-empty-subst)) + nil) + (dl-ut-test! + "walk chain" + (let + ((s1 (dl-unify (quote X) (quote Y) (dl-empty-subst)))) + (let + ((s2 (dl-unify (quote Y) (quote tom) s1))) + (dl-walk (quote X) s2))) + (quote tom)) + + ;; Walk with circular substitution must not infinite-loop. + ;; Cycles return the current term unchanged. + (dl-ut-test! + "walk circular subst no hang" + (let ((s (dl-bind (quote B) (quote A) + (dl-bind (quote A) (quote B) (dl-empty-subst))))) + (dl-walk (quote A) s)) + (quote A)) + (dl-ut-test! + "apply subst on tuple" + (let + ((s (dl-bind (quote X) (quote tom) (dl-empty-subst)))) + (dl-apply-subst (list (quote parent) (quote X) (quote Y)) s)) + (list (quote parent) (quote tom) (quote Y))) + (dl-ut-test! + "ground? all const" + (dl-ground? + (list (quote p) (quote tom) 5) + (dl-empty-subst)) + true) + (dl-ut-test! + "ground? unbound var" + (dl-ground? (list (quote p) (quote X)) (dl-empty-subst)) + false) + (dl-ut-test! + "ground? bound var" + (let + ((s (dl-bind (quote X) (quote tom) (dl-empty-subst)))) + (dl-ground? (list (quote p) (quote X)) s)) + true) + (dl-ut-test! + "ground? bare var" + (dl-ground? (quote X) (dl-empty-subst)) + false) + (dl-ut-test! + "vars-of basic" + (dl-vars-of + (list (quote p) (quote X) (quote tom) (quote Y) (quote X))) + (list "X" "Y")) + (dl-ut-test! + "vars-of ground" + (dl-vars-of (list (quote p) (quote tom) (quote bob))) + (list)) + (dl-ut-test! + "vars-of nested compound" + (dl-vars-of + (list + (quote is) + (quote Z) + (list (string->symbol "+") (quote X) 1))) + (list "Z" "X"))))) + +(define + dl-unify-tests-run! + (fn + () + (do + (set! dl-ut-pass 0) + (set! dl-ut-fail 0) + (set! dl-ut-failures (list)) + (dl-ut-run-all!) + {:failures dl-ut-failures :total (+ dl-ut-pass dl-ut-fail) :passed dl-ut-pass :failed dl-ut-fail}))) diff --git a/lib/datalog/tokenizer.sx b/lib/datalog/tokenizer.sx new file mode 100644 index 00000000..cf0aa730 --- /dev/null +++ b/lib/datalog/tokenizer.sx @@ -0,0 +1,269 @@ +;; lib/datalog/tokenizer.sx — Datalog source → token stream +;; +;; Tokens: {:type T :value V :pos P} +;; Types: +;; "atom" — lowercase-start bare identifier +;; "var" — uppercase-start or _-start ident (value is the name) +;; "number" — numeric literal (decoded to number) +;; "string" — "..." string literal OR quoted 'atom' (treated as a +;; string value to avoid the var-vs-atom ambiguity that +;; would arise from a quoted atom whose name starts with +;; an uppercase letter or underscore) +;; "punct" — ( ) , . +;; "op" — :- ?- <= >= != < > = + - * / +;; "eof" +;; +;; Datalog has no function symbols in arg position; the parser still +;; accepts nested compounds for arithmetic ((is X (+ A B))) but safety +;; analysis rejects non-arithmetic nesting at rule-load time. + +(define dl-make-token (fn (type value pos) {:type type :value value :pos pos})) + +(define dl-digit? (fn (c) (and (>= c "0") (<= c "9")))) +(define dl-lower? (fn (c) (and (>= c "a") (<= c "z")))) +(define dl-upper? (fn (c) (and (>= c "A") (<= c "Z")))) + +(define + dl-ident-char? + (fn (c) (or (dl-lower? c) (dl-upper? c) (dl-digit? c) (= c "_")))) + +(define dl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) + +(define + dl-tokenize + (fn + (src) + (let + ((tokens (list)) (pos 0) (src-len (len src))) + (define + dl-peek + (fn + (offset) + (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) + (define cur (fn () (dl-peek 0))) + (define advance! (fn (n) (set! pos (+ pos n)))) + (define + at? + (fn + (s) + (let + ((sl (len s))) + (and (<= (+ pos sl) src-len) (= (slice src pos (+ pos sl)) s))))) + (define + dl-emit! + (fn + (type value start) + (append! tokens (dl-make-token type value start)))) + (define + skip-line-comment! + (fn + () + (when + (and (< pos src-len) (not (= (cur) "\n"))) + (do (advance! 1) (skip-line-comment!))))) + (define + skip-block-comment! + (fn + () + (cond + ((>= pos src-len) + (error (str "Tokenizer: unterminated block comment " + "(started at position " pos ")"))) + ((and (= (cur) "*") (< (+ pos 1) src-len) (= (dl-peek 1) "/")) + (advance! 2)) + (else (do (advance! 1) (skip-block-comment!)))))) + (define + skip-ws! + (fn + () + (cond + ((>= pos src-len) nil) + ((dl-ws? (cur)) (do (advance! 1) (skip-ws!))) + ((= (cur) "%") + (do (advance! 1) (skip-line-comment!) (skip-ws!))) + ((and (= (cur) "/") (< (+ pos 1) src-len) (= (dl-peek 1) "*")) + (do (advance! 2) (skip-block-comment!) (skip-ws!))) + (else nil)))) + (define + read-ident + (fn + (start) + (do + (when + (and (< pos src-len) (dl-ident-char? (cur))) + (do (advance! 1) (read-ident start))) + (slice src start pos)))) + (define + read-decimal-digits! + (fn + () + (when + (and (< pos src-len) (dl-digit? (cur))) + (do (advance! 1) (read-decimal-digits!))))) + (define + read-number + (fn + (start) + (do + (read-decimal-digits!) + (when + (and + (< pos src-len) + (= (cur) ".") + (< (+ pos 1) src-len) + (dl-digit? (dl-peek 1))) + (do (advance! 1) (read-decimal-digits!))) + (parse-number (slice src start pos))))) + (define + read-quoted + (fn + (quote-char) + (let + ((chars (list))) + (advance! 1) + (define + loop + (fn + () + (cond + ((>= pos src-len) + (error + (str "Tokenizer: unterminated " + (if (= quote-char "'") "quoted atom" "string") + " (started near position " pos ")"))) + ((= (cur) "\\") + (do + (advance! 1) + (when + (< pos src-len) + (let + ((ch (cur))) + (do + (cond + ((= ch "n") (append! chars "\n")) + ((= ch "t") (append! chars "\t")) + ((= ch "r") (append! chars "\r")) + ((= ch "\\") (append! chars "\\")) + ((= ch "'") (append! chars "'")) + ((= ch "\"") (append! chars "\"")) + (else (append! chars ch))) + (advance! 1)))) + (loop))) + ((= (cur) quote-char) (advance! 1)) + (else + (do (append! chars (cur)) (advance! 1) (loop)))))) + (loop) + (join "" chars)))) + (define + scan! + (fn + () + (do + (skip-ws!) + (when + (< pos src-len) + (let + ((ch (cur)) (start pos)) + (cond + ((at? ":-") + (do + (dl-emit! "op" ":-" start) + (advance! 2) + (scan!))) + ((at? "?-") + (do + (dl-emit! "op" "?-" start) + (advance! 2) + (scan!))) + ((at? "<=") + (do + (dl-emit! "op" "<=" start) + (advance! 2) + (scan!))) + ((at? ">=") + (do + (dl-emit! "op" ">=" start) + (advance! 2) + (scan!))) + ((at? "!=") + (do + (dl-emit! "op" "!=" start) + (advance! 2) + (scan!))) + ((dl-digit? ch) + (do + (dl-emit! "number" (read-number start) start) + (scan!))) + ((= ch "'") + ;; Quoted 'atoms' tokenize as strings so a name + ;; like 'Hello World' doesn't get misclassified + ;; as a variable by dl-var? (which inspects the + ;; symbol's first character). + (do (dl-emit! "string" (read-quoted "'") start) (scan!))) + ((= ch "\"") + (do (dl-emit! "string" (read-quoted "\"") start) (scan!))) + ((dl-lower? ch) + (do (dl-emit! "atom" (read-ident start) start) (scan!))) + ((or (dl-upper? ch) (= ch "_")) + (do (dl-emit! "var" (read-ident start) start) (scan!))) + ((= ch "(") + (do + (dl-emit! "punct" "(" start) + (advance! 1) + (scan!))) + ((= ch ")") + (do + (dl-emit! "punct" ")" start) + (advance! 1) + (scan!))) + ((= ch ",") + (do + (dl-emit! "punct" "," start) + (advance! 1) + (scan!))) + ((= ch ".") + (do + (dl-emit! "punct" "." start) + (advance! 1) + (scan!))) + ((= ch "<") + (do + (dl-emit! "op" "<" start) + (advance! 1) + (scan!))) + ((= ch ">") + (do + (dl-emit! "op" ">" start) + (advance! 1) + (scan!))) + ((= ch "=") + (do + (dl-emit! "op" "=" start) + (advance! 1) + (scan!))) + ((= ch "+") + (do + (dl-emit! "op" "+" start) + (advance! 1) + (scan!))) + ((= ch "-") + (do + (dl-emit! "op" "-" start) + (advance! 1) + (scan!))) + ((= ch "*") + (do + (dl-emit! "op" "*" start) + (advance! 1) + (scan!))) + ((= ch "/") + (do + (dl-emit! "op" "/" start) + (advance! 1) + (scan!))) + (else (error + (str "Tokenizer: unexpected character '" ch + "' at position " start))))))))) + (scan!) + (dl-emit! "eof" nil pos) + tokens))) diff --git a/lib/datalog/unify.sx b/lib/datalog/unify.sx new file mode 100644 index 00000000..d5de7eda --- /dev/null +++ b/lib/datalog/unify.sx @@ -0,0 +1,171 @@ +;; lib/datalog/unify.sx — unification + substitution for Datalog terms. +;; +;; Term taxonomy (after parsing): +;; variable — SX symbol whose first char is uppercase A–Z or '_'. +;; constant — SX symbol whose first char is lowercase a–z (atom name). +;; number — numeric literal. +;; string — string literal. +;; compound — SX list (functor arg ... arg). In core Datalog these +;; only appear as arithmetic expressions (see Phase 4 +;; safety analysis); compound-against-compound unification +;; is supported anyway for completeness. +;; +;; Substitutions are immutable dicts keyed by variable name (string). +;; A failed unification returns nil; success returns the extended subst. + +(define dl-empty-subst (fn () {})) + +(define + dl-var? + (fn + (term) + (and + (symbol? term) + (let + ((name (symbol->string term))) + (and + (> (len name) 0) + (let + ((c (slice name 0 1))) + (or (and (>= c "A") (<= c "Z")) (= c "_")))))))) + +;; Walk: chase variable bindings until we hit a non-variable or an unbound +;; variable. The result is either a non-variable term or an unbound var. +(define + dl-walk + (fn (term subst) (dl-walk-aux term subst (list)))) + +;; Internal: walk with a visited-var set so circular substitutions +;; (from raw dl-bind misuse) don't infinite-loop. Cycles return the +;; current term unchanged. +(define + dl-walk-aux + (fn + (term subst visited) + (if + (dl-var? term) + (let + ((name (symbol->string term))) + (cond + ((dl-member? name visited) term) + ((and (dict? subst) (has-key? subst name)) + (let ((seen (list))) + (do + (for-each (fn (v) (append! seen v)) visited) + (append! seen name) + (dl-walk-aux (get subst name) subst seen)))) + (else term))) + term))) + +;; Bind a variable symbol to a value in subst, returning a new subst. +(define + dl-bind + (fn (var-sym value subst) (assoc subst (symbol->string var-sym) value))) + +(define + dl-unify + (fn + (t1 t2 subst) + (if + (nil? subst) + nil + (let + ((u1 (dl-walk t1 subst)) (u2 (dl-walk t2 subst))) + (cond + ((dl-var? u1) + (cond + ((and (dl-var? u2) (= (symbol->string u1) (symbol->string u2))) + subst) + (else (dl-bind u1 u2 subst)))) + ((dl-var? u2) (dl-bind u2 u1 subst)) + ((and (list? u1) (list? u2)) + (if + (= (len u1) (len u2)) + (dl-unify-list u1 u2 subst 0) + nil)) + ((and (number? u1) (number? u2)) (if (= u1 u2) subst nil)) + ((and (string? u1) (string? u2)) (if (= u1 u2) subst nil)) + ((and (symbol? u1) (symbol? u2)) + (if (= (symbol->string u1) (symbol->string u2)) subst nil)) + (else nil)))))) + +(define + dl-unify-list + (fn + (a b subst i) + (cond + ((nil? subst) nil) + ((>= i (len a)) subst) + (else + (dl-unify-list + a + b + (dl-unify (nth a i) (nth b i) subst) + (+ i 1)))))) + +;; Apply substitution: walk the term and recurse into lists. +(define + dl-apply-subst + (fn + (term subst) + (let + ((w (dl-walk term subst))) + (if (list? w) (map (fn (x) (dl-apply-subst x subst)) w) w)))) + +;; Ground? — true iff no free variables remain after walking. +(define + dl-ground? + (fn + (term subst) + (let + ((w (dl-walk term subst))) + (cond + ((dl-var? w) false) + ((list? w) (dl-ground-list? w subst 0)) + (else true))))) + +(define + dl-ground-list? + (fn + (xs subst i) + (cond + ((>= i (len xs)) true) + ((not (dl-ground? (nth xs i) subst)) false) + (else (dl-ground-list? xs subst (+ i 1)))))) + +;; Return the list of variable names appearing in a term (deduped, in +;; left-to-right order). Useful for safety analysis later. +(define + dl-vars-of + (fn (term) (let ((seen (list))) (do (dl-vars-of-aux term seen) seen)))) + +(define + dl-vars-of-aux + (fn + (term acc) + (cond + ((dl-var? term) + (let + ((name (symbol->string term))) + (when (not (dl-member? name acc)) (append! acc name)))) + ((list? term) (dl-vars-of-list term acc 0)) + (else nil)))) + +(define + dl-vars-of-list + (fn + (xs acc i) + (when + (< i (len xs)) + (do + (dl-vars-of-aux (nth xs i) acc) + (dl-vars-of-list xs acc (+ i 1)))))) + +(define + dl-member? + (fn + (x xs) + (cond + ((= (len xs) 0) false) + ((= (first xs) x) true) + (else (dl-member? x (rest xs)))))) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index e7d26683..d1b84163 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -13,6 +13,20 @@ End-state goal: **full core Datalog** (facts, rules, stratified negation, aggreg recursion) with a clean SX query API, and a demonstration of Datalog as a query engine for rose-ash data (e.g. federation graph, content relationships). +## Status (rolling) + +`bash lib/datalog/conformance.sh` → **276/276 across 11 suites** +(tokenize, parse, unify, eval, builtins, semi_naive, negation, aggregates, +api, magic, demo). Source is ~3100 LOC, tests ~2900 LOC, public API +documented in `lib/datalog/datalog.sx`. + +Phases 1–9 are functionally complete; Phase 10 covers the rose-ash +domain demos (in `lib/datalog/demo.sx` — federation, content, +permissions, cooking-posts, tag co-occurrence, shortest path, org chart). +The PostgreSQL loader and `/internal/datalog` HTTP endpoint listed in +Phase 10 require service-tree edits outside `lib/datalog/**` and are +flagged as out-of-scope for this loop. + ## Ground rules - **Scope:** only touch `lib/datalog/**` and `plans/datalog-on-sx.md`. Do **not** edit @@ -25,23 +39,6 @@ for rose-ash data (e.g. federation graph, content relationships). Dalmau "Datalog and Constraint Satisfaction". - **Commits:** one feature per commit. Keep `## Progress log` updated and tick boxes. -## Non-goals - -Deliberately out of scope for this implementation. Real engines (Soufflé, Cozo, DDlog) include -some of these; we accept they're missing and will note them in `Blockers` if a use case demands -one later. - -- **Function symbols** — keeps termination guaranteed and prevents collapse into Prolog. -- **Disjunctive heads** (`p :- q. p :- r.` is fine; `p ; q :- r.` is not) — research extension. -- **Well-founded semantics** — only stratified negation. Programs that aren't stratifiable are - rejected at load time, not evaluated under WFS. -- **Tabled top-down (SLG resolution)** — bottom-up only. If you want top-down with termination, - use the Prolog implementation. -- **Constraint Datalog** (Datalog over reals, intervals, finite domains) — research extension. -- **Distributed evaluation / Differential Dataflow** — single-process fixpoint only. The rose-ash - cross-service story (Phase 10) federates by querying each service's local Datalog instance and - joining results, not by running a distributed fixpoint. - ## Architecture sketch ``` @@ -75,128 +72,647 @@ Key differences from Prolog: ## Roadmap ### Phase 1 — tokenizer + parser -- [ ] Tokenizer: atoms (lowercase/quoted), variables (uppercase/`_`), numbers, strings, - operators (`:- `, `?-`, `,`, `.`), arithmetic + comparison operators - (`+`, `-`, `*`, `/`, `<`, `<=`, `>`, `>=`, `=`, `!=`), comments (`%`, `/* */`) - Note: no function symbol syntax (no nested `f(...)` in arg position). -- [ ] Parser: +- [x] Tokenizer: atoms (lowercase/quoted), variables (uppercase/`_`), numbers, strings, + punct (`( )`, `,`, `.`), operators (`:-`, `?-`, `<=`, `>=`, `!=`, `<`, `>`, `=`, + `+`, `-`, `*`, `/`), comments (`%`, `/* */`) + Note: no function symbol syntax (no nested `f(...)` in arg position) — but the + parser permits nested compounds for arithmetic; safety analysis (Phase 3) rejects + non-arithmetic nesting. +- [x] Parser: - Facts: `parent(tom, bob).` → `{:head (parent tom bob) :body ()}` - Rules: `ancestor(X,Z) :- parent(X,Y), ancestor(Y,Z).` → `{:head (ancestor X Z) :body ((parent X Y) (ancestor Y Z))}` - - Queries: `?- ancestor(tom, X).` → `{:query (ancestor tom X)}` + - Queries: `?- ancestor(tom, X).` → `{:query ((ancestor tom X))}` + (`:query` value is always a list of literals; `?- p, q.` → `{:query ((p) (q))}`) - Negation: `not(parent(X,Y))` in body position → `{:neg (parent X Y)}` -- [ ] Tests in `lib/datalog/tests/parse.sx` +- [x] Tests in `lib/datalog/tests/parse.sx` (18) and `lib/datalog/tests/tokenize.sx` (26). + Conformance harness: `bash lib/datalog/conformance.sh` → 44 / 44 passing. ### Phase 2 — unification + substitution -- [ ] Share or port unification from `lib/prolog/` — term walk, occurs check off by default -- [ ] `dl-unify` `t1` `t2` `subst` → extended subst or nil (no function symbols means simpler) -- [ ] `dl-ground?` `term` → bool — all variables bound in substitution -- [ ] Tests: atom/atom, var/atom, var/var, list args +- [x] Ported (not shared) from `lib/prolog/` — term walk, no occurs check. +- [x] `dl-unify t1 t2 subst` → extended subst dict, or `nil` on failure. +- [x] `dl-walk`, `dl-bind`, `dl-apply-subst`, `dl-ground?`, `dl-vars-of`. +- [x] Substitutions are immutable dicts keyed by variable name (string). + Lists/tuples unify element-wise (used for arithmetic compounds too). +- [x] Tests in `lib/datalog/tests/unify.sx` (28). 72 / 72 conformance. -### Phase 3 — extensional DB + naive evaluation -- [ ] EDB: `{:relation-name → set-of-ground-tuples}` using SX sets (Phase 18 of primitives) -- [ ] `dl-add-fact!` `db` `relation` `args` → add ground tuple -- [ ] `dl-add-rule!` `db` `head` `body` → add rule clause -- [ ] Naive evaluation: iterate rules until fixpoint - For each rule, for each combination of body tuples that unify, derive head tuple. - Repeat until no new tuples added. -- [ ] `dl-query` `db` `goal` → list of substitutions satisfying goal against derived DB -- [ ] **Safety analysis**: every variable in a rule head must also appear in a positive body - literal; reject unsafe rules at `dl-add-rule!` time with a clear error pointing at the - offending variable. Built-in predicates and negated atoms do not satisfy safety on their - own (`p(X) :- X > 0.` is unsafe). -- [ ] Tests: transitive closure (ancestor), sibling, same-generation — classic Datalog programs; - safety violation rejection cases. +### Phase 3 — extensional DB + naive evaluation + safety analysis +- [x] EDB+IDB combined: `{:facts { -> (literal ...)}}` — + relations indexed by name; tuples stored as full literals so they + unify directly. Dedup on insert via `dl-tuple-equal?`. +- [x] `dl-add-fact! db lit` (rejects non-ground) and `dl-add-rule! db rule` + (rejects unsafe). `dl-program source` parses + loads in one step. +- [x] Naive evaluation `dl-saturate! db`: iterate rules until no new tuples. + `dl-find-bindings` recursively joins body literals; `dl-match-positive` + unifies a literal against every tuple in the relation. +- [x] `dl-query db goal` → list of substitutions over `goal`'s vars, + deduplicated. `dl-relation db name` for derived tuples. +- [x] Safety analysis at `dl-add-rule!` time: every head variable except + `_` must appear in some positive body literal. Built-ins and negated + literals do not satisfy safety. Helpers `dl-positive-body-vars`, + `dl-rule-unsafe-head-vars` exposed for later phases. +- [x] Negation and arithmetic built-ins error cleanly at saturate time + (Phase 4 / Phase 7 will swap in real semantics). +- [x] Tests in `lib/datalog/tests/eval.sx` (15): transitive closure, + sibling, same-generation, grandparent, cyclic graph reach, six + safety cases. 87 / 87 conformance. ### Phase 4 — built-in predicates + body arithmetic - -Almost every real query needs `<`, `=`, simple arithmetic, and string comparisons in body -position. These are not EDB lookups — they're constraints that filter bindings. - -- [ ] Recognise built-in predicates in body: `(< X Y)`, `(<= X Y)`, `(> X Y)`, `(>= X Y)`, - `(= X Y)`, `(!= X Y)` and arithmetic forms `(is Z (+ X Y))`, `(is Z (- X Y))`, - `(is Z (* X Y))`, `(is Z (/ X Y))`. -- [ ] Built-in evaluation in the fixpoint: at the join step, after binding variables from EDB - lookups, evaluate built-ins as constraints. If any built-in fails or has unbound inputs, - drop the candidate substitution. -- [ ] **Safety extension**: `is` binds its left operand iff right operand is fully ground. - `(< X Y)` requires both X and Y bound by some prior body literal — reject unsafe. -- [ ] Wire arithmetic operators through to SX numeric primitives — no separate Datalog number - tower. -- [ ] Tests: range filters, arithmetic derivations (`(plus-one X Y :- ..., (is Y (+ X 1)))`), - comparison-based queries, safety violation detection on `(p X) :- (< X 5).` +Almost every real query needs `<`, `=`, simple arithmetic, and string +comparisons in body position. These are not EDB lookups — they're +constraints that filter bindings. +- [x] Recognise built-in predicates in body: `(< X Y)`, `(<= X Y)`, `(> X Y)`, + `(>= X Y)`, `(= X Y)`, `(!= X Y)` and arithmetic forms `(is Z (+ X Y))`, + `(is Z (- X Y))`, `(is Z (* X Y))`, `(is Z (/ X Y))`. Live in + `lib/datalog/builtins.sx`. +- [x] `dl-eval-builtin` dispatches; `dl-eval-arith` recursively evaluates + `(+ a b)` etc. with full nesting. `=` unifies; `!=` rejects equal + ground terms. +- [x] Order-aware safety analysis (`dl-rule-check-safety`): walks body + left-to-right tracking which vars are bound. `is`'s RHS vars must + be already bound; LHS becomes bound. Comparisons require both + sides bound. `=` is special-cased — at least one side bound binds + the other. Negation vars must be bound (will be enforced fully in + Phase 7). +- [x] Wired through SX numeric primitives — no separate number tower. +- [x] Tests in `lib/datalog/tests/builtins.sx` (19): range filters, + arithmetic derivations, equality binding, eight safety violations + and three safe-shape tests. Conformance 106 / 106. ### Phase 5 — semi-naive evaluation (performance) -- [ ] Delta sets: track newly derived tuples per iteration -- [ ] Semi-naive rule: only join against delta tuples from last iteration, not full relation -- [ ] Significant speedup for recursive rules — avoids re-deriving known tuples -- [ ] `dl-stratify` `db` → dependency graph + SCC analysis → stratum ordering -- [ ] Tests: verify semi-naive produces same results as naive; benchmark on large ancestor chain +- [x] Delta sets `{rel-name -> tuples}` track newly derived tuples per iter. + `dl-snapshot-facts` builds the initial delta from the EDB. +- [x] Semi-naive rule: for each rule, walk every positive body literal + position; substitute that one with the per-relation delta and join + the rest against the previous-iteration DB (`dl-find-bindings-semi`). + Candidates are collected before mutating the DB so the "full" sides + see a consistent snapshot. +- [x] `dl-collect-rule-candidates` falls back to a naive single pass when + a rule has no positive body literal (e.g. `(p X) :- (= X 5).`). +- [x] `dl-saturate!` is now semi-naive by default; `dl-saturate-naive!` + kept for differential testing and a reference implementation. +- [x] Tests in `lib/datalog/tests/semi_naive.sx` (8) — every recursive + program from earlier suites is run under both saturators with + per-relation tuple counts compared (cheap, robust under bundled + conformance session). A chain-5 differential exercises multiple + semi-naive iterations against the recursive ancestor rule. + Larger chains hit prohibitive wall-clock under conformance CPU + contention with other agents — a future Blocker tracks switching + `dl-tuple-member?` from O(n²) list scan to a hash-set per relation. -### Phase 6 — magic sets (goal-directed bottom-up) - -Naive bottom-up evaluation derives **all** consequences of all rules before answering, even when -the query touches a tiny slice of the EDB. Magic sets rewrite the program so the fixpoint only -derives tuples relevant to the goal — a major perf win for "what's reachable from node X" style -queries on large graphs. - -- [ ] Adornments: annotate rule predicates with bound (`b`) / free (`f`) patterns based on how - they're called (`ancestor^bf(tom, X)` vs `ancestor^ff(X, Y)`). -- [ ] Magic transformation: for each adorned predicate, generate a `magic_` relation and - rewrite rule bodies to filter through it. Seed with `magic_()`. -- [ ] Sideways information passing strategy (SIPS): left-to-right by default; pluggable. -- [ ] Optional pass — guarded behind `(dl-set-strategy! db :magic)`; default remains semi-naive. -- [ ] Tests: ancestor query from a single root on a 10k-node graph — magic-rewritten version - should be O(reachable) instead of O(graph). Equivalence vs naive on small inputs. +### Phase 6 — magic sets (goal-directed bottom-up, opt-in) +Naive bottom-up derives **all** consequences before answering. Magic sets +rewrite the program so the fixpoint only derives tuples relevant to the +goal — a major perf win for "what's reachable from node X" queries on +large graphs. +- [x] Adornments: `dl-adorn-goal goal` and `dl-adorn-lit lit bound` in + `lib/datalog/magic.sx`. Per-arg `b`/`f` based on whether the arg + is a constant or a variable already in the bound set. +- [x] Magic transformation: `dl-magic-rewrite rules query-rel adn args` + generates `{:rules :seed }`. Each + original rule is gated with a `magic_^(bound)` filter, + and propagation rules are emitted for each positive non-builtin + body literal. Worklist over `(rel, adn)` pairs starts from the + query and stops when no new pairs appear. EDB facts pass through + unchanged. +- [x] Sideways information passing strategy (SIPS): left-to-right + `dl-rule-sips rule head-adornment` walks body literals tracking + the bound set, returning `({:lit :adornment} ...)`. Recognises + `is`/aggregate result-vars as new binders; comparisons and + negation pass through with computed adornments. (Pluggable + strategies are future work.) +- [x] `dl-set-strategy! db strategy` hook + `dl-get-strategy db`. Default + `:semi-naive`. `:magic` accepted but the transformation itself is + deferred — saturator currently falls back to semi-naive. Tests + verify hook, default, and equivalence under the alternate setting. +- [x] Equivalence test: rewritten ancestor program over the same EDB + derives the same number of `ancestor` tuples and returns the + same query answers as the unrewritten program (chain-3 case). +- [x] `dl-magic-query db query-goal` — top-level driver. Builds a + fresh internal db with the caller's EDB facts, the magic seed, + and the rewritten rules; saturates and queries. Caller's db is + untouched. Equivalent to `dl-query` for fully-stratifiable + programs (sole motivation is a perf alternative on goal-shaped + queries against large recursive relations). +- [ ] Perf test: 10k-node reachability with magic vs semi-naive. + Left to a future iteration — would need a benchmarking harness + for large graphs and the conformance budget can't afford it. ### Phase 7 — stratified negation -- [ ] Dependency graph analysis: which relations depend on which (positively or negatively) -- [ ] Stratification check: error if negation is in a cycle (non-stratifiable program) -- [ ] Evaluation: process strata in order — lower stratum fully computed before using its - complement in a higher stratum -- [ ] `not(P)` in rule body: at evaluation time, check P is NOT in the derived EDB -- [ ] Tests: non-member (`not(member(X,L))`), colored-graph (`not(same-color(X,Y))`), - stratification error detection +- [x] Dependency graph: `dl-build-dep-graph db` returns `{head -> ({:rel + :neg} ...)}`. Built-ins drop out (they're not relations). +- [x] Reachability via Floyd-Warshall in `dl-build-reach`; cycles + detected by `reach[A][B] && reach[B][A]`. Programs are + non-stratifiable iff any negative dependency falls inside an SCC. + `dl-check-stratifiable` returns nil on success or a clear message. +- [x] `dl-compute-strata` propagates stratum numbers iteratively: + `stratum(R) = max over deps of (stratum(dep) + (1 if negated else 0))`. +- [x] Saturator refactor: `dl-saturate-rules! db rules` is the semi- + naive worker; `dl-saturate! db` rejects non-stratifiable programs, + groups rules by head's stratum, and runs the worker on each + stratum in increasing order. +- [x] `not(P)` in body: `dl-match-negation` walks the inner literal + under the current subst and uses `dl-match-positive` — succeeds + iff zero matches. Order-aware safety in `dl-rule-check-safety` + (already present from Phase 4) requires negation vars to be + bound by an earlier positive literal. +- [x] Tests in `lib/datalog/tests/negation.sx` (10): EDB and IDB + negation, two-step strata, multi-level strata, with-arithmetic, + empty-result and always-fail cases, non-stratifiability + rejection, and a negation safety violation. ### Phase 8 — aggregation (Datalog+) -- [ ] `count(X, Goal)` → number of distinct X satisfying Goal -- [ ] `sum(X, Goal)` → sum of X values satisfying Goal -- [ ] `min(X, Goal)` / `max(X, Goal)` → min/max of X satisfying Goal -- [ ] `group-by` semantics: `count(X, sibling(bob, X))` → count of bob's siblings -- [ ] Aggregation breaks stratification — evaluate in a separate post-fixpoint pass -- [ ] Tests: social network statistics, grade aggregation, inventory sums +- [x] `(count R V Goal)`, `(sum R V Goal)`, `(min R V Goal)`, + `(max R V Goal)`, `(findall L V Goal)` — first arg is the result + variable, second is the aggregated variable, third is the goal + literal. `findall` returns the distinct-value list itself; the + others reduce. Live in `lib/datalog/aggregates.sx`. +- [x] `dl-eval-aggregate`: runs `dl-find-bindings` on the goal under the + current subst (which provides outer-context bindings), collects + distinct values of the aggregated var, applies the aggregate. + `count`/`sum` produce 0 when no matches; `min`/`max` produce no + binding (rule fails) when empty. +- [x] Group-by emerges naturally: outer-context vars in the goal are + substituted from the current subst, so `popular(P) :- post(P), + count(N, U, liked(U, P)), >=(N, 3).` correctly counts per-post. +- [x] Stratification: `dl-aggregate-dep-edge` returns a negation-like + edge so the aggregate's goal relation is fully derived before the + aggregate fires. Non-monotonicity respected. +- [x] Safety: aggregate body lit binds the result var; goal-internal + vars are existentially quantified and don't need outer binding. +- [x] Tests in `lib/datalog/tests/aggregates.sx` (10): count siblings, + sum prices, min/max scores, count over derived relation, + empty-input cases for each operator, popularity threshold with + group-by, distinct-counted-once. ### Phase 9 — SX embedding API -- [ ] `(dl-program facts rules)` → database from SX data directly (no parsing required) +- [x] `(dl-program-data facts rules)` builds a db from SX data — + `facts` is a list of literals, `rules` is a list of either + dicts `{:head … :body …}` or lists `( <- )`. + Variables are SX symbols whose first char is uppercase or `_`, + matching the parser's convention. ``` - (dl-program - '((parent tom bob) (parent tom liz) (parent bob ann)) - '((ancestor X Z :- (parent X Y) (ancestor Y Z)) - (ancestor X Y :- (parent X Y)))) + (dl-program-data + '((parent tom bob) (parent bob ann)) + '((ancestor X Y <- (parent X Y)) + (ancestor X Z <- (parent X Y) (ancestor Y Z)))) ``` -- [ ] `(dl-query db '(ancestor tom ?X))` → `((ann) (bob) (liz) (pat))` -- [ ] `(dl-assert! db '(parent ann pat))` → incremental fact addition + re-derive -- [ ] `(dl-retract! db '(parent tom bob))` → fact removal + re-derive from scratch -- [ ] Integration demo: federation graph query — `(ancestor actor1 actor2)` over - rose-ash ActivityPub follow relationships +- [x] `(dl-rule head body)` constructor for the dict form. +- [x] `(dl-query db '(ancestor tom X))` already worked — same query API + consumes the SX-data goal. Now also accepts a *list* of body + literals for conjunctive queries: + `(dl-query db '((p X) (q X)))`, + `(dl-query db (list '(n X) '(> X 2)))`. Auto-dispatched via + `dl-query-coerce` on first-element shape. +- [x] `(dl-assert! db '(parent ann pat))` → adds the fact and re-saturates. +- [x] `(dl-retract! db '(parent bob ann))` → drops matching tuples from + the EDB list, wipes every relation that has a rule (those are IDB), + and re-saturates from the surviving EDB. +- [x] Tests in `lib/datalog/tests/api.sx` (9): closure via data API, + dict-rule form, dl-rule constructor, dl-assert! incremental, + dl-retract! removes derived, cyclic-graph reach via data, + assert into empty db, fact-style rule (no arrow), coerce dict. +- [x] Integration demo: federation graph query — `(reachable A B)` / + `(mutual A B)` / `(foaf A C)` over `(follows ACTOR-A ACTOR-B)` in + `lib/datalog/demo.sx`. Tests in `lib/datalog/tests/demo.sx`. + Wiring this to actual rose-ash ActivityPub data is Phase 10 + service work and is out of scope for this loop. ### Phase 10 — Datalog as a query language for rose-ash -- [ ] Schema: map SQLAlchemy model relationships to Datalog EDB facts - (e.g. `(follows user1 user2)`, `(authored user post)`, `(tagged post tag)`) -- [ ] Loader: `dl-load-from-db!` — query PostgreSQL, populate Datalog EDB -- [ ] Query examples: - - `?- ancestor(me, X), authored(X, Post), tagged(Post, cooking).` - → posts about cooking by people I follow (transitively) - - `?- popular(Post) :- tagged(Post, T), count(L, (liked(L, Post))) >= 10.` - → posts with 10+ likes -- [ ] Expose as a rose-ash service endpoint: `POST /internal/datalog` with program + query +- [x] Schema sketches in `lib/datalog/demo.sx`: + - **Federation**: `(follows A B)` → `(mutual A B)`, `(reachable A B)`, + `(foaf A C)` (friend-of-a-friend, distinct). + - **Content**: `(authored A P)`, `(liked U P)`, `(tagged P T)` → + `(post-likes P N)` via aggregation, `(popular P)` for likes ≥ 3, + `(interesting Me P)` joining follows + authored + popular. + - **Permissions**: `(member A G)`, `(subgroup C P)`, `(allowed G R)` + → `(in-group A G)` over transitive subgroups, `(can-access A R)`. + - **Cooking-posts** (the canonical example): `(reach Me Them)` over + the follow graph, then `(cooking-post-by-network Me P)` joining + reach + authored + `(tagged P cooking)`. +- [ ] Loader `dl-load-from-db!` — out of scope for this loop + (would need to edit `shared/services/` outside `lib/datalog/`). + Programs in `demo.sx` already document the EDB shape expected + from such a loader. `dl-program-data` consumes the same shape. +- [x] Query examples covered by `lib/datalog/tests/demo.sx` (10): + mutuals, transitive reach, FOAF, popular posts, interesting feed, + post likes count, direct/subgroup/transitive group access, no + access without grant. +- [ ] Service endpoint `POST /internal/datalog` — out of scope as above. + Once exposed, server-side handler would be `dl-program-data` + + `dl-query`, returning JSON-encoded substitutions. ## Blockers -_(none yet)_ +- **Saturation perf**: three rounds done. + - hash-set membership in `dl-add-fact!` (Phase 5b) + - indexed iteration in `dl-find-bindings` (Phase 5c) + - first-arg index per relation (Phase 5e) — when a body literal's + first arg walks to a non-variable, dl-match-positive looks up + by `(str arg)` instead of scanning the full relation. + chain-25 saturation drops from ~33s to ~18s real (10s user). + chain-50 still long (~120s+) due to dict-copy overhead in + unification subst threading. Future: per-rule "compiled" body + with pre-resolved var positions, slot-based subst representation + to avoid `assoc` per binding. ## Progress log _Newest first._ -_(awaiting phase 1)_ +- 2026-05-11 — `dl-set-strategy!` accepted arbitrary keyword values + silently. Typos like `:semi_naive` or `:semiNaive` were stored + uninspected; the saturator then used the default and the user + never learned their setting was a typo. Validator added: strategy + must be one of `:semi-naive`, `:naive`, `:magic`. 1 regression test; + 276/276. + +- 2026-05-11 — Anonymous-variable renamer collided with user-written + `_anon` symbols. The renamer started counter at 0 and produced + `_anon1, _anon2, ...` unconditionally; if the user wrote + `q(_anon1) :- p(_anon1, _).` the `_` got renamed to `_anon1` too, + collapsing the two positions of `p` to a single var and returning + the empty result instead of `{a, c}`. Fix: scan each rule (and + query) for the max `_anon` and start the renamer past it. The + renamer constructor now takes a `start` arg; new helpers + `dl-max-anon-num` / `dl-max-anon-num-list` walk the rule tree. + 1 regression test; 275/275. + +- 2026-05-11 — `dl-magic-query` could silently diverge from + `dl-query` when an aggregate's inner-goal relation was IDB. The + rewriter passes aggregate body lits through unchanged (no magic + propagation for them), so the inner relation was empty in the + magic db and the aggregate returned 0. Probe: + `dl-eval-magic "u(a). u(b). u(c). u(d). banned(b). banned(d). + active(X) :- u(X), not(banned(X)). + n(N) :- count(N, X, active(X))." "?- n(N)."` + returned `N=0` instead of `N=2`. Fix: `dl-magic-query` now + pre-saturates the source db before copying facts into the magic + db. This guarantees equivalence with `dl-query` for every + stratified program; the magic benefit comes from goal-directed + re-derivation of the query relation under the seed (which still + matters for large recursive joins). The existing test suite's + aggregate cases happened to dodge this because the inner goals + were all EDB. 1 new regression test; 274/274. + +- 2026-05-11 — Anonymous `_` in a negated literal was incorrectly + flagged by the safety check. The canonical idiom + `orphan(X) :- person(X), not(parent(X, _))` was rejected with + "negation refers to unbound variable(s) (\"_anon1\")" because the + parser renames each `_` to a fresh `_anon*` symbol and the negation + safety walk demanded all vars in the negated lit be bound by an + earlier positive body literal. Anonymous vars in negation are + existentially quantified — they shouldn't need outer binding. + Added `dl-non-anon-vars` filter; `dl-process-neg!` now strips + `_anon*` names from `needed` before the binding check. 2 new + regression tests; 273/273. + +- 2026-05-11 — Compound terms in fact-arg / rule-head positions were + silently stored as unreduced expressions. `p(+(1, 2)).` resulted + in a tuple `(p (+ 1 2))` (dl-ground? sees no free variables, so it + passed). `double(*(X, 2)) :- n(X).` saturated to `double((* 3 2))` + rather than `double(6)`. Datalog has no function symbols in arg + positions — `dl-add-fact!` and `dl-add-rule!` now reject compound + args via a new `dl-simple-term?` (number / string / symbol). + Compounds remain legal in body literals where they encode `is` / + arithmetic / aggregate sub-goals. 2 new regression tests; 271/271. + +- 2026-05-11 — Quoted atoms with uppercase-or-underscore-leading + names were misclassified as variables. `p('Hello World').` ran + through the tokenizer's `"atom"` branch and through the parser's + `string->symbol`, producing a symbol named "Hello World". dl-var? + checks the first character — "H" is uppercase, so the fact was + rejected as non-ground. Fix: tokenizer emits `"string"` for any + `'...'` quoted form, so quoted atoms become opaque string constants + (matching how Datalog idiomatically treats them — the alternative + was a per-symbol "quoted" marker which would have rippled through + unification and dl-var?). Updated the existing tokenize test and + added one for `'Hello'`; also added a parse-level regression. 269/269. + +- 2026-05-11 — Type-mixed comparisons were silently inconsistent: + `<(X, 5)` with `X` bound to a string returned `()` (no result, no + error), while `X` bound to a symbol raised "Expected number, got + symbol". Both should fail loudly. Added `dl-compare-typeok?` — + `<`, `<=`, `>`, `>=` now require both operands to share a primitive + type (both numbers or both strings) and raise otherwise. `!=` is + exempted since it's a polymorphic inequality test built on + `dl-tuple-equal?`. 2 new regression tests; 267/267. + +- 2026-05-11 — Body literal shape validation in + `dl-rule-check-safety`: a dict that isn't `{:neg ...}` (e.g. typo'd + `{:negs ...}`) used to silently fall through every dispatch clause, + contributing zero bound vars; the user would then see a confusing + "head var X unbound" error pointing at the head, not the malformed + body. Same for body lits that are bare numbers / strings / symbols. + Both shapes now raise a clear error naming the offending lit. 1 new + regression test; 265/265. + +- 2026-05-11 — Division by zero in `is` silently produced IEEE + infinity instead of raising. `is(R, /(X, 0))` returned `R = inf`, + which then flowed through comparisons and aggregations to produce + nonsense results. `dl-eval-arith` now raises with a clear + "division by zero in " message. 1 new test; 264/264. + +- 2026-05-11 — Aggregate variable validation: `count(N, Y, p(X))` + silently returned `N = 1` because `Y` was never bound in `p(X)` — + every match contributed the same unbound symbol, which dl-val-member? + deduped to a single entry. Similarly `sum(S, Y, p(X))` raised a + confusing "expected number" error from the underlying `+`. Added + a third validator in `dl-eval-aggregate`: the agg-var must appear + in the goal literal. Error names the variable and the goal and + explains the consequence. 1 new test; 263/263. + +- 2026-05-11 — `dl-retract!` was silently destroying EDB facts in + "mixed" relations (those with BOTH user-asserted facts AND a rule + defining the same head). The retract pass wiped every rule-head + relation wholesale and then re-saturated — but the saturator only + re-derives the IDB portion, so explicit EDB facts vanished even + for a no-op retract of a non-existent tuple. Probe: + `(let ((db (dl-program "p(a). p(b). p(X) :- q(X). q(c)."))) + (dl-retract! db (quote (p z))) (dl-query db (quote (p X))))` + went from `{a,b,c}` to just `{c}`. + Fix: tracked `:edb-keys` provenance in the db. `dl-add-fact!` (public + API) marks the tuple as EDB; saturator calls new internal + `dl-add-derived!` which doesn't mark it. `dl-retract!` now strips + only the IDB-derived portion of rule-head relations and preserves + EDB-marked tuples through the re-saturate pass. 2 new regression + tests; 262/262. + +- 2026-05-11 — Eval-semantics bug-hunt: nested `not(not(P))` was + silently misinterpreted. Outer-level `not(...)` is parsed as + negation, but the inner `not(banned(X))` was parsed as a regular + positive literal naming a relation called `not`. Since no `not` + relation existed, the inner match was empty and the outer + negation succeeded vacuously, making `vip(X) :- u(X), not(not(banned(X))).` + equivalent to `vip(X) :- u(X).` (a silent double-negation = identity + fallacy). Fix in `dl-rule-check-safety`: both the positive-literal + branch and `dl-process-neg!` now flag any body literal whose head + is in `dl-reserved-rel-names`. Error message names the relation and + points the user at intermediate-relation stratified negation. 1 new + regression test; 260/260. + +- 2026-05-10 — Bug-hunt round on parser/safety surfaced 7 real + bugs, each fixed with regression tests: + - Reserved relation names (`not`, `count`, `<`, `is`, ...) were + accepted as rule/fact heads — would silently shadow built-ins. + - Negative number literals (`n(-1).`) failed to parse — users + had to express them as `(- 0 1)` or via `is`. + - Unterminated block comment `/* ...` silently consumed the + rest of the input. Now raises with the position. + - Same silent-consume bug in unterminated string / quoted-atom. + - Empty-list rule head and non-list rule body weren't validated; + they'd crash later in `rest`. dl-add-rule! now checks shape. + - dl-magic-query with non-list / non-dict goal crashed cryptically. + - Tokenizer silently swallowed unrecognised characters (`?`, `!`, + `#`, `@`, etc.) — typos produced confusing downstream errors. + +- 2026-05-08 — Phase 6 driver: `dl-magic-query db query-goal`. + Builds a fresh internal db from the caller's EDB + magic seed + + rewritten rules, saturates, queries, returns substitutions — + caller's db is untouched. Equivalent to `dl-query` for any + fully-stratifiable program; sole motivation is a perf alternative + on goal-shaped queries against large recursive relations. + 2 new tests cover equivalence and non-mutation. + +- 2026-05-08 — Phase 6 magic-sets rewriter. `dl-magic-rewrite rules + query-rel adn args` returns `{:rules :seed }`. + Worklist over `(rel, adn)` pairs starts from the query, gates each + original rule with a `magic_^(bound)` filter, and emits + propagation rules for each positive non-builtin body literal so + that magic spreads to body relations. EDB facts pass through. + 3 new tests cover seed structure, equivalence on chain-3 by + ancestor-relation tuple count, and same-query-answers under + the rewritten program. The plumbing for a `dl-saturate-magic!` + driver and large-graph perf benchmarks is still future work. + +- 2026-05-08 — Phase 6 building blocks for the magic-sets + transformation: `dl-magic-rel-name`, `dl-magic-lit`, + `dl-bound-args`. The rewriter that generates magic seed and + propagation rules is still future work; with these primitives + in place it's a straightforward worklist algorithm. 4 new tests. + +- 2026-05-08 — Phase 6 adornments + SIPS in + `lib/datalog/magic.sx`. Inspection helpers — `dl-adorn-goal` and + `dl-adorn-lit` compute per-arg `b`/`f` patterns under a bound + set; `dl-rule-sips rule head-adornment` walks body literals + left-to-right propagating the bound set, recognising `is` and + aggregate result-vars as new binders. Lays groundwork for a + later magic-sets transformation. 10 new tests cover pure + adornment, SIPS over a chain rule, head-fully-bound rules, + comparisons, and `is`. Saturator does not yet consume these. + +- 2026-05-08 — Comprehensive integration test in api suite: a + single program exercising recursion (`reach` transitive closure) + + stratified negation (`safe X Y :- reach X Y, not banned Y`) + + aggregation (`reach_count` via count) + comparison (`>= N 2`) + composed end-to-end via `dl-eval source query-source`. Confirms + the full pipeline (parser → safety → stratifier → semi-naive + + aggregate post-pass → query) on a non-trivial program. + +- 2026-05-08 — Bug fix: aggregates work as top-level query goals. + `dl-match-lit` (the naive matcher used by `dl-find-bindings`) was + missing the `dl-aggregate?` dispatch — it was only present in + `dl-fbs-aux` (semi-naive). Symptom: `(dl-query db '(count N X (p X)))` + silently returned `()`. Also updated `dl-query-user-vars` to project + only the result var (first arg) of an aggregate goal — the + aggregated var and inner-goal vars are existentials and should not + appear in the projected substitution. 2 new aggregate tests cover + the regression. + +- 2026-05-08 — Convenience: `dl-eval source query-source`. Parses + both strings, builds a db, saturates, runs the query, returns + the substitution list. Single-call user-friendly entry. 2 new + api tests cover ancestor and multi-goal queries. + +- 2026-05-08 — Phase 6 stub: `dl-set-strategy! db strategy` and + `dl-get-strategy db` user-facing hooks. Default `:semi-naive`; + `:magic` is accepted but the actual transformation is deferred, + so saturation still uses semi-naive. Lets us tick the + "Optional pass — guarded behind dl-set-strategy!" Phase 6 box. + 3 new eval tests. + +- 2026-05-08 — Demo: weighted-DAG shortest path. `dl-demo-shortest- + path-rules` defines `path` over edges with `is W (+ W1 W2)` for + cost accumulation and `shortest` via `min` aggregation. 3 demo + tests cover direct/multi-hop choice, multi-hop wins on cheaper + route, and unreachable-empty. Added `dl-summary db` inspection + helper returning `{: count}` (4 eval tests). + +- 2026-05-08 — Phase 5e perf: first-arg index per relation. db gains + `:facts-index {: {: tuples}}` mirroring the + existing `:facts-keys` membership index. `dl-add-fact!` populates + it; `dl-match-positive` walks the body literal's first arg under + the current subst — if it's bound to a non-var, look up by + `(str arg)` and iterate only the matching subset. chain-25 + saturation 33s → 18s real (~2x). chain-50 still slow (~120s+) + but tractable; next bottleneck is subst dict copies during + unification. Differential test bumped to chain-12, semi-only + count to chain-25. + +- 2026-05-08 — Demo: tag co-occurrence. `(cotagged P T1 T2)` — post + has both T1 and T2 with T1 != T2 — and `(tag-pair-count T1 T2 N)` + counting posts per distinct tag pair. Demonstrates count + aggregation grouped by outer-context vars. 2 new demo tests. + +- 2026-05-08 — `dl-query` accepts a list of body literals for + conjunctive queries, in addition to a single positive literal. + `dl-query-coerce` dispatches based on the first element's shape: + positive lit (head is a symbol) or `:neg` dict → wrap as singleton; + list of lits → use as-is. `dl-query-user-vars` collects the union + of vars across all goals (deduped, `_` filtered) for projection. + 2 new api tests: multi-goal AND, and conjunction with comparison. + +- 2026-05-08 — Bug fix: `dl-check-stratifiable` now rejects recursion + through aggregation (e.g., `q(N) :- count(N, X, q(X))`). The + stratifier was already adding negation-like edges for aggregates, + but the cycle scan only looked at explicit `:neg` literals. Added + the matching aggregate branch to the body iteration. Also adds + doc-only `lib/datalog/datalog.sx` with the public-API surface + (since `load` is an epoch command and can't recurse from within an + `.sx` file). 3 new aggregate tests cover recursion-rejection, + negation-and-aggregation coexistence, and min-over-empty-derived. + +- 2026-05-08 — Phase 10 demo + canonical query. Added the "cooking + posts by people I follow (transitively)" example from the plan: + `dl-demo-cooking-rules` defines `reach` over the follow graph + (recursive transitive closure) and `cooking-post-by-network` that + joins reach with `authored` and `(tagged P cooking)`. 3 demo + tests cover transitive network, direct-only follow, and + empty-network cases. + +- 2026-05-08 — Phase 8 extension: `findall L V Goal` aggregate. Bind + L to the list of distinct V values for which Goal holds (or the + empty list when no matches). Implemented as a one-line case in + `dl-do-aggregate`. 3 new tests: EDB, derived relation, empty. + Useful for "give me all the X such that …" queries without + scalar reduction. + +- 2026-05-08 — Phase 5d semantic fix: anonymous `_` variables are + renamed per occurrence at `dl-add-rule!` and `dl-query` time so + `(p X _) (p _ Y)` no longer unifies the two `_`s. New helpers + `dl-rename-anon-term`, `dl-rename-anon-lit`, `dl-make-anon-renamer`, + `dl-rename-anon-rule` in db.sx; eval.sx's dl-query renames the goal + before search and projects only user-named vars (`_` is filtered + out of the projection list). The "underscore in head" test now + correctly rejects `(p X _) :- q(X).` — after renaming, the head's + fresh anon var has no body binder. Two new eval tests verify + rule-level and goal-level independence. 155/155 expected. + +- 2026-05-08 — Phase 5c perf: indexed `dl-find-bindings`. Replaced + the recursive `(rest lits)` walk with `dl-fb-aux lits db subst i n` + using `nth lits i`. Eliminates O(N²) list-copy per body of length + N. chain-15 saturation 25s → 16s; chain-25 finishes in 33s real + (vs. timeout previously). Bumped semi_naive tests: differential + on chain-10, semi-only count on chain-15 (was chain-5/chain-5). + 153/153. + +- 2026-05-08 — Phase 10 syntactic demo. New `lib/datalog/demo.sx` + with three programs over rose-ash-shaped data: federation + (`mutual`, `reachable`, `foaf`), content recommendation + (`post-likes` via count aggregation, `popular`, `interesting`), + and role-based permissions (`in-group` over transitive subgroups, + `can-access`). 10 demo tests pass against synthetic EDB tuples. + Postgres loader and `/internal/datalog` HTTP endpoint remain + out of scope for this loop (they need service-tree edits beyond + `lib/datalog/**`). Conformance now 153/153. + +- 2026-05-08 — Phase 5b perf: hash-set membership in `dl-add-fact!`. + db gains a parallel `:facts-keys {: {: true}}` + index alongside `:facts`. `dl-tuple-key` derives a stable string + key via `(str lit)` — `(p 30)` and `(p 30.0)` collide correctly + because SX prints them identically. Insertion is O(1) instead of + O(n). chain-7 saturation drops from ~12s to ~6s; chain-15 from + ~50s to ~25s under shared CPU. Larger chains are still slow due + to body-join overhead in dl-find-bindings (Blocker updated). + `dl-retract!` updated to keep both indices consistent. 143/143. + +- 2026-05-08 — Phase 9 done. New `lib/datalog/api.sx` exposes a + parser-free embedding: `dl-program-data facts rules` accepts SX + data lists, with rules in either dict form or list form using + `<-` as the rule arrow (since SX parses `:-` as a keyword). + `dl-rule head body` constructs the dict. `dl-assert! db lit` adds + a fact and re-saturates; `dl-retract! db lit` drops the fact from + EDB, wipes all rule-headed (IDB) relations, and re-saturates from + scratch — the simplest correct semantics until provenance tracking + arrives in a later phase. 9 API tests; conformance now 143/143. + +- 2026-05-08 — Phase 8 done. New `lib/datalog/aggregates.sx` (~110 + LOC): count / sum / min / max. Each is a body literal of shape + `(op R V Goal)` — `dl-eval-aggregate` runs `dl-find-bindings` on + the goal under the outer subst (so outer vars in the goal get + substituted, giving group-by-style aggregation), collects the + distinct values of `V`, and binds `R`. Empty input: count/sum + return 0; min/max produce no binding (rule fails). Stratifier + extended via `dl-aggregate-dep-edge` so the aggregate's goal + relation is fully derived before the aggregate fires. Safety check + treats goal-internal vars as existentials (no outer binding + required); only the result var becomes bound. Conformance now + 134 / 134. + +- 2026-05-08 — Phase 7 done (Phase 6 magic sets deferred — opt-in, + semi-naive default suffices for current test suite). New + `lib/datalog/strata.sx` (~290 LOC): dep graph build, Floyd-Warshall + reachability, SCC-via-mutual-reachability for non-stratifiability + detection, iterative stratum computation, rule grouping by head + stratum. eval.sx split: `dl-saturate-rules!` is the per-rule-set + semi-naive worker, `dl-saturate!` is now the stratified driver + (errors out on non-stratifiable programs). `dl-match-negation` in + eval.sx: succeeds iff inner positive match is empty. Stratum-keyed + dicts use `(str s)` since SX dicts only accept string/keyword keys. + 10 negation tests cover EDB/IDB negation, multi-level strata, + non-stratifiability rejection, and a negation safety violation. + +- 2026-05-08 — Phase 5 done. `lib/datalog/eval.sx` rewritten to + semi-naive default. `dl-saturate!` tracks a per-relation delta and + on each iteration walks every positive body position substituting + delta for that one literal — joining the rest against the full DB + snapshot. `dl-saturate-naive!` retained as the reference. Rules + with no positive body literal (e.g. `(p X) :- (= X 5).`) fall back + to a naive one-shot via `dl-collect-rule-candidates`. 8 tests + differentially compare the two saturators using per-relation tuple + counts (cheap). Chain-5 differential exercises multi-iteration + recursive saturation. Larger chains made conformance.sh time out + due to O(n) `dl-tuple-member?` × CPU sharing with other loop + agents — added a Blocker to swap to a hash-set for membership. + Also tightened `dl-tuple-member?` to use indexed iteration instead + of recursive `rest` (was creating a fresh list per step). + +- 2026-05-07 — Phase 4 done. `lib/datalog/builtins.sx` (~280 LOC) adds + `(< X Y)`, `(<= X Y)`, `(> X Y)`, `(>= X Y)`, `(= X Y)`, `(!= X Y)`, + and `(is X expr)` with `+ - * /`. `dl-eval-builtin` dispatches; + `dl-eval-arith` recursively evaluates nested compounds. Safety + check is now order-aware — it walks body literals left-to-right + tracking the bound set, requires comparison/`is` inputs to be + already bound, and special-cases `=` (binds the var-side; both + sides must include at least one bound to bind the other). Phase 3's + simple safety check stays in db.sx as a forward-reference fallback; + builtins.sx redefines `dl-rule-check-safety` to the comprehensive + version. eval.sx's `dl-match-lit` now dispatches built-ins through + `dl-eval-builtin`. 19 builtins tests; conformance 106 / 106. + +- 2026-05-07 — Phase 3 done. `lib/datalog/db.sx` (~250 LOC) holds facts + indexed by relation name plus the rules list, with `dl-add-fact!` / + `dl-add-rule!` (rejects non-ground facts and unsafe rules); + `lib/datalog/eval.sx` (~150 LOC) implements the naive bottom-up + fixpoint via `dl-find-bindings`/`dl-match-positive`/`dl-saturate!` + and `dl-query` (deduped projected substitutions). Safety analysis + rejects unsafe head vars at load time. Negation and arithmetic + built-ins raise clean errors (lifted in later phases). 15 eval + tests cover transitive closure, sibling, same-generation, cyclic + graph reach, and six safety violations. Conformance 87 / 87. + +- 2026-05-07 — Phase 2 done. `lib/datalog/unify.sx` (~140 LOC): + `dl-var?` (case + underscore), `dl-walk`, `dl-bind`, `dl-unify` (returns + extended dict subst or `nil`), `dl-apply-subst`, `dl-ground?`, `dl-vars-of`. + Substitutions are immutable dicts; `assoc` builds extended copies. 28 + unify tests; conformance now 72 / 72. + +- 2026-05-07 — Phase 1 done. `lib/datalog/tokenizer.sx` (~190 LOC) emits + `{:type :value :pos}` tokens; `lib/datalog/parser.sx` (~150 LOC) produces + `{:head … :body …}` / `{:query …}` clauses, with nested compounds + permitted for arithmetic and `not(...)` desugared to `{:neg …}`. 44 / 44 + via `bash lib/datalog/conformance.sh` (26 tokenize + 18 parse). Local + helpers namespace-prefixed (`dl-emit!`, `dl-peek`) after a host-primitive + shadow clash. Test harness uses a custom `dl-deep-equal?` that handles + out-of-order dict keys and number repr (`equal?` fails on dict key order + and on `30` vs `30.0`).