diff --git a/lib/datalog/api.sx b/lib/datalog/api.sx new file mode 100644 index 00000000..37ae9d6c --- /dev/null +++ b/lib/datalog/api.sx @@ -0,0 +1,147 @@ +;; lib/datalog/api.sx — SX-data embedding API. +;; +;; Where Phase 1's `dl-program` takes a Datalog source string, +;; this module exposes a parser-free API that consumes SX data +;; directly. Two rule shapes are accepted: +;; +;; - dict: {:head head-literal :body (body-lit ...)} +;; - list: (head-elt ... <- body-lit ...) +;; — `<-` is an SX symbol used as the rule arrow. +;; +;; Examples: +;; +;; (dl-program-data +;; '((parent tom bob) (parent tom liz) (parent bob ann)) +;; '((ancestor X Y <- (parent X Y)) +;; (ancestor X Z <- (parent X Y) (ancestor Y Z)))) +;; +;; (dl-query db '(ancestor tom X)) ; same query API as before +;; +;; Variables follow the parser convention: SX symbols whose first +;; character is uppercase or `_` are variables. + +(define + dl-rule + (fn (head body) {:head head :body body})) + +(define + dl-rule-arrow? + (fn + (x) + (and (symbol? x) (= (symbol->string x) "<-")))) + +(define + dl-find-arrow + (fn + (rl i n) + (cond + ((>= i n) nil) + ((dl-rule-arrow? (nth rl i)) i) + (else (dl-find-arrow rl (+ i 1) n))))) + +;; Given a list of the form (head-elt ... <- body-lit ...) returns +;; {:head (head-elt ...) :body (body-lit ...)}. If no arrow is +;; present, the whole list is treated as the head and the body is +;; empty (i.e. a fact written rule-style). +(define + dl-rule-from-list + (fn + (rl) + (let ((n (len rl))) + (let ((idx (dl-find-arrow rl 0 n))) + (cond + ((nil? idx) {:head rl :body (list)}) + (else + (let + ((head (slice rl 0 idx)) + (body (slice rl (+ idx 1) n))) + {:head head :body body}))))))) + +;; Coerce a rule given as either a dict or a list-with-arrow to a dict. +(define + dl-coerce-rule + (fn + (r) + (cond + ((dict? r) r) + ((list? r) (dl-rule-from-list r)) + (else (error (str "dl-coerce-rule: expected dict or list, got " r)))))) + +;; Build a db from SX data lists. 
+(define + dl-program-data + (fn + (facts rules) + (let ((db (dl-make-db))) + (do + (for-each (fn (lit) (dl-add-fact! db lit)) facts) + (for-each + (fn (r) (dl-add-rule! db (dl-coerce-rule r))) + rules) + db)))) + +;; Add a single fact at runtime, then re-saturate the db so derived +;; tuples reflect the change. Returns the db. +(define + dl-assert! + (fn + (db lit) + (do + (dl-add-fact! db lit) + (dl-saturate! db) + db))) + +;; Remove a fact: drop matching tuples from EDB AND wipe all derived +;; tuples (any IDB tuple may have transitively depended on the removed +;; fact). Then re-saturate to repopulate IDB. Facts in relations with +;; no rules are preserved unless they match `lit`. Returns the db. +;; +;; To distinguish EDB from IDB, we treat any fact for a relation that +;; has rules as IDB, so facts added by hand to such a relation are also +;; wiped. (Phase 9 simplification — Phase 10 may track provenance.) +(define + dl-retract! + (fn + (db lit) + (let + ((rel-key (dl-rel-name lit))) + (do + ;; Drop tuples that dl-tuple-equal? matches against `lit`, if the relation exists. + (when + (has-key? (get db :facts) rel-key) + (let + ((existing (get (get db :facts) rel-key)) + (kept (list))) + (do + (for-each + (fn + (t) + (when + (not (dl-tuple-equal? t lit)) + (append! kept t))) + existing) + (dict-set! (get db :facts) rel-key kept)))) + ;; Wipe all relations that have a rule (these are IDB) so the + ;; saturator regenerates them from the surviving EDB. + (let ((rule-heads (dl-rule-head-rels db))) + (for-each + (fn (k) (dict-set! (get db :facts) k (list))) + rule-heads)) + (dl-saturate! db) + db)))) + +(define + dl-rule-head-rels + (fn + (db) + (let ((seen (list))) + (do + (for-each + (fn + (rule) + (let ((h (dl-rel-name (get rule :head)))) + (when + (and (not (nil? h)) (not (dl-member-string? h seen))) + (append! 
seen h)))) + (dl-rules db)) + seen)))) diff --git a/lib/datalog/conformance.conf b/lib/datalog/conformance.conf index 15dea37e..e80e8b7a 100644 --- a/lib/datalog/conformance.conf +++ b/lib/datalog/conformance.conf @@ -12,6 +12,7 @@ PRELOADS=( lib/datalog/aggregates.sx lib/datalog/strata.sx lib/datalog/eval.sx + lib/datalog/api.sx ) SUITES=( @@ -23,4 +24,5 @@ SUITES=( "semi_naive:lib/datalog/tests/semi_naive.sx:(dl-semi-naive-tests-run!)" "negation:lib/datalog/tests/negation.sx:(dl-negation-tests-run!)" "aggregates:lib/datalog/tests/aggregates.sx:(dl-aggregates-tests-run!)" + "api:lib/datalog/tests/api.sx:(dl-api-tests-run!)" ) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json index 54b1560e..1bd89123 100644 --- a/lib/datalog/scoreboard.json +++ b/lib/datalog/scoreboard.json @@ -1,8 +1,8 @@ { "lang": "datalog", - "total_passed": 134, + "total_passed": 143, "total_failed": 0, - "total": 134, + "total": 143, "suites": [ {"name":"tokenize","passed":26,"failed":0,"total":26}, {"name":"parse","passed":18,"failed":0,"total":18}, @@ -11,7 +11,8 @@ {"name":"builtins","passed":19,"failed":0,"total":19}, {"name":"semi_naive","passed":8,"failed":0,"total":8}, {"name":"negation","passed":10,"failed":0,"total":10}, - {"name":"aggregates","passed":10,"failed":0,"total":10} + {"name":"aggregates","passed":10,"failed":0,"total":10}, + {"name":"api","passed":9,"failed":0,"total":9} ], - "generated": "2026-05-08T08:28:29+00:00" + "generated": "2026-05-08T08:33:54+00:00" } diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md index 22cd5104..66ade082 100644 --- a/lib/datalog/scoreboard.md +++ b/lib/datalog/scoreboard.md @@ -1,6 +1,6 @@ # datalog scoreboard -**134 / 134 passing** (0 failure(s)). +**143 / 143 passing** (0 failure(s)). 
| Suite | Passed | Total | Status | |-------|--------|-------|--------| @@ -12,3 +12,4 @@ | semi_naive | 8 | 8 | ok | | negation | 10 | 10 | ok | | aggregates | 10 | 10 | ok | +| api | 9 | 9 | ok | diff --git a/lib/datalog/tests/api.sx b/lib/datalog/tests/api.sx new file mode 100644 index 00000000..aa127d38 --- /dev/null +++ b/lib/datalog/tests/api.sx @@ -0,0 +1,209 @@ +;; lib/datalog/tests/api.sx — SX-data embedding API. + +(define dl-api-pass 0) +(define dl-api-fail 0) +(define dl-api-failures (list)) + +(define + dl-api-deep=? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-api-deq-l? a b 0))) + ((and (dict? a) (dict? b)) + (let ((ka (keys a)) (kb (keys b))) + (and (= (len ka) (len kb)) (dl-api-deq-d? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-api-deq-l? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-api-deep=? (nth a i) (nth b i))) false) + (else (dl-api-deq-l? a b (+ i 1)))))) + +(define + dl-api-deq-d? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) + (not (dl-api-deep=? (get a k) (get b k)))) + false) + (else (dl-api-deq-d? a b ka (+ i 1)))))) + +(define + dl-api-set=? + (fn + (a b) + (and + (= (len a) (len b)) + (dl-api-subset? a b) + (dl-api-subset? b a)))) + +(define + dl-api-subset? + (fn + (xs ys) + (cond + ((= (len xs) 0) true) + ((not (dl-api-contains? ys (first xs))) false) + (else (dl-api-subset? (rest xs) ys))))) + +(define + dl-api-contains? + (fn + (xs target) + (cond + ((= (len xs) 0) false) + ((dl-api-deep=? (first xs) target) true) + (else (dl-api-contains? (rest xs) target))))) + +(define + dl-api-test! + (fn + (name got expected) + (if + (dl-api-deep=? got expected) + (set! dl-api-pass (+ dl-api-pass 1)) + (do + (set! dl-api-fail (+ dl-api-fail 1)) + (append! + dl-api-failures + (str + name + "\n expected: " expected + "\n got: " got)))))) + +(define + dl-api-test-set! 
+ (fn + (name got expected) + (if + (dl-api-set=? got expected) + (set! dl-api-pass (+ dl-api-pass 1)) + (do + (set! dl-api-fail (+ dl-api-fail 1)) + (append! + dl-api-failures + (str + name + "\n expected (set): " expected + "\n got: " got)))))) + +(define + dl-api-run-all! + (fn + () + (do + ;; dl-program-data with arrow form. + (dl-api-test-set! "data API ancestor closure" + (dl-query + (dl-program-data + (quote ((parent tom bob) (parent bob ann) (parent ann pat))) + (quote + ((ancestor X Y <- (parent X Y)) + (ancestor X Z <- (parent X Y) (ancestor Y Z))))) + (quote (ancestor tom X))) + (list {:X (quote bob)} {:X (quote ann)} {:X (quote pat)})) + + ;; dl-program-data with dict rules. + (dl-api-test-set! "data API with dict rules" + (dl-query + (dl-program-data + (quote ((p a) (p b) (p c))) + (list + {:head (quote (q X)) :body (quote ((p X)))})) + (quote (q X))) + (list {:X (quote a)} {:X (quote b)} {:X (quote c)})) + + ;; dl-rule helper. + (dl-api-test-set! "dl-rule constructor" + (dl-query + (dl-program-data + (quote ((p 1) (p 2))) + (list (dl-rule (quote (q X)) (quote ((p X)))))) + (quote (q X))) + (list {:X 1} {:X 2})) + + ;; dl-assert! adds and re-derives. + (dl-api-test-set! "dl-assert! incremental" + (let + ((db (dl-program-data + (quote ((parent tom bob) (parent bob ann))) + (quote + ((ancestor X Y <- (parent X Y)) + (ancestor X Z <- (parent X Y) (ancestor Y Z))))))) + (do + (dl-saturate! db) + (dl-assert! db (quote (parent ann pat))) + (dl-query db (quote (ancestor tom X))))) + (list {:X (quote bob)} {:X (quote ann)} {:X (quote pat)})) + + ;; dl-retract! removes a fact and recomputes IDB. + (dl-api-test-set! "dl-retract! removes derived" + (let + ((db (dl-program-data + (quote ((parent tom bob) (parent bob ann) (parent ann pat))) + (quote + ((ancestor X Y <- (parent X Y)) + (ancestor X Z <- (parent X Y) (ancestor Y Z))))))) + (do + (dl-saturate! db) + (dl-retract! 
db (quote (parent bob ann))) + (dl-query db (quote (ancestor tom X))))) + (list {:X (quote bob)})) + + ;; dl-program-data + dl-query with constants in head. + (dl-api-test-set! "constant-in-head data" + (dl-query + (dl-program-data + (quote ((edge a b) (edge b c) (edge c a))) + (quote + ((reach X Y <- (edge X Y)) + (reach X Z <- (edge X Y) (reach Y Z))))) + (quote (reach a X))) + (list {:X (quote a)} {:X (quote b)} {:X (quote c)})) + + ;; Assert into empty db. + (dl-api-test-set! "assert into empty" + (let + ((db (dl-program-data (list) (list)))) + (do + (dl-assert! db (quote (p 1))) + (dl-assert! db (quote (p 2))) + (dl-query db (quote (p X))))) + (list {:X 1} {:X 2})) + + ;; dl-rule-from-list with no arrow → fact-style. + (dl-api-test-set! "no arrow → fact-like rule" + (let + ((rule (dl-rule-from-list (quote (foo X Y))))) + (list rule)) + (list {:head (quote (foo X Y)) :body (list)})) + + ;; dl-coerce-rule on dict passes through. + (dl-api-test-set! "coerce dict rule" + (let + ((d {:head (quote (h X)) :body (quote ((b X)))})) + (list (dl-coerce-rule d))) + (list {:head (quote (h X)) :body (quote ((b X)))}))))) + +(define + dl-api-tests-run! + (fn + () + (do + (set! dl-api-pass 0) + (set! dl-api-fail 0) + (set! dl-api-failures (list)) + (dl-api-run-all!) + {:passed dl-api-pass + :failed dl-api-fail + :total (+ dl-api-pass dl-api-fail) + :failures dl-api-failures}))) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 291cd16d..21c8123f 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -208,18 +208,30 @@ large graphs. group-by, distinct-counted-once. ### Phase 9 — SX embedding API -- [ ] `(dl-program facts rules)` → database from SX data directly (no parsing required) +- [x] `(dl-program-data facts rules)` builds a db from SX data — + `facts` is a list of literals, `rules` is a list of either + dicts `{:head … :body …}` or lists `(head… <- body…)`. 
+ Variables are SX symbols whose first char is uppercase or `_`, + matching the parser's convention. ``` - (dl-program - '((parent tom bob) (parent tom liz) (parent bob ann)) - '((ancestor X Z :- (parent X Y) (ancestor Y Z)) - (ancestor X Y :- (parent X Y)))) + (dl-program-data + '((parent tom bob) (parent bob ann)) + '((ancestor X Y <- (parent X Y)) + (ancestor X Z <- (parent X Y) (ancestor Y Z)))) ``` -- [ ] `(dl-query db '(ancestor tom ?X))` → `((ann) (bob) (liz) (pat))` -- [ ] `(dl-assert! db '(parent ann pat))` → incremental fact addition + re-derive -- [ ] `(dl-retract! db '(parent tom bob))` → fact removal + re-derive from scratch +- [x] `(dl-rule head body)` constructor for the dict form. +- [x] `(dl-query db '(ancestor tom X))` already worked — same query API + consumes the SX-data goal. +- [x] `(dl-assert! db '(parent ann pat))` → adds the fact and re-saturates. +- [x] `(dl-retract! db '(parent bob ann))` → drops matching tuples from + the EDB list, wipes every relation that has a rule (those are IDB), + and re-saturates from the surviving EDB. +- [x] Tests in `lib/datalog/tests/api.sx` (9): closure via data API, + dict-rule form, dl-rule constructor, dl-assert! incremental, + dl-retract! removes derived, cyclic-graph reach via data, + assert into empty db, fact-style rule (no arrow), coerce dict. - [ ] Integration demo: federation graph query — `(ancestor actor1 actor2)` over - rose-ash ActivityPub follow relationships + rose-ash ActivityPub follow relationships (Phase 10). ### Phase 10 — Datalog as a query language for rose-ash - [ ] Schema: map SQLAlchemy model relationships to Datalog EDB facts @@ -245,6 +257,16 @@ large graphs. _Newest first._ +- 2026-05-08 — Phase 9 done. New `lib/datalog/api.sx` exposes a + parser-free embedding: `dl-program-data facts rules` accepts SX + data lists, with rules in either dict form or list form using + `<-` as the rule arrow (since SX parses `:-` as a keyword). + `dl-rule head body` constructs the dict. `dl-assert! 
db lit` adds + a fact and re-saturates; `dl-retract! db lit` drops the fact from + EDB, wipes all rule-headed (IDB) relations, and re-saturates from + scratch — the simplest correct semantics until provenance tracking + arrives in a later phase. 9 API tests; conformance now 143/143. + - 2026-05-08 — Phase 8 done. New `lib/datalog/aggregates.sx` (~110 LOC): count / sum / min / max. Each is a body literal of shape `(op R V Goal)` — `dl-eval-aggregate` runs `dl-find-bindings` on