diff --git a/lib/datalog/conformance.conf b/lib/datalog/conformance.conf new file mode 100644 index 00000000..a324032b --- /dev/null +++ b/lib/datalog/conformance.conf @@ -0,0 +1,14 @@ +# Datalog conformance config — sourced by lib/guest/conformance.sh. + +LANG_NAME=datalog +MODE=dict + +PRELOADS=( + lib/datalog/tokenizer.sx + lib/datalog/parser.sx +) + +SUITES=( + "tokenize:lib/datalog/tests/tokenize.sx:(dl-tokenize-tests-run!)" + "parse:lib/datalog/tests/parse.sx:(dl-parse-tests-run!)" +) diff --git a/lib/datalog/conformance.sh b/lib/datalog/conformance.sh new file mode 100755 index 00000000..0b4a0e13 --- /dev/null +++ b/lib/datalog/conformance.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash +# Thin wrapper — see lib/guest/conformance.sh and lib/datalog/conformance.conf. +exec bash "$(dirname "$0")/../guest/conformance.sh" "$(dirname "$0")/conformance.conf" "$@" diff --git a/lib/datalog/parser.sx b/lib/datalog/parser.sx new file mode 100644 index 00000000..66ff43dd --- /dev/null +++ b/lib/datalog/parser.sx @@ -0,0 +1,242 @@ +;; lib/datalog/parser.sx — Datalog tokens → AST +;; +;; Output shapes: +;; Literal (positive) := (relname arg ... arg) — SX list +;; Literal (negative) := {:neg (relname arg ... arg)} — dict +;; Argument := var-symbol | atom-symbol | number | string +;; | (op-name arg ... arg) — arithmetic compound +;; Fact := {:head literal :body ()} +;; Rule := {:head literal :body (lit ... lit)} +;; Query := {:query (lit ... lit)} +;; Program := list of facts / rules / queries +;; +;; Variables and constants are both SX symbols; the evaluator dispatches +;; on first-char case ('A'..'Z' or '_' = variable, otherwise constant). +;; +;; The parser permits nested compounds in arg position to support +;; arithmetic (e.g. (is Z (+ X Y))). Safety analysis at rule-load time +;; rejects compounds whose head is not an arithmetic operator. + +(define + dl-pp-peek + (fn + (st) + (let + ((i (get st :idx)) (tokens (get st :tokens))) + (if (< i (len tokens)) (nth tokens i) {:type "eof" :value nil :pos 0})))) + +(define + dl-pp-peek2 + (fn + (st) + (let + ((i (+ (get st :idx) 1)) (tokens (get st :tokens))) + (if (< i (len tokens)) (nth tokens i) {:type "eof" :value nil :pos 0})))) + +(define + dl-pp-advance! + (fn (st) (dict-set! st :idx (+ (get st :idx) 1)))) + +(define + dl-pp-at? + (fn + (st type value) + (let + ((t (dl-pp-peek st))) + (and + (= (get t :type) type) + (or (= value nil) (= (get t :value) value)))))) + +(define + dl-pp-error + (fn + (st msg) + (let + ((t (dl-pp-peek st))) + (error + (str + "Parse error at pos " + (get t :pos) + ": " + msg + " (got " + (get t :type) + " '" + (if (= (get t :value) nil) "" (get t :value)) + "')"))))) + +(define + dl-pp-expect! + (fn + (st type value) + (let + ((t (dl-pp-peek st))) + (if + (dl-pp-at? st type value) + (do (dl-pp-advance! st) t) + (dl-pp-error + st + (str "expected " type (if (= value nil) "" (str " '" value "'")))))))) + +;; Argument: variable, atom, number, string, or compound (relname/op + parens). +(define + dl-pp-parse-arg + (fn + (st) + (let + ((t (dl-pp-peek st))) + (let + ((ty (get t :type)) (vv (get t :value))) + (cond + ((= ty "number") (do (dl-pp-advance! st) vv)) + ((= ty "string") (do (dl-pp-advance! st) vv)) + ((= ty "var") (do (dl-pp-advance! st) (string->symbol vv))) + ((or (= ty "atom") (= ty "op")) + (do + (dl-pp-advance! st) + (if + (dl-pp-at? st "punct" "(") + (do + (dl-pp-advance! st) + (let + ((args (dl-pp-parse-arg-list st))) + (do + (dl-pp-expect! st "punct" ")") + (cons (string->symbol vv) args)))) + (string->symbol vv)))) + (else (dl-pp-error st "expected term"))))))) + +;; Comma-separated args inside parens. +(define + dl-pp-parse-arg-list + (fn + (st) + (let + ((args (list))) + (do + (append! args (dl-pp-parse-arg st)) + (define + dl-pp-arg-loop + (fn + () + (when + (dl-pp-at? st "punct" ",") + (do + (dl-pp-advance! st) + (append! args (dl-pp-parse-arg st)) + (dl-pp-arg-loop))))) + (dl-pp-arg-loop) + args)))) + +;; A positive literal: relname (atom or op) followed by optional (args). +(define + dl-pp-parse-positive + (fn + (st) + (let + ((t (dl-pp-peek st))) + (let + ((ty (get t :type)) (vv (get t :value))) + (if + (or (= ty "atom") (= ty "op")) + (do + (dl-pp-advance! st) + (if + (dl-pp-at? st "punct" "(") + (do + (dl-pp-advance! st) + (let + ((args (dl-pp-parse-arg-list st))) + (do + (dl-pp-expect! st "punct" ")") + (cons (string->symbol vv) args)))) + (list (string->symbol vv)))) + (dl-pp-error st "expected literal head")))))) + +;; A body literal: positive, or not(positive). +(define + dl-pp-parse-body-lit + (fn + (st) + (let + ((t1 (dl-pp-peek st)) (t2 (dl-pp-peek2 st))) + (if + (and + (= (get t1 :type) "atom") + (= (get t1 :value) "not") + (= (get t2 :type) "punct") + (= (get t2 :value) "(")) + (do + (dl-pp-advance! st) + (dl-pp-advance! st) + (let + ((inner (dl-pp-parse-positive st))) + (do (dl-pp-expect! st "punct" ")") {:neg inner}))) + (dl-pp-parse-positive st))))) + +;; Comma-separated body literals. +(define + dl-pp-parse-body + (fn + (st) + (let + ((lits (list))) + (do + (append! lits (dl-pp-parse-body-lit st)) + (define + dl-pp-body-loop + (fn + () + (when + (dl-pp-at? st "punct" ",") + (do + (dl-pp-advance! st) + (append! lits (dl-pp-parse-body-lit st)) + (dl-pp-body-loop))))) + (dl-pp-body-loop) + lits)))) + +;; Single clause: fact, rule, or query. Consumes trailing dot. +(define + dl-pp-parse-clause + (fn + (st) + (cond + ((dl-pp-at? st "op" "?-") + (do + (dl-pp-advance! st) + (let + ((body (dl-pp-parse-body st))) + (do (dl-pp-expect! st "punct" ".") {:query body})))) + (else + (let + ((head (dl-pp-parse-positive st))) + (cond + ((dl-pp-at? st "op" ":-") + (do + (dl-pp-advance! st) + (let + ((body (dl-pp-parse-body st))) + (do (dl-pp-expect! st "punct" ".") {:body body :head head})))) + (else (do (dl-pp-expect! st "punct" ".") {:body (list) :head head})))))))) + +(define + dl-parse-program + (fn + (tokens) + (let + ((st {:tokens tokens :idx 0}) (clauses (list))) + (do + (define + dl-pp-prog-loop + (fn + () + (when + (not (dl-pp-at? st "eof" nil)) + (do + (append! clauses (dl-pp-parse-clause st)) + (dl-pp-prog-loop))))) + (dl-pp-prog-loop) + clauses)))) + +(define dl-parse (fn (src) (dl-parse-program (dl-tokenize src)))) diff --git a/lib/datalog/scoreboard.json b/lib/datalog/scoreboard.json new file mode 100644 index 00000000..b6b5c6fa --- /dev/null +++ b/lib/datalog/scoreboard.json @@ -0,0 +1,11 @@ +{ + "lang": "datalog", + "total_passed": 44, + "total_failed": 0, + "total": 44, + "suites": [ + {"name":"tokenize","passed":26,"failed":0,"total":26}, + {"name":"parse","passed":18,"failed":0,"total":18} + ], + "generated": "2026-05-07T23:30:07+00:00" +} diff --git a/lib/datalog/scoreboard.md b/lib/datalog/scoreboard.md new file mode 100644 index 00000000..9cd60338 --- /dev/null +++ b/lib/datalog/scoreboard.md @@ -0,0 +1,8 @@ +# datalog scoreboard + +**44 / 44 passing** (0 failure(s)). + +| Suite | Passed | Total | Status | +|-------|--------|-------|--------| +| tokenize | 26 | 26 | ok | +| parse | 18 | 18 | ok | diff --git a/lib/datalog/tests/parse.sx b/lib/datalog/tests/parse.sx new file mode 100644 index 00000000..2af6e6ab --- /dev/null +++ b/lib/datalog/tests/parse.sx @@ -0,0 +1,147 @@ +;; lib/datalog/tests/parse.sx — parser unit tests +;; +;; Run via: bash lib/datalog/conformance.sh +;; Or: (load "lib/datalog/tokenizer.sx") (load "lib/datalog/parser.sx") +;; (load "lib/datalog/tests/parse.sx") (dl-parse-tests-run!) + +(define dl-pt-pass 0) +(define dl-pt-fail 0) +(define dl-pt-failures (list)) + +;; Order-independent structural equality. Lists compared positionally, +;; dicts as sets of (key, value) pairs. Numbers via = (so 30.0 = 30). +(define + dl-deep-equal? + (fn + (a b) + (cond + ((and (list? a) (list? b)) + (and (= (len a) (len b)) (dl-deep-equal-list? a b 0))) + ((and (dict? a) (dict? b)) + (let + ((ka (keys a)) (kb (keys b))) + (and + (= (len ka) (len kb)) + (dl-deep-equal-dict? a b ka 0)))) + ((and (number? a) (number? b)) (= a b)) + (else (equal? a b))))) + +(define + dl-deep-equal-list? + (fn + (a b i) + (cond + ((>= i (len a)) true) + ((not (dl-deep-equal? (nth a i) (nth b i))) false) + (else (dl-deep-equal-list? a b (+ i 1)))))) + +(define + dl-deep-equal-dict? + (fn + (a b ka i) + (cond + ((>= i (len ka)) true) + ((let ((k (nth ka i))) (not (dl-deep-equal? (get a k) (get b k)))) + false) + (else (dl-deep-equal-dict? a b ka (+ i 1)))))) + +(define + dl-pt-test! + (fn + (name got expected) + (if + (dl-deep-equal? got expected) + (set! dl-pt-pass (+ dl-pt-pass 1)) + (do + (set! dl-pt-fail (+ dl-pt-fail 1)) + (append! + dl-pt-failures + (str name "\n expected: " expected "\n got: " got)))))) + +(define + dl-pt-throws? + (fn + (thunk) + (let + ((threw false)) + (do (guard (e (#t (set! threw true))) (thunk)) threw)))) + +(define + dl-pt-run-all! + (fn + () + (do + (dl-pt-test! "empty program" (dl-parse "") (list)) + (dl-pt-test! "fact" (dl-parse "parent(tom, bob).") (list {:body (list) :head (list (quote parent) (quote tom) (quote bob))})) + (dl-pt-test! + "two facts" + (dl-parse "parent(tom, bob). parent(bob, ann).") + (list {:body (list) :head (list (quote parent) (quote tom) (quote bob))} {:body (list) :head (list (quote parent) (quote bob) (quote ann))})) + (dl-pt-test! "zero-ary fact" (dl-parse "ready.") (list {:body (list) :head (list (quote ready))})) + (dl-pt-test! + "rule one body lit" + (dl-parse "ancestor(X, Y) :- parent(X, Y).") + (list {:body (list (list (quote parent) (quote X) (quote Y))) :head (list (quote ancestor) (quote X) (quote Y))})) + (dl-pt-test! + "recursive rule" + (dl-parse "ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).") + (list {:body (list (list (quote parent) (quote X) (quote Y)) (list (quote ancestor) (quote Y) (quote Z))) :head (list (quote ancestor) (quote X) (quote Z))})) + (dl-pt-test! + "query single" + (dl-parse "?- ancestor(tom, X).") + (list {:query (list (list (quote ancestor) (quote tom) (quote X)))})) + (dl-pt-test! + "query multi" + (dl-parse "?- p(X), q(X).") + (list {:query (list (list (quote p) (quote X)) (list (quote q) (quote X)))})) + (dl-pt-test! + "negation" + (dl-parse "safe(X) :- person(X), not(parent(X, _)).") + (list {:body (list (list (quote person) (quote X)) {:neg (list (quote parent) (quote X) (quote _))}) :head (list (quote safe) (quote X))})) + (dl-pt-test! + "number arg" + (dl-parse "age(alice, 30).") + (list {:body (list) :head (list (quote age) (quote alice) 30)})) + (dl-pt-test! + "string arg" + (dl-parse "label(x, \"hi\").") + (list {:body (list) :head (list (quote label) (quote x) "hi")})) + (dl-pt-test! + "comparison literal" + (dl-parse "p(X) :- <(X, 5).") + (list {:body (list (list (string->symbol "<") (quote X) 5)) :head (list (quote p) (quote X))})) + (dl-pt-test! + "is with arith" + (dl-parse "succ(X, Y) :- nat(X), is(Y, +(X, 1)).") + (list {:body (list (list (quote nat) (quote X)) (list (quote is) (quote Y) (list (string->symbol "+") (quote X) 1))) :head (list (quote succ) (quote X) (quote Y))})) + (dl-pt-test! + "mixed program" + (dl-parse "p(a). p(b). q(X) :- p(X). ?- q(Y).") + (list {:body (list) :head (list (quote p) (quote a))} {:body (list) :head (list (quote p) (quote b))} {:body (list (list (quote p) (quote X))) :head (list (quote q) (quote X))} {:query (list (list (quote q) (quote Y)))})) + (dl-pt-test! + "comments skipped" + (dl-parse "% comment\nfoo(a).\n/* block */ bar(b).") + (list {:body (list) :head (list (quote foo) (quote a))} {:body (list) :head (list (quote bar) (quote b))})) + (dl-pt-test! + "underscore var" + (dl-parse "p(X) :- q(X, _).") + (list {:body (list (list (quote q) (quote X) (quote _))) :head (list (quote p) (quote X))})) + (dl-pt-test! + "missing dot raises" + (dl-pt-throws? (fn () (dl-parse "p(a)"))) + true) + (dl-pt-test! + "trailing comma raises" + (dl-pt-throws? (fn () (dl-parse "p(a,)."))) + true)))) + +(define + dl-parse-tests-run! + (fn + () + (do + (set! dl-pt-pass 0) + (set! dl-pt-fail 0) + (set! dl-pt-failures (list)) + (dl-pt-run-all!) + {:failures dl-pt-failures :total (+ dl-pt-pass dl-pt-fail) :passed dl-pt-pass :failed dl-pt-fail}))) diff --git a/plans/datalog-on-sx.md b/plans/datalog-on-sx.md index 79adc148..784cf0df 100644 --- a/plans/datalog-on-sx.md +++ b/plans/datalog-on-sx.md @@ -58,16 +58,21 @@ Key differences from Prolog: ## Roadmap ### Phase 1 — tokenizer + parser -- [ ] Tokenizer: atoms (lowercase/quoted), variables (uppercase/`_`), numbers, strings, - operators (`:- `, `?-`, `,`, `.`), comments (`%`, `/* */`) - Note: no function symbol syntax (no nested `f(...)` in arg position). -- [ ] Parser: +- [x] Tokenizer: atoms (lowercase/quoted), variables (uppercase/`_`), numbers, strings, + punct (`( )`, `,`, `.`), operators (`:-`, `?-`, `<=`, `>=`, `!=`, `<`, `>`, `=`, + `+`, `-`, `*`, `/`), comments (`%`, `/* */`) + Note: no function symbol syntax (no nested `f(...)` in arg position) — but the + parser permits nested compounds for arithmetic; safety analysis (Phase 3) rejects + non-arithmetic nesting. +- [x] Parser: - Facts: `parent(tom, bob).` → `{:head (parent tom bob) :body ()}` - Rules: `ancestor(X,Z) :- parent(X,Y), ancestor(Y,Z).` → `{:head (ancestor X Z) :body ((parent X Y) (ancestor Y Z))}` - - Queries: `?- ancestor(tom, X).` → `{:query (ancestor tom X)}` + - Queries: `?- ancestor(tom, X).` → `{:query ((ancestor tom X))}` + (`:query` value is always a list of literals; `?- p, q.` → `{:query ((p) (q))}`) - Negation: `not(parent(X,Y))` in body position → `{:neg (parent X Y)}` -- [ ] Tests in `lib/datalog/tests/parse.sx` +- [x] Tests in `lib/datalog/tests/parse.sx` (18) and `lib/datalog/tests/tokenize.sx` (26). + Conformance harness: `bash lib/datalog/conformance.sh` → 44 / 44 passing. ### Phase 2 — unification + substitution - [ ] Share or port unification from `lib/prolog/` — term walk, occurs check off by default @@ -142,4 +147,12 @@ _(none yet)_ _Newest first._ -_(awaiting phase 1)_ +- 2026-05-07 — Phase 1 done. `lib/datalog/tokenizer.sx` (~190 LOC) emits + `{:type :value :pos}` tokens; `lib/datalog/parser.sx` (~150 LOC) produces + `{:head … :body …}` / `{:query …}` clauses, with nested compounds + permitted for arithmetic and `not(...)` desugared to `{:neg …}`. 44 / 44 + via `bash lib/datalog/conformance.sh` (26 tokenize + 18 parse). Local + helpers namespace-prefixed (`dl-emit!`, `dl-peek`) after a host-primitive + shadow clash. Test harness uses a custom `dl-deep-equal?` that handles + out-of-order dict keys and number repr (`equal?` fails on dict key order + and on `30` vs `30.0`).