datalog: parser + 18 tests + conformance harness (Phase 1 done, 44/44)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 45s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 45s
Tokens → list of {:head :body} / {:query} clauses. SX symbols for
constants and variables (case-distinguished). not(literal) in body
desugars to {:neg literal}. Nested compounds permitted in arg
position for arithmetic; safety analysis (Phase 3) will gate them.
Conformance harness wraps lib/guest/conformance.sh; produces
lib/datalog/scoreboard.{json,md}.
This commit is contained in:
14
lib/datalog/conformance.conf
Normal file
14
lib/datalog/conformance.conf
Normal file
@@ -0,0 +1,14 @@
|
||||
# Datalog conformance config — sourced by lib/guest/conformance.sh.
|
||||
|
||||
LANG_NAME=datalog
|
||||
MODE=dict
|
||||
|
||||
PRELOADS=(
|
||||
lib/datalog/tokenizer.sx
|
||||
lib/datalog/parser.sx
|
||||
)
|
||||
|
||||
SUITES=(
|
||||
"tokenize:lib/datalog/tests/tokenize.sx:(dl-tokenize-tests-run!)"
|
||||
"parse:lib/datalog/tests/parse.sx:(dl-parse-tests-run!)"
|
||||
)
|
||||
3
lib/datalog/conformance.sh
Executable file
3
lib/datalog/conformance.sh
Executable file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/env bash
|
||||
# Thin wrapper — see lib/guest/conformance.sh and lib/datalog/conformance.conf.
|
||||
exec bash "$(dirname "$0")/../guest/conformance.sh" "$(dirname "$0")/conformance.conf" "$@"
|
||||
242
lib/datalog/parser.sx
Normal file
242
lib/datalog/parser.sx
Normal file
@@ -0,0 +1,242 @@
|
||||
;; lib/datalog/parser.sx — Datalog tokens → AST
|
||||
;;
|
||||
;; Output shapes:
|
||||
;; Literal (positive) := (relname arg ... arg) — SX list
|
||||
;; Literal (negative) := {:neg (relname arg ... arg)} — dict
|
||||
;; Argument := var-symbol | atom-symbol | number | string
|
||||
;; | (op-name arg ... arg) — arithmetic compound
|
||||
;; Fact := {:head literal :body ()}
|
||||
;; Rule := {:head literal :body (lit ... lit)}
|
||||
;; Query := {:query (lit ... lit)}
|
||||
;; Program := list of facts / rules / queries
|
||||
;;
|
||||
;; Variables and constants are both SX symbols; the evaluator dispatches
|
||||
;; on first-char case ('A'..'Z' or '_' = variable, otherwise constant).
|
||||
;;
|
||||
;; The parser permits nested compounds in arg position to support
|
||||
;; arithmetic (e.g. (is Z (+ X Y))). Safety analysis at rule-load time
|
||||
;; rejects compounds whose head is not an arithmetic operator.
|
||||
|
||||
(define
|
||||
dl-pp-peek
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((i (get st :idx)) (tokens (get st :tokens)))
|
||||
(if (< i (len tokens)) (nth tokens i) {:type "eof" :value nil :pos 0}))))
|
||||
|
||||
(define
|
||||
dl-pp-peek2
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((i (+ (get st :idx) 1)) (tokens (get st :tokens)))
|
||||
(if (< i (len tokens)) (nth tokens i) {:type "eof" :value nil :pos 0}))))
|
||||
|
||||
(define
|
||||
dl-pp-advance!
|
||||
(fn (st) (dict-set! st :idx (+ (get st :idx) 1))))
|
||||
|
||||
(define
|
||||
dl-pp-at?
|
||||
(fn
|
||||
(st type value)
|
||||
(let
|
||||
((t (dl-pp-peek st)))
|
||||
(and
|
||||
(= (get t :type) type)
|
||||
(or (= value nil) (= (get t :value) value))))))
|
||||
|
||||
(define
|
||||
dl-pp-error
|
||||
(fn
|
||||
(st msg)
|
||||
(let
|
||||
((t (dl-pp-peek st)))
|
||||
(error
|
||||
(str
|
||||
"Parse error at pos "
|
||||
(get t :pos)
|
||||
": "
|
||||
msg
|
||||
" (got "
|
||||
(get t :type)
|
||||
" '"
|
||||
(if (= (get t :value) nil) "" (get t :value))
|
||||
"')")))))
|
||||
|
||||
(define
|
||||
dl-pp-expect!
|
||||
(fn
|
||||
(st type value)
|
||||
(let
|
||||
((t (dl-pp-peek st)))
|
||||
(if
|
||||
(dl-pp-at? st type value)
|
||||
(do (dl-pp-advance! st) t)
|
||||
(dl-pp-error
|
||||
st
|
||||
(str "expected " type (if (= value nil) "" (str " '" value "'"))))))))
|
||||
|
||||
;; Argument: variable, atom, number, string, or compound (relname/op + parens).
|
||||
(define
|
||||
dl-pp-parse-arg
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((t (dl-pp-peek st)))
|
||||
(let
|
||||
((ty (get t :type)) (vv (get t :value)))
|
||||
(cond
|
||||
((= ty "number") (do (dl-pp-advance! st) vv))
|
||||
((= ty "string") (do (dl-pp-advance! st) vv))
|
||||
((= ty "var") (do (dl-pp-advance! st) (string->symbol vv)))
|
||||
((or (= ty "atom") (= ty "op"))
|
||||
(do
|
||||
(dl-pp-advance! st)
|
||||
(if
|
||||
(dl-pp-at? st "punct" "(")
|
||||
(do
|
||||
(dl-pp-advance! st)
|
||||
(let
|
||||
((args (dl-pp-parse-arg-list st)))
|
||||
(do
|
||||
(dl-pp-expect! st "punct" ")")
|
||||
(cons (string->symbol vv) args))))
|
||||
(string->symbol vv))))
|
||||
(else (dl-pp-error st "expected term")))))))
|
||||
|
||||
;; Comma-separated args inside parens.
|
||||
(define
|
||||
dl-pp-parse-arg-list
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((args (list)))
|
||||
(do
|
||||
(append! args (dl-pp-parse-arg st))
|
||||
(define
|
||||
dl-pp-arg-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(dl-pp-at? st "punct" ",")
|
||||
(do
|
||||
(dl-pp-advance! st)
|
||||
(append! args (dl-pp-parse-arg st))
|
||||
(dl-pp-arg-loop)))))
|
||||
(dl-pp-arg-loop)
|
||||
args))))
|
||||
|
||||
;; A positive literal: relname (atom or op) followed by optional (args).
|
||||
(define
|
||||
dl-pp-parse-positive
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((t (dl-pp-peek st)))
|
||||
(let
|
||||
((ty (get t :type)) (vv (get t :value)))
|
||||
(if
|
||||
(or (= ty "atom") (= ty "op"))
|
||||
(do
|
||||
(dl-pp-advance! st)
|
||||
(if
|
||||
(dl-pp-at? st "punct" "(")
|
||||
(do
|
||||
(dl-pp-advance! st)
|
||||
(let
|
||||
((args (dl-pp-parse-arg-list st)))
|
||||
(do
|
||||
(dl-pp-expect! st "punct" ")")
|
||||
(cons (string->symbol vv) args))))
|
||||
(list (string->symbol vv))))
|
||||
(dl-pp-error st "expected literal head"))))))
|
||||
|
||||
;; A body literal: positive, or not(positive).
|
||||
(define
|
||||
dl-pp-parse-body-lit
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((t1 (dl-pp-peek st)) (t2 (dl-pp-peek2 st)))
|
||||
(if
|
||||
(and
|
||||
(= (get t1 :type) "atom")
|
||||
(= (get t1 :value) "not")
|
||||
(= (get t2 :type) "punct")
|
||||
(= (get t2 :value) "("))
|
||||
(do
|
||||
(dl-pp-advance! st)
|
||||
(dl-pp-advance! st)
|
||||
(let
|
||||
((inner (dl-pp-parse-positive st)))
|
||||
(do (dl-pp-expect! st "punct" ")") {:neg inner})))
|
||||
(dl-pp-parse-positive st)))))
|
||||
|
||||
;; Comma-separated body literals.
|
||||
(define
|
||||
dl-pp-parse-body
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((lits (list)))
|
||||
(do
|
||||
(append! lits (dl-pp-parse-body-lit st))
|
||||
(define
|
||||
dl-pp-body-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(dl-pp-at? st "punct" ",")
|
||||
(do
|
||||
(dl-pp-advance! st)
|
||||
(append! lits (dl-pp-parse-body-lit st))
|
||||
(dl-pp-body-loop)))))
|
||||
(dl-pp-body-loop)
|
||||
lits))))
|
||||
|
||||
;; Single clause: fact, rule, or query. Consumes trailing dot.
|
||||
(define
|
||||
dl-pp-parse-clause
|
||||
(fn
|
||||
(st)
|
||||
(cond
|
||||
((dl-pp-at? st "op" "?-")
|
||||
(do
|
||||
(dl-pp-advance! st)
|
||||
(let
|
||||
((body (dl-pp-parse-body st)))
|
||||
(do (dl-pp-expect! st "punct" ".") {:query body}))))
|
||||
(else
|
||||
(let
|
||||
((head (dl-pp-parse-positive st)))
|
||||
(cond
|
||||
((dl-pp-at? st "op" ":-")
|
||||
(do
|
||||
(dl-pp-advance! st)
|
||||
(let
|
||||
((body (dl-pp-parse-body st)))
|
||||
(do (dl-pp-expect! st "punct" ".") {:body body :head head}))))
|
||||
(else (do (dl-pp-expect! st "punct" ".") {:body (list) :head head}))))))))
|
||||
|
||||
(define
|
||||
dl-parse-program
|
||||
(fn
|
||||
(tokens)
|
||||
(let
|
||||
((st {:tokens tokens :idx 0}) (clauses (list)))
|
||||
(do
|
||||
(define
|
||||
dl-pp-prog-loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(not (dl-pp-at? st "eof" nil))
|
||||
(do
|
||||
(append! clauses (dl-pp-parse-clause st))
|
||||
(dl-pp-prog-loop)))))
|
||||
(dl-pp-prog-loop)
|
||||
clauses))))
|
||||
|
||||
(define dl-parse (fn (src) (dl-parse-program (dl-tokenize src))))
|
||||
11
lib/datalog/scoreboard.json
Normal file
11
lib/datalog/scoreboard.json
Normal file
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"lang": "datalog",
|
||||
"total_passed": 44,
|
||||
"total_failed": 0,
|
||||
"total": 44,
|
||||
"suites": [
|
||||
{"name":"tokenize","passed":26,"failed":0,"total":26},
|
||||
{"name":"parse","passed":18,"failed":0,"total":18}
|
||||
],
|
||||
"generated": "2026-05-07T23:30:07+00:00"
|
||||
}
|
||||
8
lib/datalog/scoreboard.md
Normal file
8
lib/datalog/scoreboard.md
Normal file
@@ -0,0 +1,8 @@
|
||||
# datalog scoreboard
|
||||
|
||||
**44 / 44 passing** (0 failure(s)).
|
||||
|
||||
| Suite | Passed | Total | Status |
|
||||
|-------|--------|-------|--------|
|
||||
| tokenize | 26 | 26 | ok |
|
||||
| parse | 18 | 18 | ok |
|
||||
147
lib/datalog/tests/parse.sx
Normal file
147
lib/datalog/tests/parse.sx
Normal file
@@ -0,0 +1,147 @@
|
||||
;; lib/datalog/tests/parse.sx — parser unit tests
|
||||
;;
|
||||
;; Run via: bash lib/datalog/conformance.sh
|
||||
;; Or: (load "lib/datalog/tokenizer.sx") (load "lib/datalog/parser.sx")
|
||||
;; (load "lib/datalog/tests/parse.sx") (dl-parse-tests-run!)
|
||||
|
||||
(define dl-pt-pass 0)
|
||||
(define dl-pt-fail 0)
|
||||
(define dl-pt-failures (list))
|
||||
|
||||
;; Order-independent structural equality. Lists compared positionally,
|
||||
;; dicts as sets of (key, value) pairs. Numbers via = (so 30.0 = 30).
|
||||
(define
|
||||
dl-deep-equal?
|
||||
(fn
|
||||
(a b)
|
||||
(cond
|
||||
((and (list? a) (list? b))
|
||||
(and (= (len a) (len b)) (dl-deep-equal-list? a b 0)))
|
||||
((and (dict? a) (dict? b))
|
||||
(let
|
||||
((ka (keys a)) (kb (keys b)))
|
||||
(and
|
||||
(= (len ka) (len kb))
|
||||
(dl-deep-equal-dict? a b ka 0))))
|
||||
((and (number? a) (number? b)) (= a b))
|
||||
(else (equal? a b)))))
|
||||
|
||||
(define
|
||||
dl-deep-equal-list?
|
||||
(fn
|
||||
(a b i)
|
||||
(cond
|
||||
((>= i (len a)) true)
|
||||
((not (dl-deep-equal? (nth a i) (nth b i))) false)
|
||||
(else (dl-deep-equal-list? a b (+ i 1))))))
|
||||
|
||||
(define
|
||||
dl-deep-equal-dict?
|
||||
(fn
|
||||
(a b ka i)
|
||||
(cond
|
||||
((>= i (len ka)) true)
|
||||
((let ((k (nth ka i))) (not (dl-deep-equal? (get a k) (get b k))))
|
||||
false)
|
||||
(else (dl-deep-equal-dict? a b ka (+ i 1))))))
|
||||
|
||||
(define
|
||||
dl-pt-test!
|
||||
(fn
|
||||
(name got expected)
|
||||
(if
|
||||
(dl-deep-equal? got expected)
|
||||
(set! dl-pt-pass (+ dl-pt-pass 1))
|
||||
(do
|
||||
(set! dl-pt-fail (+ dl-pt-fail 1))
|
||||
(append!
|
||||
dl-pt-failures
|
||||
(str name "\n expected: " expected "\n got: " got))))))
|
||||
|
||||
(define
|
||||
dl-pt-throws?
|
||||
(fn
|
||||
(thunk)
|
||||
(let
|
||||
((threw false))
|
||||
(do (guard (e (#t (set! threw true))) (thunk)) threw))))
|
||||
|
||||
(define
|
||||
dl-pt-run-all!
|
||||
(fn
|
||||
()
|
||||
(do
|
||||
(dl-pt-test! "empty program" (dl-parse "") (list))
|
||||
(dl-pt-test! "fact" (dl-parse "parent(tom, bob).") (list {:body (list) :head (list (quote parent) (quote tom) (quote bob))}))
|
||||
(dl-pt-test!
|
||||
"two facts"
|
||||
(dl-parse "parent(tom, bob). parent(bob, ann).")
|
||||
(list {:body (list) :head (list (quote parent) (quote tom) (quote bob))} {:body (list) :head (list (quote parent) (quote bob) (quote ann))}))
|
||||
(dl-pt-test! "zero-ary fact" (dl-parse "ready.") (list {:body (list) :head (list (quote ready))}))
|
||||
(dl-pt-test!
|
||||
"rule one body lit"
|
||||
(dl-parse "ancestor(X, Y) :- parent(X, Y).")
|
||||
(list {:body (list (list (quote parent) (quote X) (quote Y))) :head (list (quote ancestor) (quote X) (quote Y))}))
|
||||
(dl-pt-test!
|
||||
"recursive rule"
|
||||
(dl-parse "ancestor(X, Z) :- parent(X, Y), ancestor(Y, Z).")
|
||||
(list {:body (list (list (quote parent) (quote X) (quote Y)) (list (quote ancestor) (quote Y) (quote Z))) :head (list (quote ancestor) (quote X) (quote Z))}))
|
||||
(dl-pt-test!
|
||||
"query single"
|
||||
(dl-parse "?- ancestor(tom, X).")
|
||||
(list {:query (list (list (quote ancestor) (quote tom) (quote X)))}))
|
||||
(dl-pt-test!
|
||||
"query multi"
|
||||
(dl-parse "?- p(X), q(X).")
|
||||
(list {:query (list (list (quote p) (quote X)) (list (quote q) (quote X)))}))
|
||||
(dl-pt-test!
|
||||
"negation"
|
||||
(dl-parse "safe(X) :- person(X), not(parent(X, _)).")
|
||||
(list {:body (list (list (quote person) (quote X)) {:neg (list (quote parent) (quote X) (quote _))}) :head (list (quote safe) (quote X))}))
|
||||
(dl-pt-test!
|
||||
"number arg"
|
||||
(dl-parse "age(alice, 30).")
|
||||
(list {:body (list) :head (list (quote age) (quote alice) 30)}))
|
||||
(dl-pt-test!
|
||||
"string arg"
|
||||
(dl-parse "label(x, \"hi\").")
|
||||
(list {:body (list) :head (list (quote label) (quote x) "hi")}))
|
||||
(dl-pt-test!
|
||||
"comparison literal"
|
||||
(dl-parse "p(X) :- <(X, 5).")
|
||||
(list {:body (list (list (string->symbol "<") (quote X) 5)) :head (list (quote p) (quote X))}))
|
||||
(dl-pt-test!
|
||||
"is with arith"
|
||||
(dl-parse "succ(X, Y) :- nat(X), is(Y, +(X, 1)).")
|
||||
(list {:body (list (list (quote nat) (quote X)) (list (quote is) (quote Y) (list (string->symbol "+") (quote X) 1))) :head (list (quote succ) (quote X) (quote Y))}))
|
||||
(dl-pt-test!
|
||||
"mixed program"
|
||||
(dl-parse "p(a). p(b). q(X) :- p(X). ?- q(Y).")
|
||||
(list {:body (list) :head (list (quote p) (quote a))} {:body (list) :head (list (quote p) (quote b))} {:body (list (list (quote p) (quote X))) :head (list (quote q) (quote X))} {:query (list (list (quote q) (quote Y)))}))
|
||||
(dl-pt-test!
|
||||
"comments skipped"
|
||||
(dl-parse "% comment\nfoo(a).\n/* block */ bar(b).")
|
||||
(list {:body (list) :head (list (quote foo) (quote a))} {:body (list) :head (list (quote bar) (quote b))}))
|
||||
(dl-pt-test!
|
||||
"underscore var"
|
||||
(dl-parse "p(X) :- q(X, _).")
|
||||
(list {:body (list (list (quote q) (quote X) (quote _))) :head (list (quote p) (quote X))}))
|
||||
(dl-pt-test!
|
||||
"missing dot raises"
|
||||
(dl-pt-throws? (fn () (dl-parse "p(a)")))
|
||||
true)
|
||||
(dl-pt-test!
|
||||
"trailing comma raises"
|
||||
(dl-pt-throws? (fn () (dl-parse "p(a,).")))
|
||||
true))))
|
||||
|
||||
(define
|
||||
dl-parse-tests-run!
|
||||
(fn
|
||||
()
|
||||
(do
|
||||
(set! dl-pt-pass 0)
|
||||
(set! dl-pt-fail 0)
|
||||
(set! dl-pt-failures (list))
|
||||
(dl-pt-run-all!)
|
||||
{:failures dl-pt-failures :total (+ dl-pt-pass dl-pt-fail) :passed dl-pt-pass :failed dl-pt-fail})))
|
||||
@@ -58,16 +58,21 @@ Key differences from Prolog:
|
||||
## Roadmap
|
||||
|
||||
### Phase 1 — tokenizer + parser
|
||||
- [ ] Tokenizer: atoms (lowercase/quoted), variables (uppercase/`_`), numbers, strings,
|
||||
operators (`:- `, `?-`, `,`, `.`), comments (`%`, `/* */`)
|
||||
Note: no function symbol syntax (no nested `f(...)` in arg position).
|
||||
- [ ] Parser:
|
||||
- [x] Tokenizer: atoms (lowercase/quoted), variables (uppercase/`_`), numbers, strings,
|
||||
punct (`( )`, `,`, `.`), operators (`:-`, `?-`, `<=`, `>=`, `!=`, `<`, `>`, `=`,
|
||||
`+`, `-`, `*`, `/`), comments (`%`, `/* */`)
|
||||
Note: no function symbol syntax (no nested `f(...)` in arg position) — but the
|
||||
parser permits nested compounds for arithmetic; safety analysis (Phase 3) rejects
|
||||
non-arithmetic nesting.
|
||||
- [x] Parser:
|
||||
- Facts: `parent(tom, bob).` → `{:head (parent tom bob) :body ()}`
|
||||
- Rules: `ancestor(X,Z) :- parent(X,Y), ancestor(Y,Z).`
|
||||
→ `{:head (ancestor X Z) :body ((parent X Y) (ancestor Y Z))}`
|
||||
- Queries: `?- ancestor(tom, X).` → `{:query (ancestor tom X)}`
|
||||
- Queries: `?- ancestor(tom, X).` → `{:query ((ancestor tom X))}`
|
||||
(`:query` value is always a list of literals; `?- p, q.` → `{:query ((p) (q))}`)
|
||||
- Negation: `not(parent(X,Y))` in body position → `{:neg (parent X Y)}`
|
||||
- [ ] Tests in `lib/datalog/tests/parse.sx`
|
||||
- [x] Tests in `lib/datalog/tests/parse.sx` (18) and `lib/datalog/tests/tokenize.sx` (26).
|
||||
Conformance harness: `bash lib/datalog/conformance.sh` → 44 / 44 passing.
|
||||
|
||||
### Phase 2 — unification + substitution
|
||||
- [ ] Share or port unification from `lib/prolog/` — term walk, occurs check off by default
|
||||
@@ -142,4 +147,12 @@ _(none yet)_
|
||||
|
||||
_Newest first._
|
||||
|
||||
_(awaiting phase 1)_
|
||||
- 2026-05-07 — Phase 1 done. `lib/datalog/tokenizer.sx` (~190 LOC) emits
|
||||
`{:type :value :pos}` tokens; `lib/datalog/parser.sx` (~150 LOC) produces
|
||||
`{:head … :body …}` / `{:query …}` clauses, with nested compounds
|
||||
permitted for arithmetic and `not(...)` desugared to `{:neg …}`. 44 / 44
|
||||
via `bash lib/datalog/conformance.sh` (26 tokenize + 18 parse). Local
|
||||
helpers namespace-prefixed (`dl-emit!`, `dl-peek`) after a host-primitive
|
||||
shadow clash. Test harness uses a custom `dl-deep-equal?` that handles
|
||||
out-of-order dict keys and number repr (`equal?` fails on dict key order
|
||||
and on `30` vs `30.0`).
|
||||
|
||||
Reference in New Issue
Block a user