Recover agent-loop progress: lua/prolog/forth/erlang/haskell phases 1-2

Salvaged from worktree-agent-* branches killed during sx-tree MCP outage:
- lua: tokenizer + parser + phase-2 transpile (~157 tests)
- prolog: tokenizer + parser + unification (72 tests, plan update lost to WIP)
- forth: phase-1 reader/interpreter + phase-2 colon/VARIABLE (134 tests)
- erlang: tokenizer + parser (114 tests)
- haskell: tokenizer + parse tests (43 tests)

Cherry-picked file contents only, not branch history, to avoid pulling in
unrelated ocaml-vm merge commits that were in those branches' bases.
This commit is contained in:
2026-04-24 16:03:00 +00:00
parent e274878052
commit 99753580b4
32 changed files with 7803 additions and 36 deletions

265
lib/prolog/parser.sx Normal file
View File

@@ -0,0 +1,265 @@
;; lib/prolog/parser.sx — tokens → Prolog AST
;;
;; Phase 1 grammar (NO operator table yet):
;; Program := Clause* EOF
;; Clause := Term "." | Term ":-" Term "."
;; Term := Atom | Var | Number | String | Compound | List
;; Compound := atom "(" ArgList ")"
;; ArgList := Term ("," Term)*
;; List := "[" "]" | "[" Term ("," Term)* ("|" Term)? "]"
;;
;; Term AST shapes (all tagged lists for uniform dispatch):
;; ("atom" name) — atom
;; ("var" name) — variable template (parser-time only)
;; ("num" value) — integer or float
;; ("str" value) — string literal
;; ("compound" functor args) — compound term, args is list of term-ASTs
;; ("cut") — the cut atom !
;;
;; A clause is (list "clause" head body). A fact is head with body = ("atom" "true").
;;
;; The empty list is (atom "[]"). Cons is compound "." with two args:
;; [1, 2, 3] → .(1, .(2, .(3, [])))
;; [H|T] → .(H, T)
;; ── Parser state helpers ────────────────────────────────────────────
(define
pp-peek
(fn
(st)
(let
((i (get st :idx)) (tokens (get st :tokens)))
(if (< i (len tokens)) (nth tokens i) {:pos 0 :value nil :type "eof"}))))
(define pp-advance! (fn (st) (dict-set! st :idx (+ (get st :idx) 1))))
(define
pp-at?
(fn
(st type value)
(let
((t (pp-peek st)))
(and
(= (get t :type) type)
(or (= value nil) (= (get t :value) value))))))
(define
pp-expect!
(fn
(st type value)
(let
((t (pp-peek st)))
(if
(pp-at? st type value)
(do (pp-advance! st) t)
(error
(str
"Parse error at pos "
(get t :pos)
": expected "
type
" '"
(if (= value nil) "" value)
"' got "
(get t :type)
" '"
(if (= (get t :value) nil) "" (get t :value))
"'"))))))
;; ── AST constructors ────────────────────────────────────────────────
(define pl-mk-atom (fn (name) (list "atom" name)))
(define pl-mk-var (fn (name) (list "var" name)))
(define pl-mk-num (fn (n) (list "num" n)))
(define pl-mk-str (fn (s) (list "str" s)))
(define pl-mk-compound (fn (f args) (list "compound" f args)))
(define pl-mk-cut (fn () (list "cut")))
;; Term tag extractors
(define pl-term-tag (fn (t) (if (list? t) (first t) nil)))
(define pl-term-val (fn (t) (nth t 1)))
(define pl-compound-functor (fn (t) (nth t 1)))
(define pl-compound-args (fn (t) (nth t 2)))
;; Empty-list atom and cons helpers
(define pl-nil-term (fn () (pl-mk-atom "[]")))
(define pl-mk-cons (fn (h t) (pl-mk-compound "." (list h t))))
;; Build cons list from a list of terms + optional tail
(define
pl-mk-list-term
(fn
(items tail)
(if
(= (len items) 0)
tail
(pl-mk-cons (first items) (pl-mk-list-term (rest items) tail)))))
;; ── Term parser ─────────────────────────────────────────────────────
(define
pp-parse-term
(fn
(st)
(let
((t (pp-peek st)))
(let
((ty (get t :type)) (vv (get t :value)))
(cond
((= ty "number") (do (pp-advance! st) (pl-mk-num vv)))
((= ty "string") (do (pp-advance! st) (pl-mk-str vv)))
((= ty "var") (do (pp-advance! st) (pl-mk-var vv)))
((and (= ty "op") (= vv "!"))
(do (pp-advance! st) (pl-mk-cut)))
((and (= ty "punct") (= vv "[")) (pp-parse-list st))
((= ty "atom")
(do
(pp-advance! st)
(if
(pp-at? st "punct" "(")
(do
(pp-advance! st)
(let
((args (pp-parse-arg-list st)))
(do (pp-expect! st "punct" ")") (pl-mk-compound vv args))))
(pl-mk-atom vv))))
(else
(error
(str
"Parse error at pos "
(get t :pos)
": unexpected "
ty
" '"
(if (= vv nil) "" vv)
"'"))))))))
;; Parse one or more comma-separated terms (arguments).
(define
pp-parse-arg-list
(fn
(st)
(let
((first-arg (pp-parse-term st)) (args (list)))
(do
(append! args first-arg)
(define
loop
(fn
()
(when
(pp-at? st "punct" ",")
(do
(pp-advance! st)
(append! args (pp-parse-term st))
(loop)))))
(loop)
args))))
;; Parse a [ ... ] list literal. Consumes the "[".
(define
pp-parse-list
(fn
(st)
(do
(pp-expect! st "punct" "[")
(if
(pp-at? st "punct" "]")
(do (pp-advance! st) (pl-nil-term))
(let
((items (list)))
(do
(append! items (pp-parse-term st))
(define
comma-loop
(fn
()
(when
(pp-at? st "punct" ",")
(do
(pp-advance! st)
(append! items (pp-parse-term st))
(comma-loop)))))
(comma-loop)
(let
((tail (if (pp-at? st "punct" "|") (do (pp-advance! st) (pp-parse-term st)) (pl-nil-term))))
(do (pp-expect! st "punct" "]") (pl-mk-list-term items tail)))))))))
;; ── Body parsing ────────────────────────────────────────────────────
;; A clause body is a comma-separated list of goals. We flatten into a
;; right-associative `,` compound: (A, B, C) → ','(A, ','(B, C))
;; If only one goal, it's that goal directly.
(define
pp-parse-body
(fn
(st)
(let
((first-goal (pp-parse-term st)) (rest-goals (list)))
(do
(define
gloop
(fn
()
(when
(pp-at? st "punct" ",")
(do
(pp-advance! st)
(append! rest-goals (pp-parse-term st))
(gloop)))))
(gloop)
(if
(= (len rest-goals) 0)
first-goal
(pp-build-conj first-goal rest-goals))))))
(define
pp-build-conj
(fn
(first-goal rest-goals)
(if
(= (len rest-goals) 0)
first-goal
(pl-mk-compound
","
(list
first-goal
(pp-build-conj (first rest-goals) (rest rest-goals)))))))
;; ── Clause parsing ──────────────────────────────────────────────────
(define
pp-parse-clause
(fn
(st)
(let
((head (pp-parse-term st)))
(let
((body (if (pp-at? st "op" ":-") (do (pp-advance! st) (pp-parse-body st)) (pl-mk-atom "true"))))
(do (pp-expect! st "punct" ".") (list "clause" head body))))))
;; Parse an entire program — returns list of clauses.
(define
pl-parse-program
(fn
(tokens)
(let
((st {:idx 0 :tokens tokens}) (clauses (list)))
(do
(define
ploop
(fn
()
(when
(not (pp-at? st "eof" nil))
(do (append! clauses (pp-parse-clause st)) (ploop)))))
(ploop)
clauses))))
;; Parse a single query term (no trailing "."). Returns the term.
(define
pl-parse-query
(fn (tokens) (let ((st {:idx 0 :tokens tokens})) (pp-parse-body st))))
;; Convenience: source → clauses
(define pl-parse (fn (src) (pl-parse-program (pl-tokenize src))))
;; Convenience: source → query term
(define pl-parse-goal (fn (src) (pl-parse-query (pl-tokenize src))))

232
lib/prolog/runtime.sx Normal file
View File

@@ -0,0 +1,232 @@
;; lib/prolog/runtime.sx — unification, trail, and (later) solver
;;
;; Phase 2 focus: runtime terms + unification + trail.
;;
;; Term representation at runtime:
;; atom ("atom" name) -- immutable tagged list, same as parse AST
;; num ("num" n) -- likewise
;; str ("str" s) -- likewise
;; compound ("compound" fun args) -- args is a regular list of terms
;; var {:tag "var" -- MUTABLE dict; :binding = nil (unbound) or term
;; :name "X"
;; :id <int>
;; :binding <term-or-nil>}
;;
;; Parse-time ("var" name) tokens must be instantiated into runtime vars
;; before unification. Fresh renaming happens per clause resolution so
;; that two separate calls to the same clause don't share variables.
;;
;; Trail:
;; {:entries (<var> <var> ...) :len N} -- stack of vars that got bound
;; Mark = integer length of the entries list at checkpoint time.
;; trail-undo-to! pops entries pushed since the mark, nil'ing :binding.
;;
;; Occurs-check: off by default; configurable via trail :occurs-check flag.
;; ── Var id counter ─────────────────────────────────────────────────
(define pl-var-counter {:n 0})
(define
pl-fresh-id
(fn
()
(let
((n (dict-get pl-var-counter :n)))
(dict-set! pl-var-counter :n (+ n 1))
n)))
;; ── Term constructors / predicates ─────────────────────────────────
(define pl-mk-rt-var (fn (name) {:tag "var" :id (pl-fresh-id) :name name :binding nil}))
(define pl-var? (fn (t) (and (dict? t) (= (dict-get t :tag) "var"))))
(define
pl-atom?
(fn (t) (and (list? t) (not (empty? t)) (= (first t) "atom"))))
(define
pl-num?
(fn (t) (and (list? t) (not (empty? t)) (= (first t) "num"))))
(define
pl-str?
(fn (t) (and (list? t) (not (empty? t)) (= (first t) "str"))))
(define
pl-compound?
(fn (t) (and (list? t) (not (empty? t)) (= (first t) "compound"))))
(define pl-var-name (fn (v) (dict-get v :name)))
(define pl-var-id (fn (v) (dict-get v :id)))
(define pl-var-binding (fn (v) (dict-get v :binding)))
(define pl-var-bound? (fn (v) (not (nil? (dict-get v :binding)))))
(define pl-atom-name (fn (t) (nth t 1)))
(define pl-num-val (fn (t) (nth t 1)))
(define pl-str-val (fn (t) (nth t 1)))
(define pl-fun (fn (t) (nth t 1)))
(define pl-args (fn (t) (nth t 2)))
;; ── Instantiate parse AST into runtime terms ──────────────────────
;; Walk a parser AST term, replacing ("var" name) occurrences with fresh
;; runtime vars. A name->var dict is threaded so that repeated uses of
;; the same variable within a clause share the same runtime var.
;; "_" is anonymous: each occurrence gets a NEW fresh var (never shared).
(define
pl-instantiate
(fn
(ast var-env)
(cond
((pl-var? ast) ast)
((not (list? ast)) ast)
((empty? ast) ast)
((= (first ast) "var")
(let
((name (nth ast 1)))
(if
(= name "_")
(pl-mk-rt-var "_")
(if
(dict-has? var-env name)
(dict-get var-env name)
(let ((v (pl-mk-rt-var name))) (dict-set! var-env name v) v)))))
((= (first ast) "compound")
(let
((fun (nth ast 1)) (args (nth ast 2)))
(list
"compound"
fun
(map (fn (a) (pl-instantiate a var-env)) args))))
(true ast))))
(define pl-instantiate-fresh (fn (ast) (pl-instantiate ast {})))
;; ── Walk: follow binding chain ─────────────────────────────────────
(define
pl-walk
(fn
(t)
(if
(pl-var? t)
(if (pl-var-bound? t) (pl-walk (pl-var-binding t)) t)
t)))
;; Deep-walk: recursively resolve variables inside compound terms.
(define
pl-walk-deep
(fn
(t)
(let
((w (pl-walk t)))
(if
(pl-compound? w)
(list "compound" (pl-fun w) (map pl-walk-deep (pl-args w)))
w))))
;; ── Trail ─────────────────────────────────────────────────────────
(define pl-mk-trail (fn () {:entries () :len 0 :occurs-check false}))
(define pl-trail-mark (fn (trail) (dict-get trail :len)))
(define
pl-trail-push!
(fn
(trail v)
(dict-set! trail :entries (cons v (dict-get trail :entries)))
(dict-set! trail :len (+ 1 (dict-get trail :len)))))
(define
pl-trail-undo-to!
(fn
(trail mark)
(let
loop
()
(when
(> (dict-get trail :len) mark)
(let
((entries (dict-get trail :entries)))
(let
((top (first entries)) (rest (rest entries)))
(dict-set! top :binding nil)
(dict-set! trail :entries rest)
(dict-set! trail :len (- (dict-get trail :len) 1))
(loop)))))))
;; Bind variable v to term t, recording on the trail.
(define
pl-bind!
(fn (v t trail) (dict-set! v :binding t) (pl-trail-push! trail v)))
;; ── Occurs check ──────────────────────────────────────────────────
(define
pl-occurs?
(fn
(v t)
(let
((w (pl-walk t)))
(cond
((pl-var? w) (= (pl-var-id v) (pl-var-id w)))
((pl-compound? w) (some (fn (a) (pl-occurs? v a)) (pl-args w)))
(true false)))))
;; ── Unify ─────────────────────────────────────────────────────────
;; Unify two terms, mutating trail. Returns true on success.
;; On failure, the caller must undo to a pre-unify mark.
(define
pl-unify!
(fn
(t1 t2 trail)
(let
((a (pl-walk t1)) (b (pl-walk t2)))
(cond
((and (pl-var? a) (pl-var? b) (= (pl-var-id a) (pl-var-id b)))
true)
((pl-var? a)
(if
(and (dict-get trail :occurs-check) (pl-occurs? a b))
false
(do (pl-bind! a b trail) true)))
((pl-var? b)
(if
(and (dict-get trail :occurs-check) (pl-occurs? b a))
false
(do (pl-bind! b a trail) true)))
((and (pl-atom? a) (pl-atom? b))
(= (pl-atom-name a) (pl-atom-name b)))
((and (pl-num? a) (pl-num? b)) (= (pl-num-val a) (pl-num-val b)))
((and (pl-str? a) (pl-str? b)) (= (pl-str-val a) (pl-str-val b)))
((and (pl-compound? a) (pl-compound? b))
(if
(and
(= (pl-fun a) (pl-fun b))
(= (len (pl-args a)) (len (pl-args b))))
(pl-unify-lists! (pl-args a) (pl-args b) trail)
false))
(true false)))))
(define
pl-unify-lists!
(fn
(xs ys trail)
(cond
((and (empty? xs) (empty? ys)) true)
((or (empty? xs) (empty? ys)) false)
(true
(if
(pl-unify! (first xs) (first ys) trail)
(pl-unify-lists! (rest xs) (rest ys) trail)
false)))))
;; Convenience: try-unify with auto-undo on failure.
(define
pl-try-unify!
(fn
(t1 t2 trail)
(let
((mark (pl-trail-mark trail)))
(if
(pl-unify! t1 t2 trail)
true
(do (pl-trail-undo-to! trail mark) false)))))

215
lib/prolog/tests/parse.sx Normal file
View File

@@ -0,0 +1,215 @@
;; lib/prolog/tests/parse.sx — parser unit tests
;;
;; Run: bash lib/prolog/tests/run-parse.sh
;; Or via sx-server: (load "lib/prolog/tokenizer.sx") (load "lib/prolog/parser.sx")
;; (load "lib/prolog/tests/parse.sx") (pl-parse-tests-run!)
(define pl-test-count 0)
(define pl-test-pass 0)
(define pl-test-fail 0)
(define pl-test-failures (list))
(define
pl-test!
(fn
(name got expected)
(do
(set! pl-test-count (+ pl-test-count 1))
(if
(= got expected)
(set! pl-test-pass (+ pl-test-pass 1))
(do
(set! pl-test-fail (+ pl-test-fail 1))
(append!
pl-test-failures
(str name "\n expected: " expected "\n got: " got)))))))
;; Atoms & variables
(pl-test!
"atom fact"
(pl-parse "foo.")
(list (list "clause" (list "atom" "foo") (list "atom" "true"))))
(pl-test! "number literal" (pl-parse-goal "42") (list "num" 42))
(pl-test!
"negative number — not supported yet (parsed as op atom + num)"
(pl-parse-goal "-5")
(list "atom" "-"))
(pl-test! "variable" (pl-parse-goal "X") (list "var" "X"))
(pl-test!
"underscore variable"
(pl-parse-goal "_Ignored")
(list "var" "_Ignored"))
(pl-test! "anonymous variable" (pl-parse-goal "_") (list "var" "_"))
(pl-test!
"compound 1-arg"
(pl-parse-goal "foo(a)")
(list "compound" "foo" (list (list "atom" "a"))))
(pl-test!
"compound 3-args mixed"
(pl-parse-goal "p(X, 1, hello)")
(list
"compound"
"p"
(list (list "var" "X") (list "num" 1) (list "atom" "hello"))))
(pl-test!
"nested compound"
(pl-parse-goal "f(g(X), h(Y, Z))")
(list
"compound"
"f"
(list
(list "compound" "g" (list (list "var" "X")))
(list "compound" "h" (list (list "var" "Y") (list "var" "Z"))))))
;; Lists
(pl-test! "empty list" (pl-parse-goal "[]") (list "atom" "[]"))
(pl-test!
"single-element list"
(pl-parse-goal "[a]")
(list "compound" "." (list (list "atom" "a") (list "atom" "[]"))))
(pl-test!
"three-element list"
(pl-parse-goal "[1, 2, 3]")
(list
"compound"
"."
(list
(list "num" 1)
(list
"compound"
"."
(list
(list "num" 2)
(list "compound" "." (list (list "num" 3) (list "atom" "[]"))))))))
(pl-test!
"head-tail list"
(pl-parse-goal "[H|T]")
(list "compound" "." (list (list "var" "H") (list "var" "T"))))
(pl-test!
"two-head-tail list"
(pl-parse-goal "[A, B|T]")
(list
"compound"
"."
(list
(list "var" "A")
(list "compound" "." (list (list "var" "B") (list "var" "T"))))))
;; Clauses
(pl-test!
"fact"
(pl-parse "parent(tom, bob).")
(list
(list
"clause"
(list
"compound"
"parent"
(list (list "atom" "tom") (list "atom" "bob")))
(list "atom" "true"))))
(pl-test!
"rule with single-goal body"
(pl-parse "q(X) :- p(X).")
(list
(list
"clause"
(list "compound" "q" (list (list "var" "X")))
(list "compound" "p" (list (list "var" "X"))))))
(pl-test!
"rule with conjunctive body"
(pl-parse "r(X, Y) :- p(X), q(Y).")
(list
(list
"clause"
(list "compound" "r" (list (list "var" "X") (list "var" "Y")))
(list
"compound"
","
(list
(list "compound" "p" (list (list "var" "X")))
(list "compound" "q" (list (list "var" "Y"))))))))
;; Cut in body
(pl-test!
"cut in body"
(pl-parse "foo(X) :- p(X), !, q(X).")
(list
(list
"clause"
(list "compound" "foo" (list (list "var" "X")))
(list
"compound"
","
(list
(list "compound" "p" (list (list "var" "X")))
(list
"compound"
","
(list
(list "cut")
(list "compound" "q" (list (list "var" "X"))))))))))
;; Symbolic-atom compound terms (phase 1 form)
(pl-test!
"= as compound"
(pl-parse-goal "=(X, 5)")
(list "compound" "=" (list (list "var" "X") (list "num" 5))))
(pl-test!
"is with +"
(pl-parse-goal "is(Y, +(X, 1))")
(list
"compound"
"is"
(list
(list "var" "Y")
(list "compound" "+" (list (list "var" "X") (list "num" 1))))))
;; Strings
(pl-test!
"double-quoted string"
(pl-parse-goal "\"hello\"")
(list "str" "hello"))
;; Single-quoted atom
(pl-test!
"quoted atom"
(pl-parse-goal "'Hello World'")
(list "atom" "Hello World"))
;; Multi-clause program
(pl-test!
"append program"
(len
(pl-parse "append([], L, L).\nappend([H|T], L, [H|R]) :- append(T, L, R).\n"))
2)
;; Comments
(pl-test!
"line comment ignored"
(pl-parse "foo.\n% this is a comment\nbar.")
(list
(list "clause" (list "atom" "foo") (list "atom" "true"))
(list "clause" (list "atom" "bar") (list "atom" "true"))))
(pl-test!
"block comment ignored"
(pl-parse "/* hello */\nfoo.")
(list (list "clause" (list "atom" "foo") (list "atom" "true"))))
;; ── Runner ───────────────────────────────────────────────────────
(define pl-parse-tests-run! (fn () {:failed pl-test-fail :passed pl-test-pass :total pl-test-count :failures pl-test-failures}))

484
lib/prolog/tests/unify.sx Normal file
View File

@@ -0,0 +1,484 @@
;; lib/prolog/tests/unify.sx — unification + trail unit tests
;;
;; Run via MCP: (pl-unify-tests-run!)
;;
;; Covers: atoms, vars, numbers, strings, compounds, nested compounds,
;; cons-cell lists, trail undo, occurs-check, deep walks.
(define pl-u-count 0)
(define pl-u-pass 0)
(define pl-u-fail 0)
(define pl-u-failures (list))
(define
pl-u-test!
(fn
(name thunk expected)
(set! pl-u-count (+ pl-u-count 1))
(let
((got (thunk)))
(if
(= got expected)
(set! pl-u-pass (+ pl-u-pass 1))
(do
(set! pl-u-fail (+ pl-u-fail 1))
(set!
pl-u-failures
(cons (list name :expected expected :got got) pl-u-failures)))))))
;; Shortcuts
(define pl-a (fn (n) (list "atom" n)))
(define pl-n (fn (v) (list "num" v)))
(define pl-s (fn (v) (list "str" v)))
(define pl-c (fn (f args) (list "compound" f args)))
;; ── Primitive predicates ──────────────────────────────────────────
(pl-u-test! "var? on fresh var" (fn () (pl-var? (pl-mk-rt-var "X"))) true)
(pl-u-test! "var? on atom" (fn () (pl-var? (pl-a "foo"))) false)
(pl-u-test! "atom? on atom" (fn () (pl-atom? (pl-a "foo"))) true)
(pl-u-test! "atom? on var" (fn () (pl-atom? (pl-mk-rt-var "X"))) false)
(pl-u-test!
"compound? on compound"
(fn () (pl-compound? (pl-c "p" (list (pl-a "a")))))
true)
(pl-u-test! "num? on num" (fn () (pl-num? (pl-n 42))) true)
;; ── Fresh var ids ─────────────────────────────────────────────────
(pl-u-test!
"fresh vars get distinct ids"
(fn
()
(let
((a (pl-mk-rt-var "X")) (b (pl-mk-rt-var "X")))
(not (= (pl-var-id a) (pl-var-id b)))))
true)
;; ── Walk ───────────────────────────────────────────────────────────
(pl-u-test!
"walk returns unbound var unchanged"
(fn
()
(let
((v (pl-mk-rt-var "X")))
(= (pl-var-id (pl-walk v)) (pl-var-id v))))
true)
(pl-u-test!
"walk follows single binding"
(fn
()
(let
((t (pl-mk-trail)) (v (pl-mk-rt-var "X")))
(pl-bind! v (pl-a "hello") t)
(pl-walk v)))
(list "atom" "hello"))
(pl-u-test!
"walk follows chain var→var→atom"
(fn
()
(let
((t (pl-mk-trail)) (a (pl-mk-rt-var "A")) (b (pl-mk-rt-var "B")))
(pl-bind! a b t)
(pl-bind! b (pl-a "end") t)
(pl-walk a)))
(list "atom" "end"))
;; ── Unify: atoms ──────────────────────────────────────────────────
(pl-u-test!
"unify same atom"
(fn () (pl-unify! (pl-a "foo") (pl-a "foo") (pl-mk-trail)))
true)
(pl-u-test!
"unify different atoms"
(fn () (pl-unify! (pl-a "foo") (pl-a "bar") (pl-mk-trail)))
false)
;; ── Unify: numbers ────────────────────────────────────────────────
(pl-u-test!
"unify equal nums"
(fn () (pl-unify! (pl-n 5) (pl-n 5) (pl-mk-trail)))
true)
(pl-u-test!
"unify different nums"
(fn () (pl-unify! (pl-n 5) (pl-n 6) (pl-mk-trail)))
false)
(pl-u-test!
"atom vs num fails"
(fn () (pl-unify! (pl-a "5") (pl-n 5) (pl-mk-trail)))
false)
;; ── Unify: strings ────────────────────────────────────────────────
(pl-u-test!
"unify equal strings"
(fn () (pl-unify! (pl-s "hi") (pl-s "hi") (pl-mk-trail)))
true)
(pl-u-test!
"unify different strings"
(fn () (pl-unify! (pl-s "hi") (pl-s "bye") (pl-mk-trail)))
false)
;; ── Unify: variables ──────────────────────────────────────────────
(pl-u-test!
"unify var with atom binds"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-unify! x (pl-a "foo") t)
(pl-walk x)))
(list "atom" "foo"))
(pl-u-test!
"unify atom with var binds"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-unify! (pl-a "foo") x t)
(pl-walk x)))
(list "atom" "foo"))
(pl-u-test!
"unify var = var binds one to the other"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y")))
(pl-unify! x y t)
(pl-unify! y (pl-a "bound") t)
(pl-walk x)))
(list "atom" "bound"))
(pl-u-test!
"unify same var succeeds without binding"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-unify! x x t)
(list (pl-var-bound? x) (= (dict-get t :len) 0))))
(list false true))
(pl-u-test!
"bound-var vs atom uses binding"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-bind! x (pl-a "a") t)
(pl-unify! x (pl-a "a") t)))
true)
(pl-u-test!
"bound-var vs different atom fails"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-bind! x (pl-a "a") t)
(pl-unify! x (pl-a "b") t)))
false)
;; ── Unify: compounds ──────────────────────────────────────────────
(pl-u-test!
"unify p(a) with p(a)"
(fn
()
(pl-unify!
(pl-c "p" (list (pl-a "a")))
(pl-c "p" (list (pl-a "a")))
(pl-mk-trail)))
true)
(pl-u-test!
"unify p(a) with p(b)"
(fn
()
(pl-unify!
(pl-c "p" (list (pl-a "a")))
(pl-c "p" (list (pl-a "b")))
(pl-mk-trail)))
false)
(pl-u-test!
"unify p(a) with q(a) — functor mismatch"
(fn
()
(pl-unify!
(pl-c "p" (list (pl-a "a")))
(pl-c "q" (list (pl-a "a")))
(pl-mk-trail)))
false)
(pl-u-test!
"unify p(a) with p(a,b) — arity mismatch"
(fn
()
(pl-unify!
(pl-c "p" (list (pl-a "a")))
(pl-c "p" (list (pl-a "a") (pl-a "b")))
(pl-mk-trail)))
false)
(pl-u-test!
"unify p(X) with p(foo) binds X"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-unify! (pl-c "p" (list x)) (pl-c "p" (list (pl-a "foo"))) t)
(pl-walk x)))
(list "atom" "foo"))
(pl-u-test!
"unify p(X,Y) with p(1,2) binds both"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y")))
(pl-unify!
(pl-c "p" (list x y))
(pl-c "p" (list (pl-n 1) (pl-n 2)))
t)
(list (pl-walk x) (pl-walk y))))
(list (list "num" 1) (list "num" 2)))
(pl-u-test!
"unify p(X,X) with p(a,a)"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-unify!
(pl-c "p" (list x x))
(pl-c "p" (list (pl-a "a") (pl-a "a")))
t)))
true)
(pl-u-test!
"unify p(X,X) with p(a,b) fails"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-unify!
(pl-c "p" (list x x))
(pl-c "p" (list (pl-a "a") (pl-a "b")))
t)))
false)
;; ── Nested compounds ──────────────────────────────────────────────
(pl-u-test!
"unify f(g(X)) with f(g(foo))"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-unify!
(pl-c "f" (list (pl-c "g" (list x))))
(pl-c "f" (list (pl-c "g" (list (pl-a "foo")))))
t)
(pl-walk x)))
(list "atom" "foo"))
;; ── Cons-cell lists ──────────────────────────────────────────────
(define pl-nil (pl-a "[]"))
(define pl-cons (fn (h t) (pl-c "." (list h t))))
(pl-u-test!
"unify [1,2,3] with [X|T] binds X and T"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (tl (pl-mk-rt-var "T")))
(pl-unify!
(pl-cons (pl-n 1) (pl-cons (pl-n 2) (pl-cons (pl-n 3) pl-nil)))
(pl-cons x tl)
t)
(list (pl-walk x) (pl-walk-deep tl))))
(list
(list "num" 1)
(list
"compound"
"."
(list
(list "num" 2)
(list "compound" "." (list (list "num" 3) (list "atom" "[]")))))))
;; ── Trail: mark + undo ───────────────────────────────────────────
(pl-u-test!
"trail-undo restores unbound vars"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y")))
(let
((mark (pl-trail-mark t)))
(pl-unify! x (pl-a "a") t)
(pl-unify! y (pl-a "b") t)
(pl-trail-undo-to! t mark)
(list (pl-var-bound? x) (pl-var-bound? y) (dict-get t :len)))))
(list false false 0))
(pl-u-test!
"partial undo preserves earlier bindings"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y")))
(pl-unify! x (pl-a "a") t)
(let
((mark (pl-trail-mark t)))
(pl-unify! y (pl-a "b") t)
(pl-trail-undo-to! t mark)
(list (pl-var-bound? x) (pl-var-bound? y)))))
(list true false))
(pl-u-test!
"try-unify undoes on failure"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y")))
(pl-try-unify!
(pl-c "p" (list x y (pl-a "a")))
(pl-c "p" (list (pl-n 1) (pl-n 2) (pl-a "b")))
t)
(list (pl-var-bound? x) (pl-var-bound? y))))
(list false false))
(pl-u-test!
"try-unify success keeps bindings"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y")))
(pl-try-unify!
(pl-c "p" (list x y))
(pl-c "p" (list (pl-n 1) (pl-n 2)))
t)
(list (pl-walk x) (pl-walk y))))
(list (list "num" 1) (list "num" 2)))
;; ── Occurs check ──────────────────────────────────────────────────
(pl-u-test!
"no occurs check: X = f(X) succeeds"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(pl-unify! x (pl-c "f" (list x)) t)))
true)
(pl-u-test!
"occurs check: X = f(X) fails"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(dict-set! t :occurs-check true)
(pl-unify! x (pl-c "f" (list x)) t)))
false)
(pl-u-test!
"occurs check: X = Y, Y = f(X) fails"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y")))
(dict-set! t :occurs-check true)
(pl-unify! x y t)
(pl-unify! y (pl-c "f" (list x)) t)))
false)
(pl-u-test!
"occurs check: deep occurrence in compound"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")))
(dict-set! t :occurs-check true)
(pl-unify! x (pl-c "f" (list (pl-c "g" (list x)))) t)))
false)
(pl-u-test!
"occurs check: X = Y (both unbound) succeeds"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y")))
(dict-set! t :occurs-check true)
(pl-unify! x y t)))
true)
;; ── Parse AST → runtime term ──────────────────────────────────────
(pl-u-test!
"instantiate replaces (var X) with runtime var"
(fn
()
(let ((ast (list "var" "X"))) (pl-var? (pl-instantiate-fresh ast))))
true)
(pl-u-test!
"instantiate shares vars within clause"
(fn
()
(let
((env {}))
(let
((v1 (pl-instantiate (list "var" "X") env))
(v2 (pl-instantiate (list "var" "X") env)))
(= (pl-var-id v1) (pl-var-id v2)))))
true)
(pl-u-test!
"instantiate makes distinct vars for _"
(fn
()
(let
((env {}))
(let
((v1 (pl-instantiate (list "var" "_") env))
(v2 (pl-instantiate (list "var" "_") env)))
(not (= (pl-var-id v1) (pl-var-id v2))))))
true)
(pl-u-test!
"instantiate compound recurses"
(fn
()
(let
((env {}))
(let
((inst (pl-instantiate (list "compound" "p" (list (list "var" "X") (list "atom" "a"))) env))
(x (dict-get env "X")))
(pl-unify!
inst
(pl-c "p" (list (pl-a "foo") (pl-a "a")))
(pl-mk-trail))
(pl-walk x))))
(list "atom" "foo"))
(pl-u-test!
"deep-walk resolves nested vars"
(fn
()
(let
((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y")))
(pl-unify! x (pl-c "f" (list y (pl-a "b"))) t)
(pl-unify! y (pl-a "a") t)
(pl-walk-deep x)))
(list "compound" "f" (list (list "atom" "a") (list "atom" "b"))))
;; ── Runner ────────────────────────────────────────────────────────
(define pl-unify-tests-run! (fn () {:failed pl-u-fail :passed pl-u-pass :total pl-u-count :failures pl-u-failures}))

232
lib/prolog/tokenizer.sx Normal file
View File

@@ -0,0 +1,232 @@
;; lib/prolog/tokenizer.sx — Prolog source → token stream
;;
;; Tokens: {:type T :value V :pos P}
;; Types:
;; "atom" — lowercase-start, quoted, or symbolic atom (=, \=, +, etc.)
;; "var" — uppercase-start or _-start variable (value is the name)
;; "number" — numeric literal (decoded to number)
;; "string" — "..." string literal
;; "punct" — ( ) , . [ ] |
;; "op" — :- ! (phase 1 only has these two "operators")
;; "eof"
;;
;; NOTE: phase 1 parser does NOT handle operator precedence (no X is Y+1).
;; All compound terms are written as f(arg1, arg2, ...) — including
;; =(X, Y), is(X, +(1,2)), and so on, using symbolic atoms as functors.
(define pl-make-token (fn (type value pos) {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
(define pl-digit? (fn (c) (and (>= c "0") (<= c "9"))))
(define pl-lower? (fn (c) (and (>= c "a") (<= c "z"))))
(define pl-upper? (fn (c) (and (>= c "A") (<= c "Z"))))
(define
pl-ident-char?
(fn (c) (or (pl-lower? c) (pl-upper? c) (pl-digit? c) (= c "_"))))
(define pl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
;; Characters that form "symbolic atoms" (operator-shaped atoms like
;; =, \=, +, -, *, /, <, >, etc.). A run of these becomes a single atom
;; token. In phase 1 this lets users write =(X, Y) or is(X, +(1,2)) as
;; regular compound terms without needing an operator parser.
(define
pl-sym?
(fn
(c)
(or
(= c "=")
(= c "\\")
(= c "+")
(= c "-")
(= c "*")
(= c "/")
(= c "<")
(= c ">")
(= c "@")
(= c "#")
(= c "$")
(= c "&")
(= c "?")
(= c "^")
(= c "~")
(= c ";"))))
;; ── Main tokenizer ────────────────────────────────────────────────
(define
pl-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define
pl-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define cur (fn () (pl-peek 0)))
(define advance! (fn (n) (set! pos (+ pos n))))
(define
at?
(fn
(s)
(let
((sl (len s)))
(and (<= (+ pos sl) src-len) (= (slice src pos (+ pos sl)) s)))))
(define
pl-emit!
(fn
(type value start)
(append! tokens (pl-make-token type value start))))
(define
skip-line-comment!
(fn
()
(when
(and (< pos src-len) (not (= (cur) "\n")))
(do (advance! 1) (skip-line-comment!)))))
(define
skip-block-comment!
(fn
()
(cond
((>= pos src-len) nil)
((and (= (cur) "*") (< (+ pos 1) src-len) (= (pl-peek 1) "/"))
(advance! 2))
(else (do (advance! 1) (skip-block-comment!))))))
(define
skip-ws!
(fn
()
(cond
((>= pos src-len) nil)
((pl-ws? (cur)) (do (advance! 1) (skip-ws!)))
((= (cur) "%")
(do (advance! 1) (skip-line-comment!) (skip-ws!)))
((and (= (cur) "/") (< (+ pos 1) src-len) (= (pl-peek 1) "*"))
(do (advance! 2) (skip-block-comment!) (skip-ws!)))
(else nil))))
(define
read-ident
(fn
(start)
(do
(when
(and (< pos src-len) (pl-ident-char? (cur)))
(do (advance! 1) (read-ident start)))
(slice src start pos))))
(define
read-sym
(fn
(start)
(do
(when
(and (< pos src-len) (pl-sym? (cur)))
(do (advance! 1) (read-sym start)))
(slice src start pos))))
(define
read-decimal-digits!
(fn
()
(when
(and (< pos src-len) (pl-digit? (cur)))
(do (advance! 1) (read-decimal-digits!)))))
(define
read-number
(fn
(start)
(do
(read-decimal-digits!)
(when
(and
(< pos src-len)
(= (cur) ".")
(< (+ pos 1) src-len)
(pl-digit? (pl-peek 1)))
(do (advance! 1) (read-decimal-digits!)))
(parse-number (slice src start pos)))))
(define
read-quoted
(fn
(quote-char)
(let
((chars (list)))
(advance! 1)
(define
loop
(fn
()
(cond
((>= pos src-len) nil)
((= (cur) "\\")
(do
(advance! 1)
(when
(< pos src-len)
(let
((ch (cur)))
(do
(cond
((= ch "n") (append! chars "\n"))
((= ch "t") (append! chars "\t"))
((= ch "r") (append! chars "\r"))
((= ch "\\") (append! chars "\\"))
((= ch "'") (append! chars "'"))
((= ch "\"") (append! chars "\""))
(else (append! chars ch)))
(advance! 1))))
(loop)))
((= (cur) quote-char) (advance! 1))
(else (do (append! chars (cur)) (advance! 1) (loop))))))
(loop)
(join "" chars))))
(define
scan!
(fn
()
(do
(skip-ws!)
(when
(< pos src-len)
(let
((ch (cur)) (start pos))
(cond
((at? ":-")
(do (pl-emit! "op" ":-" start) (advance! 2) (scan!)))
((pl-digit? ch)
(do
(pl-emit! "number" (read-number start) start)
(scan!)))
((= ch "'")
(do (pl-emit! "atom" (read-quoted "'") start) (scan!)))
((= ch "\"")
(do (pl-emit! "string" (read-quoted "\"") start) (scan!)))
((pl-lower? ch)
(do (pl-emit! "atom" (read-ident start) start) (scan!)))
((or (pl-upper? ch) (= ch "_"))
(do (pl-emit! "var" (read-ident start) start) (scan!)))
((= ch "(")
(do (pl-emit! "punct" "(" start) (advance! 1) (scan!)))
((= ch ")")
(do (pl-emit! "punct" ")" start) (advance! 1) (scan!)))
((= ch ",")
(do (pl-emit! "punct" "," start) (advance! 1) (scan!)))
((= ch ".")
(do (pl-emit! "punct" "." start) (advance! 1) (scan!)))
((= ch "[")
(do (pl-emit! "punct" "[" start) (advance! 1) (scan!)))
((= ch "]")
(do (pl-emit! "punct" "]" start) (advance! 1) (scan!)))
((= ch "|")
(do (pl-emit! "punct" "|" start) (advance! 1) (scan!)))
((= ch "!")
(do (pl-emit! "op" "!" start) (advance! 1) (scan!)))
((pl-sym? ch)
(do (pl-emit! "atom" (read-sym start) start) (scan!)))
(else (do (advance! 1) (scan!)))))))))
(scan!)
(pl-emit! "eof" nil pos)
tokens)))