prolog: operator-table parser + < > =< >= built-ins, 19 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled

This commit is contained in:
2026-04-25 06:57:48 +00:00
parent e018ba9423
commit 3190e770fb
7 changed files with 342 additions and 84 deletions

View File

@@ -1,28 +1,20 @@
;; lib/prolog/parser.sx — tokens → Prolog AST
;;
;; Phase 1 grammar (NO operator table yet):
;; Phase 4 grammar (with operator table):
;; Program := Clause* EOF
;; Clause := Term "." | Term ":-" Term "."
;; Term := Atom | Var | Number | String | Compound | List
;; Compound := atom "(" ArgList ")"
;; ArgList := Term ("," Term)*
;; List := "[" "]" | "[" Term ("," Term)* ("|" Term)? "]"
;; Clause := Term[999] "." | Term[999] ":-" Term[1200] "."
;; Term[Pmax] uses precedence climbing on the operator table:
;; primary = Atom | Var | Number | String | Compound | List | "(" Term[1200] ")"
;; while next token is infix op `op` with prec(op) ≤ Pmax:
;; consume op; parse rhs at right-prec(op); fold into compound(op-name,[lhs,rhs])
;;
;; Term AST shapes (all tagged lists for uniform dispatch):
;; ("atom" name) — atom
;; ("var" name) — variable template (parser-time only)
;; ("num" value) — integer or float
;; ("str" value) — string literal
;; ("compound" functor args) — compound term, args is list of term-ASTs
;; ("cut") — the cut atom !
;; Op type → right-prec for op at precedence P:
;; xfx → P-1 strict-both
;; xfy → P right-associative
;; yfx → P-1 left-associative
;;
;; A clause is (list "clause" head body). A fact is head with body = ("atom" "true").
;;
;; The empty list is (atom "[]"). Cons is compound "." with two args:
;; [1, 2, 3] → .(1, .(2, .(3, [])))
;; [H|T] → .(H, T)
;; AST shapes are unchanged — operators just become compound terms.
;; ── Parser state helpers ────────────────────────────────────────────
(define
pp-peek
(fn
@@ -66,7 +58,6 @@
(if (= (get t :value) nil) "" (get t :value))
"'"))))))
;; ── AST constructors ────────────────────────────────────────────────
;; Leaf constructors. Each returns a two-element tagged list whose first
;; element is the tag string and whose second element is the payload.
;;   (pl-mk-atom "foo") → ("atom" "foo")
(define
  pl-mk-atom
  (fn
    (name)
    (list "atom" name)))
;;   (pl-mk-var "X") → ("var" "X")
(define
  pl-mk-var
  (fn
    (name)
    (list "var" name)))
;;   (pl-mk-num 42) → ("num" 42)
(define
  pl-mk-num
  (fn
    (n)
    (list "num" n)))
@@ -74,18 +65,14 @@
;; Compound node: ("compound" functor args). `args` is stored as given,
;; without copying.
(define
  pl-mk-compound
  (fn
    (f args)
    (list "compound" f args)))
;; Cut node: the one-element list ("cut"). Takes no arguments.
(define
  pl-mk-cut
  (fn
    ()
    (list "cut")))
;; Term tag extractors
;; Tag of a term: its first element when the term is a list, nil otherwise.
(define
  pl-term-tag
  (fn
    (t)
    (cond
      ((list? t) (first t))
      (true nil))))
;; Payload of a two-element node (element at index 1).
(define
  pl-term-val
  (fn
    (t)
    (nth t 1)))
;; Functor of a compound node (element at index 1).
(define
  pl-compound-functor
  (fn
    (t)
    (nth t 1)))
;; Argument list of a compound node (element at index 2).
(define
  pl-compound-args
  (fn
    (t)
    (nth t 2)))
;; Empty-list atom and cons helpers
;; The empty list is represented as the atom named "[]".
(define
  pl-nil-term
  (fn
    ()
    (pl-mk-atom "[]")))
;; A cons cell is the compound "." applied to exactly (head, tail).
(define
  pl-mk-cons
  (fn
    (h t)
    (let
      ((cell-args (list h t)))
      (pl-mk-compound "." cell-args))))
;; Build cons list from a list of terms + optional tail
(define
pl-mk-list-term
(fn
@@ -95,9 +82,60 @@
tail
(pl-mk-cons (first items) (pl-mk-list-term (rest items) tail)))))
;; ── Term parser ─────────────────────────────────────────────────────
;; ── Operator table (Phase 4) ──────────────────────────────────────
;; Each entry: (name precedence type). Type ∈ "xfx" "xfy" "yfx".
;; Infix operator table. Every entry is a three-element list
;; (name precedence type) with type one of "xfx", "xfy", "yfx".
;; pl-op-find scans the table front to back and returns the first match.
(define
  pl-op-table
  (list
    ;; precedence 1000-1100, all xfy
    (list "," 1000 "xfy")
    (list ";" 1100 "xfy")
    (list "->" 1050 "xfy")
    ;; precedence 700, all xfx
    (list "=" 700 "xfx")
    (list "\\=" 700 "xfx")
    (list "is" 700 "xfx")
    (list "<" 700 "xfx")
    (list ">" 700 "xfx")
    (list "=<" 700 "xfx")
    (list ">=" 700 "xfx")
    ;; precedence 500 and 400, all yfx
    (list "+" 500 "yfx")
    (list "-" 500 "yfx")
    (list "*" 400 "yfx")
    (list "/" 400 "yfx")
    (list "mod" 400 "yfx")))
;; Linear search of `table` for an entry whose first element equals `name`.
;; Returns the remainder of the first matching entry (everything after the
;; name), or nil when the table is exhausted without a match.
(define
  pl-op-find
  (fn
    (name table)
    (if
      (empty? table)
      nil
      (let
        ((entry (first table)))
        (if
          (= (first entry) name)
          (rest entry)
          (pl-op-find name (rest table)))))))
;; Look `name` up in the global pl-op-table; result is whatever
;; pl-op-find returns for that table.
(define
  pl-op-lookup
  (fn
    (name)
    (pl-op-find name pl-op-table)))
;; Token → (name prec type) for known infix ops, else nil.
;; Classify token `t` as an infix operator.
;; Only two token shapes are considered: the punct token "," and any
;; token of type "atom" (looked up by its value). Returns the lookup
;; result with the operator name consed onto the front, or nil when the
;; token is of another shape or its name is not in the operator table.
(define
  pl-token-op
  (fn
    (t)
    (let
      ((kind (get t :type)) (text (get t :value)))
      (let
        ((op-name
           (cond
             ((and (= kind "punct") (= text ",")) ",")
             ((= kind "atom") text)
             (true nil))))
        (if
          (nil? op-name)
          nil
          (let
            ((entry (pl-op-lookup op-name)))
            (if (nil? entry) nil (cons op-name entry))))))))
;; ── Term parser ─────────────────────────────────────────────────────
;; Primary term: atom, var, num, str, compound (atom + paren), list, cut, parens.
(define
pp-parse-primary
(fn
(st)
(let
@@ -111,6 +149,12 @@
((and (= ty "op") (= vv "!"))
(do (pp-advance! st) (pl-mk-cut)))
((and (= ty "punct") (= vv "[")) (pp-parse-list st))
((and (= ty "punct") (= vv "("))
(do
(pp-advance! st)
(let
((inner (pp-parse-term-prec st 1200)))
(do (pp-expect! st "punct" ")") inner))))
((= ty "atom")
(do
(pp-advance! st)
@@ -133,13 +177,51 @@
(if (= vv nil) "" vv)
"'"))))))))
;; Parse one or more comma-separated terms (arguments).
;; Operator-aware term parser: precedence climbing.
;; Parse a term whose operators have precedence at most `max-prec`:
;; read one primary, then hand it to pp-parse-op-rhs as the left operand.
(define
  pp-parse-term-prec
  (fn
    (st max-prec)
    (pp-parse-op-rhs st (pp-parse-primary st) max-prec)))
;; Private worker for pp-parse-op-rhs.
;;   st        parser state
;;   left      term parsed so far (left operand candidate)
;;   left-prec precedence of `left`: 0 for a primary, otherwise the
;;             precedence of the operator that was folded last
;;   max-prec  highest operator precedence this call may consume
;; For an operator at precedence P the operand limits are:
;;   xfx  left ≤ P-1, right ≤ P-1   (non-associative)
;;   xfy  left ≤ P-1, right ≤ P     (right-associative)
;;   yfx  left ≤ P,   right ≤ P-1   (left-associative)
;; The loop stops, returning `left` and leaving the operator token
;; unconsumed, when the next token is not an operator, when its precedence
;; exceeds max-prec, or when `left` is too loose to be its left operand.
;; The last case makes `a = b = c` stop after `a = b` rather than silently
;; folding to `=(=(a,b),c)`; the leftover token is for the caller to reject.
(define
  pp-parse-op-rhs-from
  (fn
    (st left left-prec max-prec)
    (let
      ((op-info (pl-token-op (pp-peek st))))
      (cond
        ((nil? op-info) left)
        (true
          (let
            ((name (first op-info))
             (prec (nth op-info 1))
             (ty (nth op-info 2)))
            (let
              ((left-max (if (= ty "yfx") prec (- prec 1)))
               (right-max (if (= ty "xfy") prec (- prec 1))))
              (cond
                ((> prec max-prec) left)
                ((> left-prec left-max) left)
                (true
                  (do
                    (pp-advance! st)
                    (let
                      ((right (pp-parse-term-prec st right-max)))
                      (pp-parse-op-rhs-from
                        st
                        (pl-mk-compound name (list left right))
                        prec
                        max-prec))))))))))))
;; Precedence-climbing loop: extend `left` with infix operators of
;; precedence ≤ max-prec and return the resulting term. `left` is treated
;; as a primary (precedence 0), which is how pp-parse-term-prec calls it.
(define
  pp-parse-op-rhs
  (fn
    (st left max-prec)
    (pp-parse-op-rhs-from st left 0 max-prec)))
;; Backwards-compat alias.
;; Same as pp-parse-term-prec with the precedence ceiling fixed at 999.
(define
  pp-parse-term
  (fn
    (st)
    (pp-parse-term-prec st 999)))
;; Args inside parens: parse at prec 999 so comma-as-operator (1000)
;; is not consumed; the explicit comma loop handles separation.
(define
pp-parse-arg-list
(fn
(st)
(let
((first-arg (pp-parse-term st)) (args (list)))
((first-arg (pp-parse-term-prec st 999)) (args (list)))
(do
(append! args first-arg)
(define
@@ -150,12 +232,12 @@
(pp-at? st "punct" ",")
(do
(pp-advance! st)
(append! args (pp-parse-term st))
(append! args (pp-parse-term-prec st 999))
(loop)))))
(loop)
args))))
;; Parse a [ ... ] list literal. Consumes the "[".
;; List literal.
(define
pp-parse-list
(fn
@@ -168,7 +250,7 @@
(let
((items (list)))
(do
(append! items (pp-parse-term st))
(append! items (pp-parse-term-prec st 999))
(define
comma-loop
(fn
@@ -177,52 +259,17 @@
(pp-at? st "punct" ",")
(do
(pp-advance! st)
(append! items (pp-parse-term st))
(append! items (pp-parse-term-prec st 999))
(comma-loop)))))
(comma-loop)
(let
((tail (if (pp-at? st "punct" "|") (do (pp-advance! st) (pp-parse-term st)) (pl-nil-term))))
((tail (if (pp-at? st "punct" "|") (do (pp-advance! st) (pp-parse-term-prec st 999)) (pl-nil-term))))
(do (pp-expect! st "punct" "]") (pl-mk-list-term items tail)))))))))
;; ── Body parsing ────────────────────────────────────────────────────
;; A clause body is a comma-separated list of goals. We flatten into a
;; right-associative `,` compound: (A, B, C) → ','(A, ','(B, C))
;; If only one goal, it's that goal directly.
;; Loop-based body parser: reads one goal with pp-parse-term, then, while
;; the next token is punct ",", consumes it and reads another goal.
;; A single goal is returned as-is; several are combined by pp-build-conj.
;; NOTE(review): pp-parse-body is defined again further down in this file
;; as a one-line wrapper — confirm which definition is meant to be live.
(define
  pp-parse-body
  (fn
    (st)
    (let
      ((head-goal (pp-parse-term st)) (more-goals (list)))
      (do
        (define
          gloop
          (fn
            ()
            (when
              (pp-at? st "punct" ",")
              (do
                (pp-advance! st)
                (append! more-goals (pp-parse-term st))
                (gloop)))))
        (gloop)
        (cond
          ((= (len more-goals) 0) head-goal)
          (true (pp-build-conj head-goal more-goals)))))))
;; Fold a goal and a list of following goals into a right-nested ","
;; compound: first-goal alone when rest-goals is empty, otherwise
;; ","(first-goal, <conjunction of rest-goals>).
(define
  pp-build-conj
  (fn
    (first-goal rest-goals)
    (cond
      ((= (len rest-goals) 0) first-goal)
      (true
        (let
          ((tail-conj (pp-build-conj (first rest-goals) (rest rest-goals))))
          (pl-mk-compound "," (list first-goal tail-conj)))))))
;; A body is a single term parsed at prec 1200 — operator parser folds
;; `,`, `;`, `->` automatically into right-associative compounds.
;; Body = one term parsed with the precedence ceiling at 1200.
(define
  pp-parse-body
  (fn
    (st)
    (pp-parse-term-prec st 1200)))
;; ── Clause parsing ──────────────────────────────────────────────────
(define
@@ -230,12 +277,11 @@
(fn
(st)
(let
((head (pp-parse-term st)))
((head (pp-parse-term-prec st 999)))
(let
((body (if (pp-at? st "op" ":-") (do (pp-advance! st) (pp-parse-body st)) (pl-mk-atom "true"))))
(do (pp-expect! st "punct" ".") (list "clause" head body))))))
;; Parse an entire program — returns list of clauses.
(define
pl-parse-program
(fn
@@ -253,13 +299,9 @@
(ploop)
clauses))))
;; Parse a single query term (no trailing "."). Returns the term.
(define
  pl-parse-query
  (fn
    (tokens)
    ;; Fresh parser state positioned at the first token; the result of
    ;; pp-parse-body on that state is returned unchanged.
    (pp-parse-body {:idx 0 :tokens tokens})))
;; Convenience: source → clauses
(define
  pl-parse
  (fn
    (src)
    (let
      ((tokens (pl-tokenize src)))
      (pl-parse-program tokens))))
;; Convenience: source → query term (tokenize, then pl-parse-query)
(define
  pl-parse-goal
  (fn
    (src)
    (let
      ((tokens (pl-tokenize src)))
      (pl-parse-query tokens))))