;; lib/prolog/parser.sx — tokens → Prolog AST ;; ;; Phase 1 grammar (NO operator table yet): ;; Program := Clause* EOF ;; Clause := Term "." | Term ":-" Term "." ;; Term := Atom | Var | Number | String | Compound | List ;; Compound := atom "(" ArgList ")" ;; ArgList := Term ("," Term)* ;; List := "[" "]" | "[" Term ("," Term)* ("|" Term)? "]" ;; ;; Term AST shapes (all tagged lists for uniform dispatch): ;; ("atom" name) — atom ;; ("var" name) — variable template (parser-time only) ;; ("num" value) — integer or float ;; ("str" value) — string literal ;; ("compound" functor args) — compound term, args is list of term-ASTs ;; ("cut") — the cut atom ! ;; ;; A clause is (list "clause" head body). A fact is head with body = ("atom" "true"). ;; ;; The empty list is (atom "[]"). Cons is compound "." with two args: ;; [1, 2, 3] → .(1, .(2, .(3, []))) ;; [H|T] → .(H, T) ;; ── Parser state helpers ──────────────────────────────────────────── (define pp-peek (fn (st) (let ((i (get st :idx)) (tokens (get st :tokens))) (if (< i (len tokens)) (nth tokens i) {:pos 0 :value nil :type "eof"})))) (define pp-advance! (fn (st) (dict-set! st :idx (+ (get st :idx) 1)))) (define pp-at? (fn (st type value) (let ((t (pp-peek st))) (and (= (get t :type) type) (or (= value nil) (= (get t :value) value)))))) (define pp-expect! (fn (st type value) (let ((t (pp-peek st))) (if (pp-at? st type value) (do (pp-advance! st) t) (error (str "Parse error at pos " (get t :pos) ": expected " type " '" (if (= value nil) "" value) "' got " (get t :type) " '" (if (= (get t :value) nil) "" (get t :value)) "'")))))) ;; ── AST constructors ──────────────────────────────────────────────── (define pl-mk-atom (fn (name) (list "atom" name))) (define pl-mk-var (fn (name) (list "var" name))) (define pl-mk-num (fn (n) (list "num" n))) (define pl-mk-str (fn (s) (list "str" s))) (define pl-mk-compound (fn (f args) (list "compound" f args))) (define pl-mk-cut (fn () (list "cut"))) ;; Term tag extractors (define pl-term-tag (fn (t) (if (list? t) (first t) nil))) (define pl-term-val (fn (t) (nth t 1))) (define pl-compound-functor (fn (t) (nth t 1))) (define pl-compound-args (fn (t) (nth t 2))) ;; Empty-list atom and cons helpers (define pl-nil-term (fn () (pl-mk-atom "[]"))) (define pl-mk-cons (fn (h t) (pl-mk-compound "." (list h t)))) ;; Build cons list from a list of terms + optional tail (define pl-mk-list-term (fn (items tail) (if (= (len items) 0) tail (pl-mk-cons (first items) (pl-mk-list-term (rest items) tail))))) ;; ── Term parser ───────────────────────────────────────────────────── (define pp-parse-term (fn (st) (let ((t (pp-peek st))) (let ((ty (get t :type)) (vv (get t :value))) (cond ((= ty "number") (do (pp-advance! st) (pl-mk-num vv))) ((= ty "string") (do (pp-advance! st) (pl-mk-str vv))) ((= ty "var") (do (pp-advance! st) (pl-mk-var vv))) ((and (= ty "op") (= vv "!")) (do (pp-advance! st) (pl-mk-cut))) ((and (= ty "punct") (= vv "[")) (pp-parse-list st)) ((= ty "atom") (do (pp-advance! st) (if (pp-at? st "punct" "(") (do (pp-advance! st) (let ((args (pp-parse-arg-list st))) (do (pp-expect! st "punct" ")") (pl-mk-compound vv args)))) (pl-mk-atom vv)))) (else (error (str "Parse error at pos " (get t :pos) ": unexpected " ty " '" (if (= vv nil) "" vv) "'")))))))) ;; Parse one or more comma-separated terms (arguments). (define pp-parse-arg-list (fn (st) (let ((first-arg (pp-parse-term st)) (args (list))) (do (append! args first-arg) (define loop (fn () (when (pp-at? st "punct" ",") (do (pp-advance! st) (append! args (pp-parse-term st)) (loop))))) (loop) args)))) ;; Parse a [ ... ] list literal. Consumes the "[". (define pp-parse-list (fn (st) (do (pp-expect! st "punct" "[") (if (pp-at? st "punct" "]") (do (pp-advance! st) (pl-nil-term)) (let ((items (list))) (do (append! items (pp-parse-term st)) (define comma-loop (fn () (when (pp-at? st "punct" ",") (do (pp-advance! st) (append! items (pp-parse-term st)) (comma-loop))))) (comma-loop) (let ((tail (if (pp-at? st "punct" "|") (do (pp-advance! st) (pp-parse-term st)) (pl-nil-term)))) (do (pp-expect! st "punct" "]") (pl-mk-list-term items tail))))))))) ;; ── Body parsing ──────────────────────────────────────────────────── ;; A clause body is a comma-separated list of goals. We flatten into a ;; right-associative `,` compound: (A, B, C) → ','(A, ','(B, C)) ;; If only one goal, it's that goal directly. (define pp-parse-body (fn (st) (let ((first-goal (pp-parse-term st)) (rest-goals (list))) (do (define gloop (fn () (when (pp-at? st "punct" ",") (do (pp-advance! st) (append! rest-goals (pp-parse-term st)) (gloop))))) (gloop) (if (= (len rest-goals) 0) first-goal (pp-build-conj first-goal rest-goals)))))) (define pp-build-conj (fn (first-goal rest-goals) (if (= (len rest-goals) 0) first-goal (pl-mk-compound "," (list first-goal (pp-build-conj (first rest-goals) (rest rest-goals))))))) ;; ── Clause parsing ────────────────────────────────────────────────── (define pp-parse-clause (fn (st) (let ((head (pp-parse-term st))) (let ((body (if (pp-at? st "op" ":-") (do (pp-advance! st) (pp-parse-body st)) (pl-mk-atom "true")))) (do (pp-expect! st "punct" ".") (list "clause" head body)))))) ;; Parse an entire program — returns list of clauses. (define pl-parse-program (fn (tokens) (let ((st {:idx 0 :tokens tokens}) (clauses (list))) (do (define ploop (fn () (when (not (pp-at? st "eof" nil)) (do (append! clauses (pp-parse-clause st)) (ploop))))) (ploop) clauses)))) ;; Parse a single query term (no trailing "."). Returns the term. (define pl-parse-query (fn (tokens) (let ((st {:idx 0 :tokens tokens})) (pp-parse-body st)))) ;; Convenience: source → clauses (define pl-parse (fn (src) (pl-parse-program (pl-tokenize src)))) ;; Convenience: source → query term (define pl-parse-goal (fn (src) (pl-parse-query (pl-tokenize src))))