Recover agent-loop progress: lua/prolog/forth/erlang/haskell phases 1-2
Salvaged from worktree-agent-* branches killed during sx-tree MCP outage: - lua: tokenizer + parser + phase-2 transpile (~157 tests) - prolog: tokenizer + parser + unification (72 tests, plan update lost to WIP) - forth: phase-1 reader/interpreter + phase-2 colon/VARIABLE (134 tests) - erlang: tokenizer + parser (114 tests) - haskell: tokenizer + parse tests (43 tests) Cherry-picked file contents only, not branch history, to avoid pulling in unrelated ocaml-vm merge commits that were in those branches' bases.
This commit is contained in:
232
lib/prolog/tokenizer.sx
Normal file
232
lib/prolog/tokenizer.sx
Normal file
@@ -0,0 +1,232 @@
|
||||
;; lib/prolog/tokenizer.sx — Prolog source → token stream
|
||||
;;
|
||||
;; Tokens: {:type T :value V :pos P}
|
||||
;; Types:
|
||||
;; "atom" — lowercase-start, quoted, or symbolic atom (=, \=, +, etc.)
|
||||
;; "var" — uppercase-start or _-start variable (value is the name)
|
||||
;; "number" — numeric literal (decoded to number)
|
||||
;; "string" — "..." string literal
|
||||
;; "punct" — ( ) , . [ ] |
|
||||
;; "op" — :- ! (phase 1 only has these two "operators")
|
||||
;; "eof"
|
||||
;;
|
||||
;; NOTE: phase 1 parser does NOT handle operator precedence (no X is Y+1).
|
||||
;; All compound terms are written as f(arg1, arg2, ...) — including
|
||||
;; =(X, Y), is(X, +(1,2)), and so on, using symbolic atoms as functors.
|
||||
|
||||
(define pl-make-token (fn (type value pos) {:pos pos :value value :type type}))
|
||||
|
||||
;; ── Character predicates ──────────────────────────────────────────
|
||||
(define pl-digit? (fn (c) (and (>= c "0") (<= c "9"))))
|
||||
|
||||
(define pl-lower? (fn (c) (and (>= c "a") (<= c "z"))))
|
||||
|
||||
(define pl-upper? (fn (c) (and (>= c "A") (<= c "Z"))))
|
||||
|
||||
(define
|
||||
pl-ident-char?
|
||||
(fn (c) (or (pl-lower? c) (pl-upper? c) (pl-digit? c) (= c "_"))))
|
||||
|
||||
(define pl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
|
||||
|
||||
;; Characters that form "symbolic atoms" (operator-shaped atoms like
|
||||
;; =, \=, +, -, *, /, <, >, etc.). A run of these becomes a single atom
|
||||
;; token. In phase 1 this lets users write =(X, Y) or is(X, +(1,2)) as
|
||||
;; regular compound terms without needing an operator parser.
|
||||
(define
|
||||
pl-sym?
|
||||
(fn
|
||||
(c)
|
||||
(or
|
||||
(= c "=")
|
||||
(= c "\\")
|
||||
(= c "+")
|
||||
(= c "-")
|
||||
(= c "*")
|
||||
(= c "/")
|
||||
(= c "<")
|
||||
(= c ">")
|
||||
(= c "@")
|
||||
(= c "#")
|
||||
(= c "$")
|
||||
(= c "&")
|
||||
(= c "?")
|
||||
(= c "^")
|
||||
(= c "~")
|
||||
(= c ";"))))
|
||||
|
||||
;; ── Main tokenizer ────────────────────────────────────────────────
|
||||
(define
|
||||
pl-tokenize
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (src-len (len src)))
|
||||
(define
|
||||
pl-peek
|
||||
(fn
|
||||
(offset)
|
||||
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
||||
(define cur (fn () (pl-peek 0)))
|
||||
(define advance! (fn (n) (set! pos (+ pos n))))
|
||||
(define
|
||||
at?
|
||||
(fn
|
||||
(s)
|
||||
(let
|
||||
((sl (len s)))
|
||||
(and (<= (+ pos sl) src-len) (= (slice src pos (+ pos sl)) s)))))
|
||||
(define
|
||||
pl-emit!
|
||||
(fn
|
||||
(type value start)
|
||||
(append! tokens (pl-make-token type value start))))
|
||||
(define
|
||||
skip-line-comment!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (not (= (cur) "\n")))
|
||||
(do (advance! 1) (skip-line-comment!)))))
|
||||
(define
|
||||
skip-block-comment!
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((and (= (cur) "*") (< (+ pos 1) src-len) (= (pl-peek 1) "/"))
|
||||
(advance! 2))
|
||||
(else (do (advance! 1) (skip-block-comment!))))))
|
||||
(define
|
||||
skip-ws!
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((pl-ws? (cur)) (do (advance! 1) (skip-ws!)))
|
||||
((= (cur) "%")
|
||||
(do (advance! 1) (skip-line-comment!) (skip-ws!)))
|
||||
((and (= (cur) "/") (< (+ pos 1) src-len) (= (pl-peek 1) "*"))
|
||||
(do (advance! 2) (skip-block-comment!) (skip-ws!)))
|
||||
(else nil))))
|
||||
(define
|
||||
read-ident
|
||||
(fn
|
||||
(start)
|
||||
(do
|
||||
(when
|
||||
(and (< pos src-len) (pl-ident-char? (cur)))
|
||||
(do (advance! 1) (read-ident start)))
|
||||
(slice src start pos))))
|
||||
(define
|
||||
read-sym
|
||||
(fn
|
||||
(start)
|
||||
(do
|
||||
(when
|
||||
(and (< pos src-len) (pl-sym? (cur)))
|
||||
(do (advance! 1) (read-sym start)))
|
||||
(slice src start pos))))
|
||||
(define
|
||||
read-decimal-digits!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (pl-digit? (cur)))
|
||||
(do (advance! 1) (read-decimal-digits!)))))
|
||||
(define
|
||||
read-number
|
||||
(fn
|
||||
(start)
|
||||
(do
|
||||
(read-decimal-digits!)
|
||||
(when
|
||||
(and
|
||||
(< pos src-len)
|
||||
(= (cur) ".")
|
||||
(< (+ pos 1) src-len)
|
||||
(pl-digit? (pl-peek 1)))
|
||||
(do (advance! 1) (read-decimal-digits!)))
|
||||
(parse-number (slice src start pos)))))
|
||||
(define
|
||||
read-quoted
|
||||
(fn
|
||||
(quote-char)
|
||||
(let
|
||||
((chars (list)))
|
||||
(advance! 1)
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((= (cur) "\\")
|
||||
(do
|
||||
(advance! 1)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((ch (cur)))
|
||||
(do
|
||||
(cond
|
||||
((= ch "n") (append! chars "\n"))
|
||||
((= ch "t") (append! chars "\t"))
|
||||
((= ch "r") (append! chars "\r"))
|
||||
((= ch "\\") (append! chars "\\"))
|
||||
((= ch "'") (append! chars "'"))
|
||||
((= ch "\"") (append! chars "\""))
|
||||
(else (append! chars ch)))
|
||||
(advance! 1))))
|
||||
(loop)))
|
||||
((= (cur) quote-char) (advance! 1))
|
||||
(else (do (append! chars (cur)) (advance! 1) (loop))))))
|
||||
(loop)
|
||||
(join "" chars))))
|
||||
(define
|
||||
scan!
|
||||
(fn
|
||||
()
|
||||
(do
|
||||
(skip-ws!)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((ch (cur)) (start pos))
|
||||
(cond
|
||||
((at? ":-")
|
||||
(do (pl-emit! "op" ":-" start) (advance! 2) (scan!)))
|
||||
((pl-digit? ch)
|
||||
(do
|
||||
(pl-emit! "number" (read-number start) start)
|
||||
(scan!)))
|
||||
((= ch "'")
|
||||
(do (pl-emit! "atom" (read-quoted "'") start) (scan!)))
|
||||
((= ch "\"")
|
||||
(do (pl-emit! "string" (read-quoted "\"") start) (scan!)))
|
||||
((pl-lower? ch)
|
||||
(do (pl-emit! "atom" (read-ident start) start) (scan!)))
|
||||
((or (pl-upper? ch) (= ch "_"))
|
||||
(do (pl-emit! "var" (read-ident start) start) (scan!)))
|
||||
((= ch "(")
|
||||
(do (pl-emit! "punct" "(" start) (advance! 1) (scan!)))
|
||||
((= ch ")")
|
||||
(do (pl-emit! "punct" ")" start) (advance! 1) (scan!)))
|
||||
((= ch ",")
|
||||
(do (pl-emit! "punct" "," start) (advance! 1) (scan!)))
|
||||
((= ch ".")
|
||||
(do (pl-emit! "punct" "." start) (advance! 1) (scan!)))
|
||||
((= ch "[")
|
||||
(do (pl-emit! "punct" "[" start) (advance! 1) (scan!)))
|
||||
((= ch "]")
|
||||
(do (pl-emit! "punct" "]" start) (advance! 1) (scan!)))
|
||||
((= ch "|")
|
||||
(do (pl-emit! "punct" "|" start) (advance! 1) (scan!)))
|
||||
((= ch "!")
|
||||
(do (pl-emit! "op" "!" start) (advance! 1) (scan!)))
|
||||
((pl-sym? ch)
|
||||
(do (pl-emit! "atom" (read-sym start) start) (scan!)))
|
||||
(else (do (advance! 1) (scan!)))))))))
|
||||
(scan!)
|
||||
(pl-emit! "eof" nil pos)
|
||||
tokens)))
|
||||
Reference in New Issue
Block a user