Recover agent-loop progress: lua/prolog/forth/erlang/haskell phases 1-2
Salvaged from worktree-agent-* branches killed during sx-tree MCP outage: - lua: tokenizer + parser + phase-2 transpile (~157 tests) - prolog: tokenizer + parser + unification (72 tests, plan update lost to WIP) - forth: phase-1 reader/interpreter + phase-2 colon/VARIABLE (134 tests) - erlang: tokenizer + parser (114 tests) - haskell: tokenizer + parse tests (43 tests) Cherry-picked file contents only, not branch history, to avoid pulling in unrelated ocaml-vm merge commits that were in those branches' bases.
This commit is contained in:
353
lib/lua/tokenizer.sx
Normal file
353
lib/lua/tokenizer.sx
Normal file
@@ -0,0 +1,353 @@
|
||||
(define lua-make-token (fn (type value pos) {:pos pos :value value :type type}))
|
||||
|
||||
(define lua-digit? (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9"))))
|
||||
|
||||
(define
|
||||
lua-hex-digit?
|
||||
(fn
|
||||
(c)
|
||||
(and
|
||||
(not (= c nil))
|
||||
(or
|
||||
(lua-digit? c)
|
||||
(and (>= c "a") (<= c "f"))
|
||||
(and (>= c "A") (<= c "F"))))))
|
||||
|
||||
(define
|
||||
lua-letter?
|
||||
(fn
|
||||
(c)
|
||||
(and
|
||||
(not (= c nil))
|
||||
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))))
|
||||
|
||||
(define lua-ident-start? (fn (c) (or (lua-letter? c) (= c "_"))))
|
||||
|
||||
(define lua-ident-char? (fn (c) (or (lua-ident-start? c) (lua-digit? c))))
|
||||
|
||||
(define lua-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
|
||||
|
||||
(define
|
||||
lua-keywords
|
||||
(list
|
||||
"and"
|
||||
"break"
|
||||
"do"
|
||||
"else"
|
||||
"elseif"
|
||||
"end"
|
||||
"false"
|
||||
"for"
|
||||
"function"
|
||||
"goto"
|
||||
"if"
|
||||
"in"
|
||||
"local"
|
||||
"nil"
|
||||
"not"
|
||||
"or"
|
||||
"repeat"
|
||||
"return"
|
||||
"then"
|
||||
"true"
|
||||
"until"
|
||||
"while"))
|
||||
|
||||
(define lua-keyword? (fn (word) (contains? lua-keywords word)))
|
||||
|
||||
(define
|
||||
lua-tokenize
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (src-len (len src)))
|
||||
(define
|
||||
lua-peek
|
||||
(fn
|
||||
(offset)
|
||||
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
||||
(define cur (fn () (lua-peek 0)))
|
||||
(define advance! (fn (n) (set! pos (+ pos n))))
|
||||
(define
|
||||
push!
|
||||
(fn
|
||||
(type value start)
|
||||
(append! tokens (lua-make-token type value start))))
|
||||
(define
|
||||
match-long-open
|
||||
(fn
|
||||
()
|
||||
(if
|
||||
(= (cur) "[")
|
||||
(let
|
||||
((p (+ pos 1)) (level 0))
|
||||
(begin
|
||||
(define
|
||||
count-eq
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< p src-len) (= (nth src p) "="))
|
||||
(begin
|
||||
(set! level (+ level 1))
|
||||
(set! p (+ p 1))
|
||||
(count-eq)))))
|
||||
(count-eq)
|
||||
(if (and (< p src-len) (= (nth src p) "[")) level -1)))
|
||||
-1)))
|
||||
(define
|
||||
read-long-body
|
||||
(fn
|
||||
(level)
|
||||
(let
|
||||
((start pos) (result nil))
|
||||
(begin
|
||||
(define
|
||||
scan
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) (set! result (slice src start pos)))
|
||||
((= (cur) "]")
|
||||
(let
|
||||
((p (+ pos 1)) (eq-count 0))
|
||||
(begin
|
||||
(define
|
||||
count-eq
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< p src-len) (= (nth src p) "="))
|
||||
(begin
|
||||
(set! eq-count (+ eq-count 1))
|
||||
(set! p (+ p 1))
|
||||
(count-eq)))))
|
||||
(count-eq)
|
||||
(if
|
||||
(and
|
||||
(= eq-count level)
|
||||
(< p src-len)
|
||||
(= (nth src p) "]"))
|
||||
(begin
|
||||
(set! result (slice src start pos))
|
||||
(set! pos (+ p 1)))
|
||||
(begin (advance! 1) (scan))))))
|
||||
(else (begin (advance! 1) (scan))))))
|
||||
(scan)
|
||||
result))))
|
||||
(define
|
||||
skip-line-comment!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (not (= (cur) "\n")))
|
||||
(begin (advance! 1) (skip-line-comment!)))))
|
||||
(define
|
||||
skip-ws!
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((lua-ws? (cur)) (begin (advance! 1) (skip-ws!)))
|
||||
((and (= (cur) "-") (< (+ pos 1) src-len) (= (lua-peek 1) "-"))
|
||||
(begin
|
||||
(advance! 2)
|
||||
(let
|
||||
((lvl (match-long-open)))
|
||||
(cond
|
||||
((>= lvl 0)
|
||||
(begin
|
||||
(advance! (+ 2 lvl))
|
||||
(read-long-body lvl)
|
||||
(skip-ws!)))
|
||||
(else (begin (skip-line-comment!) (skip-ws!)))))))
|
||||
(else nil))))
|
||||
(define
|
||||
read-ident
|
||||
(fn
|
||||
(start)
|
||||
(begin
|
||||
(when
|
||||
(and (< pos src-len) (lua-ident-char? (cur)))
|
||||
(begin (advance! 1) (read-ident start)))
|
||||
(slice src start pos))))
|
||||
(define
|
||||
read-decimal-digits!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (lua-digit? (cur)))
|
||||
(begin (advance! 1) (read-decimal-digits!)))))
|
||||
(define
|
||||
read-hex-digits!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (lua-hex-digit? (cur)))
|
||||
(begin (advance! 1) (read-hex-digits!)))))
|
||||
(define
|
||||
read-exp-part!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (or (= (cur) "e") (= (cur) "E")))
|
||||
(let
|
||||
((p1 (lua-peek 1)))
|
||||
(when
|
||||
(or
|
||||
(and (not (= p1 nil)) (lua-digit? p1))
|
||||
(and
|
||||
(or (= p1 "+") (= p1 "-"))
|
||||
(< (+ pos 2) src-len)
|
||||
(lua-digit? (lua-peek 2))))
|
||||
(begin
|
||||
(advance! 1)
|
||||
(when
|
||||
(and
|
||||
(< pos src-len)
|
||||
(or (= (cur) "+") (= (cur) "-")))
|
||||
(advance! 1))
|
||||
(read-decimal-digits!)))))))
|
||||
(define
|
||||
read-number
|
||||
(fn
|
||||
(start)
|
||||
(cond
|
||||
((and (= (cur) "0") (< (+ pos 1) src-len) (or (= (lua-peek 1) "x") (= (lua-peek 1) "X")))
|
||||
(begin
|
||||
(advance! 2)
|
||||
(read-hex-digits!)
|
||||
(let
|
||||
((raw (slice src (+ start 2) pos)))
|
||||
(parse-number (str "0x" raw)))))
|
||||
(else
|
||||
(begin
|
||||
(read-decimal-digits!)
|
||||
(when
|
||||
(and
|
||||
(< pos src-len)
|
||||
(= (cur) ".")
|
||||
(< (+ pos 1) src-len)
|
||||
(lua-digit? (lua-peek 1)))
|
||||
(begin (advance! 1) (read-decimal-digits!)))
|
||||
(read-exp-part!)
|
||||
(parse-number (slice src start pos)))))))
|
||||
(define
|
||||
read-string
|
||||
(fn
|
||||
(quote-char)
|
||||
(let
|
||||
((chars (list)))
|
||||
(begin
|
||||
(advance! 1)
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
((>= pos src-len) nil)
|
||||
((= (cur) "\\")
|
||||
(begin
|
||||
(advance! 1)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((ch (cur)))
|
||||
(begin
|
||||
(cond
|
||||
((= ch "n") (append! chars "\n"))
|
||||
((= ch "t") (append! chars "\t"))
|
||||
((= ch "r") (append! chars "\r"))
|
||||
((= ch "\\") (append! chars "\\"))
|
||||
((= ch "'") (append! chars "'"))
|
||||
((= ch "\"") (append! chars "\""))
|
||||
(else (append! chars ch)))
|
||||
(advance! 1))))
|
||||
(loop)))
|
||||
((= (cur) quote-char) (advance! 1))
|
||||
(else
|
||||
(begin (append! chars (cur)) (advance! 1) (loop))))))
|
||||
(loop)
|
||||
(join "" chars)))))
|
||||
(define
|
||||
try-punct
|
||||
(fn
|
||||
(start)
|
||||
(let
|
||||
((c (cur)) (c1 (lua-peek 1)) (c2 (lua-peek 2)))
|
||||
(cond
|
||||
((and (= c ".") (= c1 ".") (= c2 "."))
|
||||
(begin (advance! 3) (push! "op" "..." start) true))
|
||||
((and (= c ".") (= c1 "."))
|
||||
(begin (advance! 2) (push! "op" ".." start) true))
|
||||
((and (= c "=") (= c1 "="))
|
||||
(begin (advance! 2) (push! "op" "==" start) true))
|
||||
((and (= c "~") (= c1 "="))
|
||||
(begin (advance! 2) (push! "op" "~=" start) true))
|
||||
((and (= c "<") (= c1 "="))
|
||||
(begin (advance! 2) (push! "op" "<=" start) true))
|
||||
((and (= c ">") (= c1 "="))
|
||||
(begin (advance! 2) (push! "op" ">=" start) true))
|
||||
((and (= c ":") (= c1 ":"))
|
||||
(begin (advance! 2) (push! "op" "::" start) true))
|
||||
((or (= c "+") (= c "-") (= c "*") (= c "/") (= c "%") (= c "^") (= c "#") (= c "<") (= c ">") (= c "=") (= c "(") (= c ")") (= c "{") (= c "}") (= c "[") (= c "]") (= c ";") (= c ":") (= c ",") (= c "."))
|
||||
(begin (advance! 1) (push! "op" c start) true))
|
||||
(else false)))))
|
||||
(define
|
||||
step
|
||||
(fn
|
||||
()
|
||||
(begin
|
||||
(skip-ws!)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((start pos) (c (cur)))
|
||||
(cond
|
||||
((lua-ident-start? c)
|
||||
(let
|
||||
((word (read-ident start)))
|
||||
(begin
|
||||
(if
|
||||
(lua-keyword? word)
|
||||
(push! "keyword" word start)
|
||||
(push! "ident" word start))
|
||||
(step))))
|
||||
((lua-digit? c)
|
||||
(let
|
||||
((v (read-number start)))
|
||||
(begin (push! "number" v start) (step))))
|
||||
((and (= c ".") (< (+ pos 1) src-len) (lua-digit? (lua-peek 1)))
|
||||
(begin
|
||||
(advance! 1)
|
||||
(read-decimal-digits!)
|
||||
(read-exp-part!)
|
||||
(push!
|
||||
"number"
|
||||
(parse-number (slice src start pos))
|
||||
start)
|
||||
(step)))
|
||||
((or (= c "\"") (= c "'"))
|
||||
(let
|
||||
((s (read-string c)))
|
||||
(begin (push! "string" s start) (step))))
|
||||
((= c "[")
|
||||
(let
|
||||
((lvl (match-long-open)))
|
||||
(cond
|
||||
((>= lvl 0)
|
||||
(begin
|
||||
(advance! (+ 2 lvl))
|
||||
(when (= (cur) "\n") (advance! 1))
|
||||
(let
|
||||
((s (read-long-body lvl)))
|
||||
(begin (push! "string" s start) (step)))))
|
||||
(else
|
||||
(begin (advance! 1) (push! "op" "[" start) (step))))))
|
||||
((try-punct start) (step))
|
||||
(else
|
||||
(error
|
||||
(str "lua-tokenize: unexpected char " c " at " pos)))))))))
|
||||
(step)
|
||||
(push! "eof" nil pos)
|
||||
tokens)))
|
||||
Reference in New Issue
Block a user