Recover agent-loop progress: lua/prolog/forth/erlang/haskell phases 1-2
Salvaged from worktree-agent-* branches killed during sx-tree MCP outage: - lua: tokenizer + parser + phase-2 transpile (~157 tests) - prolog: tokenizer + parser + unification (72 tests, plan update lost to WIP) - forth: phase-1 reader/interpreter + phase-2 colon/VARIABLE (134 tests) - erlang: tokenizer + parser (114 tests) - haskell: tokenizer + parse tests (43 tests) Cherry-picked file contents only, not branch history, to avoid pulling in unrelated ocaml-vm merge commits that were in those branches' bases.
This commit is contained in:
49
lib/erlang/parser-core.sx
Normal file
49
lib/erlang/parser-core.sx
Normal file
@@ -0,0 +1,49 @@
|
||||
;; Core parser helpers — shared by er-parse-expr and er-parse-module.
|
||||
;; Everything reads/mutates a parser state dict:
|
||||
;; {:toks TOKS :idx INDEX}
|
||||
|
||||
(define er-state-make (fn (toks) {:idx 0 :toks toks}))
|
||||
|
||||
(define
|
||||
er-peek
|
||||
(fn
|
||||
(st offset)
|
||||
(let
|
||||
((toks (get st :toks)) (idx (+ (get st :idx) offset)))
|
||||
(if (< idx (len toks)) (nth toks idx) (nth toks (- (len toks) 1))))))
|
||||
|
||||
(define er-cur (fn (st) (er-peek st 0)))
|
||||
|
||||
(define er-cur-type (fn (st) (get (er-cur st) :type)))
|
||||
(define er-cur-value (fn (st) (get (er-cur st) :value)))
|
||||
|
||||
(define er-advance! (fn (st) (dict-set! st :idx (+ (get st :idx) 1))))
|
||||
|
||||
(define er-at-eof? (fn (st) (= (er-cur-type st) "eof")))
|
||||
|
||||
(define
|
||||
er-is?
|
||||
(fn
|
||||
(st type value)
|
||||
(and
|
||||
(= (er-cur-type st) type)
|
||||
(or (= value nil) (= (er-cur-value st) value)))))
|
||||
|
||||
(define
|
||||
er-expect!
|
||||
(fn
|
||||
(st type value)
|
||||
(if
|
||||
(er-is? st type value)
|
||||
(let ((t (er-cur st))) (er-advance! st) t)
|
||||
(error
|
||||
(str
|
||||
"Erlang parse: expected "
|
||||
type
|
||||
(if value (str " '" value "'") "")
|
||||
" but got "
|
||||
(er-cur-type st)
|
||||
" '"
|
||||
(er-cur-value st)
|
||||
"' at pos "
|
||||
(get (er-cur st) :pos))))))
|
||||
534
lib/erlang/parser-expr.sx
Normal file
534
lib/erlang/parser-expr.sx
Normal file
@@ -0,0 +1,534 @@
|
||||
;; Erlang expression parser — top-level fns operating on parser state.
|
||||
;; Depends on parser-core.sx (er-state-*, er-cur-*, er-is?, er-expect!)
|
||||
;; and parser.sx (er-is-binop?, er-any-binop?, er-build-cons, er-slice-list).
|
||||
|
||||
;; ── entry point ───────────────────────────────────────────────────
|
||||
(define
|
||||
er-parse-expr
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((st (er-state-make (er-tokenize src))))
|
||||
(er-parse-expr-prec st 0))))
|
||||
|
||||
;; Pratt-like operator-precedence parser.
|
||||
(define
|
||||
er-parse-expr-prec
|
||||
(fn
|
||||
(st min-prec)
|
||||
(let
|
||||
((left (er-parse-unary st)))
|
||||
(er-parse-expr-loop st min-prec left))))
|
||||
|
||||
(define
|
||||
er-parse-expr-loop
|
||||
(fn
|
||||
(st min-prec left)
|
||||
(if
|
||||
(er-any-binop? (er-cur st) min-prec)
|
||||
(let
|
||||
((tok (er-cur st)))
|
||||
(cond
|
||||
(er-is-binop? tok 0)
|
||||
(do (er-advance! st) (er-parse-expr-loop st min-prec {:rhs (er-parse-expr-prec st 0) :type "match" :lhs left}))
|
||||
(er-is-binop? tok 1)
|
||||
(do (er-advance! st) (er-parse-expr-loop st min-prec {:msg (er-parse-expr-prec st 1) :type "send" :to left}))
|
||||
(er-is-binop? tok 2)
|
||||
(let
|
||||
((op (get tok :value)))
|
||||
(er-advance! st)
|
||||
(er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 3)) :type "op" :op op}))
|
||||
(er-is-binop? tok 3)
|
||||
(let
|
||||
((op (get tok :value)))
|
||||
(er-advance! st)
|
||||
(er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 4)) :type "op" :op op}))
|
||||
(er-is-binop? tok 4)
|
||||
(let
|
||||
((op (get tok :value)))
|
||||
(er-advance! st)
|
||||
(er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 5)) :type "op" :op op}))
|
||||
(er-is-binop? tok 5)
|
||||
(let
|
||||
((op (get tok :value)))
|
||||
(er-advance! st)
|
||||
(er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 5)) :type "op" :op op}))
|
||||
(er-is-binop? tok 6)
|
||||
(let
|
||||
((op (get tok :value)))
|
||||
(er-advance! st)
|
||||
(er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 7)) :type "op" :op op}))
|
||||
(er-is-binop? tok 7)
|
||||
(let
|
||||
((op (get tok :value)))
|
||||
(er-advance! st)
|
||||
(er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 8)) :type "op" :op op}))
|
||||
:else left))
|
||||
left)))
|
||||
|
||||
(define
|
||||
er-parse-unary
|
||||
(fn
|
||||
(st)
|
||||
(cond
|
||||
(er-is? st "op" "-")
|
||||
(do (er-advance! st) {:arg (er-parse-unary st) :type "unop" :op "-"})
|
||||
(er-is? st "op" "+")
|
||||
(do (er-advance! st) (er-parse-unary st))
|
||||
(er-is? st "keyword" "not")
|
||||
(do (er-advance! st) {:arg (er-parse-unary st) :type "unop" :op "not"})
|
||||
(er-is? st "keyword" "bnot")
|
||||
(do (er-advance! st) {:arg (er-parse-unary st) :type "unop" :op "bnot"})
|
||||
:else (er-parse-postfix st))))
|
||||
|
||||
(define
|
||||
er-parse-postfix
|
||||
(fn (st) (er-parse-postfix-loop st (er-parse-primary st))))
|
||||
|
||||
(define
|
||||
er-parse-postfix-loop
|
||||
(fn
|
||||
(st node)
|
||||
(cond
|
||||
(er-is? st "punct" ":")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(let
|
||||
((rhs (er-parse-primary st)))
|
||||
(er-parse-postfix-loop st {:fun rhs :mod node :type "remote"})))
|
||||
(er-is? st "punct" "(")
|
||||
(let
|
||||
((args (er-parse-call-args st)))
|
||||
(er-parse-postfix-loop st {:args args :fun node :type "call"}))
|
||||
:else node)))
|
||||
|
||||
(define
|
||||
er-parse-call-args
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "punct" "(")
|
||||
(if
|
||||
(er-is? st "punct" ")")
|
||||
(do (er-advance! st) (list))
|
||||
(let
|
||||
((args (list (er-parse-expr-prec st 0))))
|
||||
(er-parse-args-tail st args)))))
|
||||
|
||||
(define
|
||||
er-parse-args-tail
|
||||
(fn
|
||||
(st args)
|
||||
(cond
|
||||
(er-is? st "punct" ",")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! args (er-parse-expr-prec st 0))
|
||||
(er-parse-args-tail st args))
|
||||
(er-is? st "punct" ")")
|
||||
(do (er-advance! st) args)
|
||||
:else (error
|
||||
(str
|
||||
"Erlang parse: expected ',' or ')' in args, got '"
|
||||
(er-cur-value st)
|
||||
"'")))))
|
||||
|
||||
;; A body is: Expr {, Expr}
|
||||
(define
|
||||
er-parse-body
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((exprs (list (er-parse-expr-prec st 0))))
|
||||
(er-parse-body-tail st exprs))))
|
||||
|
||||
(define
|
||||
er-parse-body-tail
|
||||
(fn
|
||||
(st exprs)
|
||||
(if
|
||||
(er-is? st "punct" ",")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! exprs (er-parse-expr-prec st 0))
|
||||
(er-parse-body-tail st exprs))
|
||||
exprs)))
|
||||
|
||||
;; Guards: G1 ; G2 ; ... where each Gi is a guard-conj (T, T, ...)
|
||||
(define
|
||||
er-parse-guards
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((alts (list (er-parse-guard-conj st))))
|
||||
(er-parse-guards-tail st alts))))
|
||||
|
||||
(define
|
||||
er-parse-guards-tail
|
||||
(fn
|
||||
(st alts)
|
||||
(if
|
||||
(er-is? st "punct" ";")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! alts (er-parse-guard-conj st))
|
||||
(er-parse-guards-tail st alts))
|
||||
alts)))
|
||||
|
||||
(define
|
||||
er-parse-guard-conj
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((ts (list (er-parse-expr-prec st 0))))
|
||||
(er-parse-guard-conj-tail st ts))))
|
||||
|
||||
(define
|
||||
er-parse-guard-conj-tail
|
||||
(fn
|
||||
(st ts)
|
||||
(if
|
||||
(er-is? st "punct" ",")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! ts (er-parse-expr-prec st 0))
|
||||
(er-parse-guard-conj-tail st ts))
|
||||
ts)))
|
||||
|
||||
(define er-parse-pattern (fn (st) (er-parse-expr-prec st 0)))
|
||||
|
||||
;; ── primary expressions ──────────────────────────────────────────
|
||||
(define
|
||||
er-parse-primary
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((tok (er-cur st)))
|
||||
(cond
|
||||
(= (er-cur-type st) "integer")
|
||||
(do (er-advance! st) {:value (get tok :value) :type "integer"})
|
||||
(= (er-cur-type st) "float")
|
||||
(do (er-advance! st) {:value (get tok :value) :type "float"})
|
||||
(= (er-cur-type st) "string")
|
||||
(do (er-advance! st) {:value (get tok :value) :type "string"})
|
||||
(= (er-cur-type st) "atom")
|
||||
(do (er-advance! st) {:value (get tok :value) :type "atom"})
|
||||
(= (er-cur-type st) "var")
|
||||
(do (er-advance! st) {:type "var" :name (get tok :value)})
|
||||
(er-is? st "punct" "(")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(let
|
||||
((e (er-parse-expr-prec st 0)))
|
||||
(er-expect! st "punct" ")")
|
||||
e))
|
||||
(er-is? st "punct" "{")
|
||||
(er-parse-tuple st)
|
||||
(er-is? st "punct" "[")
|
||||
(er-parse-list st)
|
||||
(er-is? st "keyword" "if")
|
||||
(er-parse-if st)
|
||||
(er-is? st "keyword" "case")
|
||||
(er-parse-case st)
|
||||
(er-is? st "keyword" "receive")
|
||||
(er-parse-receive st)
|
||||
(er-is? st "keyword" "begin")
|
||||
(er-parse-begin st)
|
||||
(er-is? st "keyword" "fun")
|
||||
(er-parse-fun-expr st)
|
||||
(er-is? st "keyword" "try")
|
||||
(er-parse-try st)
|
||||
:else (error
|
||||
(str
|
||||
"Erlang parse: unexpected "
|
||||
(er-cur-type st)
|
||||
" '"
|
||||
(get tok :value)
|
||||
"' at pos "
|
||||
(get tok :pos)))))))
|
||||
|
||||
(define
|
||||
er-parse-tuple
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "punct" "{")
|
||||
(if
|
||||
(er-is? st "punct" "}")
|
||||
(do (er-advance! st) {:elements (list) :type "tuple"})
|
||||
(let
|
||||
((elems (list (er-parse-expr-prec st 0))))
|
||||
(er-parse-tuple-tail st elems)))))
|
||||
|
||||
(define
|
||||
er-parse-tuple-tail
|
||||
(fn
|
||||
(st elems)
|
||||
(cond
|
||||
(er-is? st "punct" ",")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! elems (er-parse-expr-prec st 0))
|
||||
(er-parse-tuple-tail st elems))
|
||||
(er-is? st "punct" "}")
|
||||
(do (er-advance! st) {:elements elems :type "tuple"})
|
||||
:else (error
|
||||
(str
|
||||
"Erlang parse: expected ',' or '}' in tuple, got '"
|
||||
(er-cur-value st)
|
||||
"'")))))
|
||||
|
||||
(define
|
||||
er-parse-list
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "punct" "[")
|
||||
(if
|
||||
(er-is? st "punct" "]")
|
||||
(do (er-advance! st) {:type "nil"})
|
||||
(let
|
||||
((elems (list (er-parse-expr-prec st 0))))
|
||||
(er-parse-list-tail st elems)))))
|
||||
|
||||
(define
|
||||
er-parse-list-tail
|
||||
(fn
|
||||
(st elems)
|
||||
(cond
|
||||
(er-is? st "punct" ",")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! elems (er-parse-expr-prec st 0))
|
||||
(er-parse-list-tail st elems))
|
||||
(er-is? st "punct" "|")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(let
|
||||
((tail (er-parse-expr-prec st 0)))
|
||||
(er-expect! st "punct" "]")
|
||||
(er-build-cons elems tail)))
|
||||
(er-is? st "punct" "]")
|
||||
(do (er-advance! st) (er-build-cons elems {:type "nil"}))
|
||||
:else (error
|
||||
(str
|
||||
"Erlang parse: expected ',' '|' or ']' in list, got '"
|
||||
(er-cur-value st)
|
||||
"'")))))
|
||||
|
||||
;; ── if ──────────────────────────────────────────────────────────
|
||||
(define
|
||||
er-parse-if
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "keyword" "if")
|
||||
(let
|
||||
((clauses (list (er-parse-if-clause st))))
|
||||
(er-parse-if-tail st clauses))))
|
||||
|
||||
(define
|
||||
er-parse-if-tail
|
||||
(fn
|
||||
(st clauses)
|
||||
(if
|
||||
(er-is? st "punct" ";")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! clauses (er-parse-if-clause st))
|
||||
(er-parse-if-tail st clauses))
|
||||
(do (er-expect! st "keyword" "end") {:clauses clauses :type "if"}))))
|
||||
|
||||
(define
|
||||
er-parse-if-clause
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((guards (er-parse-guards st)))
|
||||
(er-expect! st "punct" "->")
|
||||
(let ((body (er-parse-body st))) {:body body :guards guards}))))
|
||||
|
||||
;; ── case ────────────────────────────────────────────────────────
|
||||
(define
|
||||
er-parse-case
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "keyword" "case")
|
||||
(let
|
||||
((e (er-parse-expr-prec st 0)))
|
||||
(er-expect! st "keyword" "of")
|
||||
(let
|
||||
((clauses (list (er-parse-case-clause st))))
|
||||
(er-parse-case-tail st e clauses)))))
|
||||
|
||||
(define
|
||||
er-parse-case-tail
|
||||
(fn
|
||||
(st e clauses)
|
||||
(if
|
||||
(er-is? st "punct" ";")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! clauses (er-parse-case-clause st))
|
||||
(er-parse-case-tail st e clauses))
|
||||
(do (er-expect! st "keyword" "end") {:expr e :clauses clauses :type "case"}))))
|
||||
|
||||
(define
|
||||
er-parse-case-clause
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((pat (er-parse-pattern st)))
|
||||
(let
|
||||
((guards (if (er-is? st "keyword" "when") (do (er-advance! st) (er-parse-guards st)) (list))))
|
||||
(er-expect! st "punct" "->")
|
||||
(let ((body (er-parse-body st))) {:pattern pat :body body :guards guards})))))
|
||||
|
||||
;; ── receive ─────────────────────────────────────────────────────
|
||||
(define
|
||||
er-parse-receive
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "keyword" "receive")
|
||||
(let
|
||||
((clauses (if (er-is? st "keyword" "after") (list) (list (er-parse-case-clause st)))))
|
||||
(er-parse-receive-clauses st clauses))))
|
||||
|
||||
(define
|
||||
er-parse-receive-clauses
|
||||
(fn
|
||||
(st clauses)
|
||||
(cond
|
||||
(er-is? st "punct" ";")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! clauses (er-parse-case-clause st))
|
||||
(er-parse-receive-clauses st clauses))
|
||||
(er-is? st "keyword" "after")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(let
|
||||
((after-ms (er-parse-expr-prec st 0)))
|
||||
(er-expect! st "punct" "->")
|
||||
(let
|
||||
((after-body (er-parse-body st)))
|
||||
(er-expect! st "keyword" "end")
|
||||
{:clauses clauses :type "receive" :after-ms after-ms :after-body after-body})))
|
||||
:else (do (er-expect! st "keyword" "end") {:clauses clauses :type "receive" :after-ms nil :after-body (list)}))))
|
||||
|
||||
(define
|
||||
er-parse-begin
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "keyword" "begin")
|
||||
(let
|
||||
((exprs (er-parse-body st)))
|
||||
(er-expect! st "keyword" "end")
|
||||
{:exprs exprs :type "block"})))
|
||||
|
||||
(define
|
||||
er-parse-fun-expr
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "keyword" "fun")
|
||||
(cond
|
||||
(er-is? st "punct" "(")
|
||||
(let
|
||||
((clauses (list (er-parse-fun-clause st nil))))
|
||||
(er-parse-fun-expr-tail st clauses))
|
||||
:else (error "Erlang parse: fun-ref syntax not yet supported"))))
|
||||
|
||||
(define
|
||||
er-parse-fun-expr-tail
|
||||
(fn
|
||||
(st clauses)
|
||||
(if
|
||||
(er-is? st "punct" ";")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! clauses (er-parse-fun-clause st nil))
|
||||
(er-parse-fun-expr-tail st clauses))
|
||||
(do (er-expect! st "keyword" "end") {:clauses clauses :type "fun"}))))
|
||||
|
||||
(define
|
||||
er-parse-fun-clause
|
||||
(fn
|
||||
(st named-name)
|
||||
(er-expect! st "punct" "(")
|
||||
(let
|
||||
((patterns (if (er-is? st "punct" ")") (list) (er-parse-pattern-list st (list (er-parse-pattern st))))))
|
||||
(er-expect! st "punct" ")")
|
||||
(let
|
||||
((guards (if (er-is? st "keyword" "when") (do (er-advance! st) (er-parse-guards st)) (list))))
|
||||
(er-expect! st "punct" "->")
|
||||
(let ((body (er-parse-body st))) {:patterns patterns :body body :guards guards :name named-name})))))
|
||||
|
||||
(define
|
||||
er-parse-pattern-list
|
||||
(fn
|
||||
(st pats)
|
||||
(if
|
||||
(er-is? st "punct" ",")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! pats (er-parse-pattern st))
|
||||
(er-parse-pattern-list st pats))
|
||||
pats)))
|
||||
|
||||
;; ── try ─────────────────────────────────────────────────────────
|
||||
(define
|
||||
er-parse-try
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "keyword" "try")
|
||||
(let
|
||||
((exprs (er-parse-body st))
|
||||
(of-clauses (list))
|
||||
(catch-clauses (list))
|
||||
(after-body (list)))
|
||||
(when
|
||||
(er-is? st "keyword" "of")
|
||||
(er-advance! st)
|
||||
(append! of-clauses (er-parse-case-clause st))
|
||||
(er-parse-try-of-tail st of-clauses))
|
||||
(when
|
||||
(er-is? st "keyword" "catch")
|
||||
(er-advance! st)
|
||||
(append! catch-clauses (er-parse-catch-clause st))
|
||||
(er-parse-try-catch-tail st catch-clauses))
|
||||
(when
|
||||
(er-is? st "keyword" "after")
|
||||
(er-advance! st)
|
||||
(set! after-body (er-parse-body st)))
|
||||
(er-expect! st "keyword" "end")
|
||||
{:exprs exprs :catch-clauses catch-clauses :type "try" :of-clauses of-clauses :after after-body})))
|
||||
|
||||
(define
|
||||
er-parse-try-of-tail
|
||||
(fn
|
||||
(st clauses)
|
||||
(when
|
||||
(er-is? st "punct" ";")
|
||||
(er-advance! st)
|
||||
(append! clauses (er-parse-case-clause st))
|
||||
(er-parse-try-of-tail st clauses))))
|
||||
|
||||
(define
|
||||
er-parse-try-catch-tail
|
||||
(fn
|
||||
(st clauses)
|
||||
(when
|
||||
(er-is? st "punct" ";")
|
||||
(er-advance! st)
|
||||
(append! clauses (er-parse-catch-clause st))
|
||||
(er-parse-try-catch-tail st clauses))))
|
||||
|
||||
(define
|
||||
er-parse-catch-clause
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((p1 (er-parse-pattern st)))
|
||||
(let
|
||||
((klass (if (= (get p1 :type) "remote") (get p1 :mod) {:value "throw" :type "atom"}))
|
||||
(pat (if (= (get p1 :type) "remote") (get p1 :fun) p1)))
|
||||
(let
|
||||
((guards (if (er-is? st "keyword" "when") (do (er-advance! st) (er-parse-guards st)) (list))))
|
||||
(er-expect! st "punct" "->")
|
||||
(let ((body (er-parse-body st))) {:pattern pat :body body :class klass :guards guards}))))))
|
||||
113
lib/erlang/parser-module.sx
Normal file
113
lib/erlang/parser-module.sx
Normal file
@@ -0,0 +1,113 @@
|
||||
;; Erlang module parser — reads top-level forms and builds a module AST.
|
||||
;;
|
||||
;; Depends on parser-core.sx, parser.sx, parser-expr.sx.
|
||||
|
||||
(define
|
||||
er-parse-module
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((st (er-state-make (er-tokenize src)))
|
||||
(mod-ref (list nil))
|
||||
(attrs (list))
|
||||
(functions (list)))
|
||||
(er-parse-module-loop st mod-ref attrs functions)
|
||||
{:functions functions :type "module" :attrs attrs :name (nth mod-ref 0)})))
|
||||
|
||||
(define
|
||||
er-parse-module-loop
|
||||
(fn
|
||||
(st mod-ref attrs functions)
|
||||
(when
|
||||
(not (er-at-eof? st))
|
||||
(er-parse-top-form st mod-ref attrs functions)
|
||||
(er-parse-module-loop st mod-ref attrs functions))))
|
||||
|
||||
(define
|
||||
er-parse-top-form
|
||||
(fn
|
||||
(st mod-ref attrs functions)
|
||||
(cond
|
||||
(er-is? st "op" "-")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(let
|
||||
((attr-name (er-cur-value st)))
|
||||
(er-advance! st)
|
||||
(let
|
||||
((args (er-parse-attr-args st)))
|
||||
(er-expect! st "punct" ".")
|
||||
(cond
|
||||
(= attr-name "module")
|
||||
(set-nth! mod-ref 0 (get (nth args 0) :value))
|
||||
:else (append! attrs {:args args :name attr-name})))))
|
||||
(= (er-cur-type st) "atom")
|
||||
(append! functions (er-parse-function st))
|
||||
:else (error
|
||||
(str
|
||||
"Erlang parse (top): unexpected "
|
||||
(er-cur-type st)
|
||||
" '"
|
||||
(er-cur-value st)
|
||||
"' at pos "
|
||||
(get (er-cur st) :pos))))))
|
||||
|
||||
(define
|
||||
er-parse-attr-args
|
||||
(fn
|
||||
(st)
|
||||
(er-expect! st "punct" "(")
|
||||
(if
|
||||
(er-is? st "punct" ")")
|
||||
(do (er-advance! st) (list))
|
||||
(let
|
||||
((args (list (er-parse-attr-arg st))))
|
||||
(er-parse-attr-args-tail st args)))))
|
||||
|
||||
(define
|
||||
er-parse-attr-args-tail
|
||||
(fn
|
||||
(st args)
|
||||
(cond
|
||||
(er-is? st "punct" ",")
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! args (er-parse-attr-arg st))
|
||||
(er-parse-attr-args-tail st args))
|
||||
(er-is? st "punct" ")")
|
||||
(do (er-advance! st) args)
|
||||
:else (error (str "Erlang parse attr: got '" (er-cur-value st) "'")))))
|
||||
|
||||
;; Attribute args often contain `Name/Arity` pairs — parse as a
|
||||
;; general expression so the caller can interpret the shape.
|
||||
(define er-parse-attr-arg (fn (st) (er-parse-expr-prec st 0)))
|
||||
|
||||
(define
|
||||
er-parse-function
|
||||
(fn
|
||||
(st)
|
||||
(let
|
||||
((name (er-cur-value st)))
|
||||
(er-advance! st)
|
||||
(let
|
||||
((clauses (list (er-parse-fun-clause st name))))
|
||||
(er-parse-function-tail st name clauses)
|
||||
(er-expect! st "punct" ".")
|
||||
(let ((arity (len (get (nth clauses 0) :patterns)))) {:arity arity :clauses clauses :type "function" :name name})))))
|
||||
|
||||
(define
|
||||
er-parse-function-tail
|
||||
(fn
|
||||
(st name clauses)
|
||||
(when
|
||||
(er-is? st "punct" ";")
|
||||
(let
|
||||
((save (get st :idx)))
|
||||
(er-advance! st)
|
||||
(if
|
||||
(and (= (er-cur-type st) "atom") (= (er-cur-value st) name))
|
||||
(do
|
||||
(er-advance! st)
|
||||
(append! clauses (er-parse-fun-clause st name))
|
||||
(er-parse-function-tail st name clauses))
|
||||
(dict-set! st :idx save))))))
|
||||
111
lib/erlang/parser.sx
Normal file
111
lib/erlang/parser.sx
Normal file
@@ -0,0 +1,111 @@
|
||||
;; Erlang parser — turns a token list into an AST.
|
||||
;;
|
||||
;; Shared state lives in the surrounding `let` of `er-parse-*`.
|
||||
;; All helpers use recursion (no `while` in SX).
|
||||
;;
|
||||
;; AST node shapes:
|
||||
;; {:type "atom" :value "foo"}
|
||||
;; {:type "integer" :value "42"} ; value kept as string
|
||||
;; {:type "float" :value "3.14"}
|
||||
;; {:type "string" :value "hi"}
|
||||
;; {:type "var" :name "X"} ; "_" is wildcard
|
||||
;; {:type "nil"}
|
||||
;; {:type "tuple" :elements [...]}
|
||||
;; {:type "cons" :head E :tail E}
|
||||
;; {:type "call" :fun E :args [...]}
|
||||
;; {:type "remote" :mod E :fun E}
|
||||
;; {:type "op" :op OP :args [L R]}
|
||||
;; {:type "unop" :op OP :arg E}
|
||||
;; {:type "match" :lhs P :rhs E}
|
||||
;; {:type "send" :to E :msg E}
|
||||
;; {:type "if" :clauses [{:guards [...] :body [...]} ...]}
|
||||
;; {:type "case" :expr E :clauses [{:pattern P :guards [...] :body [...]} ...]}
|
||||
;; {:type "receive" :clauses [...] :after-ms E-or-nil :after-body [...]}
|
||||
;; {:type "fun" :clauses [...]}
|
||||
;; {:type "block" :exprs [...]}
|
||||
;; {:type "try" :exprs [...] :of-clauses [...] :catch-clauses [...] :after [...]}
|
||||
;; Top-level: {:type "module" :name A :attrs [{:name A :args [...]} ...] :functions [...]}
|
||||
;; {:type "function" :name A :arity N :clauses [{:name :patterns :guards :body}]}
|
||||
|
||||
(define
|
||||
er-is-binop?
|
||||
(fn
|
||||
(tok prec)
|
||||
(let
|
||||
((ty (get tok :type)) (v (get tok :value)))
|
||||
(cond
|
||||
(= prec 0)
|
||||
(and (= ty "op") (= v "="))
|
||||
(= prec 1)
|
||||
(and (= ty "op") (= v "!"))
|
||||
(= prec 2)
|
||||
(or
|
||||
(and (= ty "keyword") (= v "orelse"))
|
||||
(and (= ty "keyword") (= v "or"))
|
||||
(and (= ty "keyword") (= v "xor")))
|
||||
(= prec 3)
|
||||
(or
|
||||
(and (= ty "keyword") (= v "andalso"))
|
||||
(and (= ty "keyword") (= v "and")))
|
||||
(= prec 4)
|
||||
(and
|
||||
(= ty "op")
|
||||
(or
|
||||
(= v "==")
|
||||
(= v "/=")
|
||||
(= v "=:=")
|
||||
(= v "=/=")
|
||||
(= v "<")
|
||||
(= v ">")
|
||||
(= v "=<")
|
||||
(= v ">=")))
|
||||
(= prec 5)
|
||||
(and (= ty "op") (or (= v "++") (= v "--")))
|
||||
(= prec 6)
|
||||
(and (= ty "op") (or (= v "+") (= v "-")))
|
||||
(= prec 7)
|
||||
(or
|
||||
(and (= ty "op") (or (= v "*") (= v "/")))
|
||||
(and
|
||||
(= ty "keyword")
|
||||
(or
|
||||
(= v "div")
|
||||
(= v "rem")
|
||||
(= v "band")
|
||||
(= v "bor")
|
||||
(= v "bxor")
|
||||
(= v "bsl")
|
||||
(= v "bsr"))))
|
||||
:else false))))
|
||||
|
||||
(define
|
||||
er-any-binop?
|
||||
(fn
|
||||
(tok min-prec)
|
||||
(or
|
||||
(and (>= 0 min-prec) (er-is-binop? tok 0))
|
||||
(and (>= 1 min-prec) (er-is-binop? tok 1))
|
||||
(and (>= 2 min-prec) (er-is-binop? tok 2))
|
||||
(and (>= 3 min-prec) (er-is-binop? tok 3))
|
||||
(and (>= 4 min-prec) (er-is-binop? tok 4))
|
||||
(and (>= 5 min-prec) (er-is-binop? tok 5))
|
||||
(and (>= 6 min-prec) (er-is-binop? tok 6))
|
||||
(and (>= 7 min-prec) (er-is-binop? tok 7)))))
|
||||
|
||||
(define
|
||||
er-slice-list
|
||||
(fn
|
||||
(xs from)
|
||||
(if
|
||||
(>= from (len xs))
|
||||
(list)
|
||||
(let
|
||||
((out (list)))
|
||||
(for-each
|
||||
(fn (i) (append! out (nth xs i)))
|
||||
(range from (len xs)))
|
||||
out))))
|
||||
|
||||
(define
|
||||
er-build-cons
|
||||
(fn (elems tail) (if (= (len elems) 0) tail {:head (nth elems 0) :tail (er-build-cons (er-slice-list elems 1) tail) :type "cons"})))
|
||||
230
lib/erlang/tests/parse.sx
Normal file
230
lib/erlang/tests/parse.sx
Normal file
@@ -0,0 +1,230 @@
|
||||
;; Erlang parser tests
|
||||
|
||||
(define er-parse-test-count 0)
|
||||
(define er-parse-test-pass 0)
|
||||
(define er-parse-test-fails (list))
|
||||
|
||||
(define
|
||||
deep=
|
||||
(fn
|
||||
(a b)
|
||||
(cond
|
||||
(and (= (type-of a) "dict") (= (type-of b) "dict"))
|
||||
(let
|
||||
((ka (sort (keys a))) (kb (sort (keys b))))
|
||||
(and (= ka kb) (every? (fn (k) (deep= (get a k) (get b k))) ka)))
|
||||
(and (= (type-of a) "list") (= (type-of b) "list"))
|
||||
(and
|
||||
(= (len a) (len b))
|
||||
(every? (fn (i) (deep= (nth a i) (nth b i))) (range 0 (len a))))
|
||||
:else (= a b))))
|
||||
|
||||
(define
|
||||
er-parse-test
|
||||
(fn
|
||||
(name actual expected)
|
||||
(set! er-parse-test-count (+ er-parse-test-count 1))
|
||||
(if
|
||||
(deep= actual expected)
|
||||
(set! er-parse-test-pass (+ er-parse-test-pass 1))
|
||||
(append! er-parse-test-fails {:actual actual :expected expected :name name}))))
|
||||
(define pe er-parse-expr)
|
||||
|
||||
;; ── literals ──────────────────────────────────────────────────────
|
||||
(define pm er-parse-module)
|
||||
|
||||
(er-parse-test "int" (pe "42") {:value "42" :type "integer"})
|
||||
|
||||
(er-parse-test "float" (pe "3.14") {:value "3.14" :type "float"})
|
||||
|
||||
(er-parse-test "atom" (pe "foo") {:value "foo" :type "atom"})
|
||||
|
||||
(er-parse-test "quoted atom" (pe "'Hello'") {:value "Hello" :type "atom"})
|
||||
|
||||
(er-parse-test "var" (pe "X") {:type "var" :name "X"})
|
||||
|
||||
(er-parse-test "wildcard" (pe "_") {:type "var" :name "_"})
|
||||
|
||||
(er-parse-test "string" (pe "\"hello\"") {:value "hello" :type "string"})
|
||||
|
||||
;; ── tuples ────────────────────────────────────────────────────────
|
||||
(er-parse-test "nil list" (pe "[]") {:type "nil"})
|
||||
|
||||
(er-parse-test "empty tuple" (pe "{}") {:elements (list) :type "tuple"})
|
||||
|
||||
(er-parse-test "pair" (pe "{ok, 1}") {:elements (list {:value "ok" :type "atom"} {:value "1" :type "integer"}) :type "tuple"})
|
||||
|
||||
;; ── lists ─────────────────────────────────────────────────────────
|
||||
(er-parse-test "triple" (pe "{a, b, c}") {:elements (list {:value "a" :type "atom"} {:value "b" :type "atom"} {:value "c" :type "atom"}) :type "tuple"})
|
||||
|
||||
(er-parse-test "list [1]" (pe "[1]") {:head {:value "1" :type "integer"} :tail {:type "nil"} :type "cons"})
|
||||
|
||||
(er-parse-test "cons [H|T]" (pe "[H|T]") {:head {:type "var" :name "H"} :tail {:type "var" :name "T"} :type "cons"})
|
||||
|
||||
;; ── operators / precedence ────────────────────────────────────────
|
||||
(er-parse-test "list [1,2]" (pe "[1,2]") {:head {:value "1" :type "integer"} :tail {:head {:value "2" :type "integer"} :tail {:type "nil"} :type "cons"} :type "cons"})
|
||||
|
||||
(er-parse-test "add" (pe "1 + 2") {:args (list {:value "1" :type "integer"} {:value "2" :type "integer"}) :type "op" :op "+"})
|
||||
|
||||
(er-parse-test "mul binds tighter" (pe "1 + 2 * 3") {:args (list {:value "1" :type "integer"} {:args (list {:value "2" :type "integer"} {:value "3" :type "integer"}) :type "op" :op "*"}) :type "op" :op "+"})
|
||||
|
||||
(er-parse-test "parens" (pe "(1 + 2) * 3") {:args (list {:args (list {:value "1" :type "integer"} {:value "2" :type "integer"}) :type "op" :op "+"} {:value "3" :type "integer"}) :type "op" :op "*"})
|
||||
|
||||
(er-parse-test "neg unary" (pe "-5") {:arg {:value "5" :type "integer"} :type "unop" :op "-"})
|
||||
|
||||
(er-parse-test "not" (pe "not X") {:arg {:type "var" :name "X"} :type "unop" :op "not"})
|
||||
|
||||
(er-parse-test "match" (pe "X = 42") {:rhs {:value "42" :type "integer"} :type "match" :lhs {:type "var" :name "X"}})
|
||||
|
||||
(er-parse-test "cmp" (pe "X > 0") {:args (list {:type "var" :name "X"} {:value "0" :type "integer"}) :type "op" :op ">"})
|
||||
|
||||
(er-parse-test "eq =:=" (pe "X =:= 1") {:args (list {:type "var" :name "X"} {:value "1" :type "integer"}) :type "op" :op "=:="})
|
||||
|
||||
(er-parse-test "send" (pe "Pid ! hello") {:msg {:value "hello" :type "atom"} :type "send" :to {:type "var" :name "Pid"}})
|
||||
|
||||
(er-parse-test "andalso" (pe "X andalso Y") {:args (list {:type "var" :name "X"} {:type "var" :name "Y"}) :type "op" :op "andalso"})
|
||||
|
||||
(er-parse-test "orelse" (pe "X orelse Y") {:args (list {:type "var" :name "X"} {:type "var" :name "Y"}) :type "op" :op "orelse"})
|
||||
|
||||
(er-parse-test "++" (pe "A ++ B") {:args (list {:type "var" :name "A"} {:type "var" :name "B"}) :type "op" :op "++"})
|
||||
|
||||
(er-parse-test "div" (pe "10 div 3") {:args (list {:value "10" :type "integer"} {:value "3" :type "integer"}) :type "op" :op "div"})
|
||||
|
||||
;; ── calls ─────────────────────────────────────────────────────────
|
||||
(er-parse-test "rem" (pe "10 rem 3") {:args (list {:value "10" :type "integer"} {:value "3" :type "integer"}) :type "op" :op "rem"})
|
||||
|
||||
(er-parse-test "local call 0-arity" (pe "self()") {:args (list) :fun {:value "self" :type "atom"} :type "call"})
|
||||
|
||||
(er-parse-test "local call 2-arg" (pe "foo(1, 2)") {:args (list {:value "1" :type "integer"} {:value "2" :type "integer"}) :fun {:value "foo" :type "atom"} :type "call"})
|
||||
|
||||
;; ── if / case / receive / fun / try ───────────────────────────────
|
||||
(er-parse-test "remote call" (pe "lists:map(F, L)") {:args (list {:type "var" :name "F"} {:type "var" :name "L"}) :fun {:fun {:value "map" :type "atom"} :mod {:value "lists" :type "atom"} :type "remote"} :type "call"})
|
||||
|
||||
(er-parse-test "if-else" (pe "if X > 0 -> pos; true -> neg end") {:clauses (list {:body (list {:value "pos" :type "atom"}) :guards (list (list {:args (list {:type "var" :name "X"} {:value "0" :type "integer"}) :type "op" :op ">"}))} {:body (list {:value "neg" :type "atom"}) :guards (list (list {:value "true" :type "atom"}))}) :type "if"})
|
||||
|
||||
(er-parse-test
|
||||
"case 2-clause"
|
||||
(pe "case X of 0 -> zero; _ -> nz end")
|
||||
{:expr {:type "var" :name "X"} :clauses (list {:pattern {:value "0" :type "integer"} :body (list {:value "zero" :type "atom"}) :guards (list)} {:pattern {:type "var" :name "_"} :body (list {:value "nz" :type "atom"}) :guards (list)}) :type "case"})
|
||||
|
||||
(er-parse-test
|
||||
"case with guard"
|
||||
(pe "case X of N when N > 0 -> pos; _ -> other end")
|
||||
{:expr {:type "var" :name "X"} :clauses (list {:pattern {:type "var" :name "N"} :body (list {:value "pos" :type "atom"}) :guards (list (list {:args (list {:type "var" :name "N"} {:value "0" :type "integer"}) :type "op" :op ">"}))} {:pattern {:type "var" :name "_"} :body (list {:value "other" :type "atom"}) :guards (list)}) :type "case"})
|
||||
|
||||
(er-parse-test "receive one clause" (pe "receive X -> X end") {:clauses (list {:pattern {:type "var" :name "X"} :body (list {:type "var" :name "X"}) :guards (list)}) :type "receive" :after-ms nil :after-body (list)})
|
||||
|
||||
(er-parse-test
|
||||
"receive after"
|
||||
(pe "receive X -> X after 1000 -> timeout end")
|
||||
{:clauses (list {:pattern {:type "var" :name "X"} :body (list {:type "var" :name "X"}) :guards (list)}) :type "receive" :after-ms {:value "1000" :type "integer"} :after-body (list {:value "timeout" :type "atom"})})
|
||||
|
||||
(er-parse-test
|
||||
"receive just after"
|
||||
(pe "receive after 0 -> ok end")
|
||||
{:clauses (list) :type "receive" :after-ms {:value "0" :type "integer"} :after-body (list {:value "ok" :type "atom"})})
|
||||
|
||||
(er-parse-test
|
||||
"anonymous fun 1-clause"
|
||||
(pe "fun (X) -> X * 2 end")
|
||||
{:clauses (list {:patterns (list {:type "var" :name "X"}) :body (list {:args (list {:type "var" :name "X"} {:value "2" :type "integer"}) :type "op" :op "*"}) :guards (list) :name nil}) :type "fun"})
|
||||
|
||||
(er-parse-test "begin/end block" (pe "begin 1, 2, 3 end") {:exprs (list {:value "1" :type "integer"} {:value "2" :type "integer"} {:value "3" :type "integer"}) :type "block"})
|
||||
|
||||
(er-parse-test "try/catch" (pe "try foo() catch error:X -> X end") {:exprs (list {:args (list) :fun {:value "foo" :type "atom"} :type "call"}) :catch-clauses (list {:pattern {:type "var" :name "X"} :body (list {:type "var" :name "X"}) :class {:value "error" :type "atom"} :guards (list)}) :type "try" :of-clauses (list) :after (list)})
|
||||
|
||||
;; ── module-level ──────────────────────────────────────────────────
|
||||
(er-parse-test
|
||||
"try catch default class"
|
||||
(pe "try foo() catch X -> X end")
|
||||
{:exprs (list {:args (list) :fun {:value "foo" :type "atom"} :type "call"}) :catch-clauses (list {:pattern {:type "var" :name "X"} :body (list {:type "var" :name "X"}) :class {:value "throw" :type "atom"} :guards (list)}) :type "try" :of-clauses (list) :after (list)})
|
||||
|
||||
(er-parse-test "minimal module" (pm "-module(m).\nfoo(X) -> X.") {:functions (list {:arity 1 :clauses (list {:patterns (list {:type "var" :name "X"}) :body (list {:type "var" :name "X"}) :guards (list) :name "foo"}) :type "function" :name "foo"}) :type "module" :attrs (list) :name "m"})
|
||||
|
||||
(er-parse-test
|
||||
"module with export"
|
||||
(let
|
||||
((m (pm "-module(m).\n-export([foo/1]).\nfoo(X) -> X.")))
|
||||
(list
|
||||
(get m :name)
|
||||
(len (get m :attrs))
|
||||
(get (nth (get m :attrs) 0) :name)
|
||||
(len (get m :functions))))
|
||||
(list "m" 1 "export" 1))
|
||||
|
||||
(er-parse-test
|
||||
"two-clause function"
|
||||
(let
|
||||
((m (pm "-module(m).\nf(0) -> z; f(N) -> n.")))
|
||||
(list (len (get (nth (get m :functions) 0) :clauses))))
|
||||
(list 2))
|
||||
|
||||
(er-parse-test
|
||||
"multi-arg function"
|
||||
(let
|
||||
((m (pm "-module(m).\nadd(X, Y) -> X + Y.")))
|
||||
(list (get (nth (get m :functions) 0) :arity)))
|
||||
(list 2))
|
||||
|
||||
(er-parse-test
|
||||
"zero-arity"
|
||||
(let
|
||||
((m (pm "-module(m).\npi() -> 3.14.")))
|
||||
(list (get (nth (get m :functions) 0) :arity)))
|
||||
(list 0))
|
||||
|
||||
(er-parse-test
|
||||
"function with guard"
|
||||
(let
|
||||
((m (pm "-module(m).\nabs(N) when N < 0 -> -N; abs(N) -> N.")))
|
||||
(list
|
||||
(len (get (nth (get m :functions) 0) :clauses))
|
||||
(len
|
||||
(get (nth (get (nth (get m :functions) 0) :clauses) 0) :guards))))
|
||||
(list 2 1))
|
||||
|
||||
;; ── combined programs ────────────────────────────────────────────
|
||||
(er-parse-test
|
||||
"three-function module"
|
||||
(let
|
||||
((m (pm "-module(m).\na() -> 1.\nb() -> 2.\nc() -> 3.")))
|
||||
(list
|
||||
(len (get m :functions))
|
||||
(get (nth (get m :functions) 0) :name)
|
||||
(get (nth (get m :functions) 1) :name)
|
||||
(get (nth (get m :functions) 2) :name)))
|
||||
(list 3 "a" "b" "c"))
|
||||
|
||||
(er-parse-test
|
||||
"factorial"
|
||||
(let
|
||||
((m (pm "-module(fact).\n-export([fact/1]).\nfact(0) -> 1;\nfact(N) -> N * fact(N - 1).")))
|
||||
(list
|
||||
(get m :name)
|
||||
(get (nth (get m :functions) 0) :arity)
|
||||
(len (get (nth (get m :functions) 0) :clauses))))
|
||||
(list "fact" 1 2))
|
||||
|
||||
(er-parse-test
|
||||
"ping-pong snippet"
|
||||
(let
|
||||
((e (pe "receive ping -> Sender ! pong end")))
|
||||
(list (get e :type) (len (get e :clauses))))
|
||||
(list "receive" 1))
|
||||
|
||||
(er-parse-test
|
||||
"case with nested tuple"
|
||||
(let
|
||||
((e (pe "case X of {ok, V} -> V; error -> 0 end")))
|
||||
(list (get e :type) (len (get e :clauses))))
|
||||
(list "case" 2))
|
||||
|
||||
;; ── summary ──────────────────────────────────────────────────────
|
||||
(er-parse-test
|
||||
"deep expression"
|
||||
(let ((e (pe "A + B * C - D / E"))) (get e :op))
|
||||
"-")
|
||||
|
||||
(define
|
||||
er-parse-test-summary
|
||||
(str "parser " er-parse-test-pass "/" er-parse-test-count))
|
||||
245
lib/erlang/tests/tokenize.sx
Normal file
245
lib/erlang/tests/tokenize.sx
Normal file
@@ -0,0 +1,245 @@
|
||||
;; Erlang tokenizer tests
|
||||
|
||||
(define er-test-count 0)
|
||||
(define er-test-pass 0)
|
||||
(define er-test-fails (list))
|
||||
|
||||
(define tok-type (fn (t) (get t :type)))
|
||||
(define tok-value (fn (t) (get t :value)))
|
||||
|
||||
(define tok-types (fn (src) (map tok-type (er-tokenize src))))
|
||||
|
||||
(define tok-values (fn (src) (map tok-value (er-tokenize src))))
|
||||
|
||||
(define
|
||||
er-test
|
||||
(fn
|
||||
(name actual expected)
|
||||
(set! er-test-count (+ er-test-count 1))
|
||||
(if
|
||||
(= actual expected)
|
||||
(set! er-test-pass (+ er-test-pass 1))
|
||||
(append! er-test-fails {:actual actual :expected expected :name name}))))
|
||||
|
||||
;; ── atoms ─────────────────────────────────────────────────────────
|
||||
(er-test "atom: bare" (tok-values "foo") (list "foo" nil))
|
||||
|
||||
(er-test
|
||||
"atom: snake_case"
|
||||
(tok-values "hello_world")
|
||||
(list "hello_world" nil))
|
||||
|
||||
(er-test
|
||||
"atom: quoted"
|
||||
(tok-values "'Hello World'")
|
||||
(list "Hello World" nil))
|
||||
|
||||
(er-test
|
||||
"atom: quoted with special chars"
|
||||
(tok-values "'foo-bar'")
|
||||
(list "foo-bar" nil))
|
||||
|
||||
(er-test "atom: with @" (tok-values "node@host") (list "node@host" nil))
|
||||
|
||||
(er-test
|
||||
"atom: type is atom"
|
||||
(tok-types "foo bar baz")
|
||||
(list "atom" "atom" "atom" "eof"))
|
||||
|
||||
;; ── variables ─────────────────────────────────────────────────────
|
||||
(er-test "var: uppercase" (tok-values "X") (list "X" nil))
|
||||
|
||||
(er-test "var: camelcase" (tok-values "FooBar") (list "FooBar" nil))
|
||||
|
||||
(er-test "var: underscore" (tok-values "_") (list "_" nil))
|
||||
|
||||
(er-test "var: _prefixed" (tok-values "_ignored") (list "_ignored" nil))
|
||||
|
||||
(er-test "var: type" (tok-types "X Y _") (list "var" "var" "var" "eof"))
|
||||
|
||||
;; ── integers ──────────────────────────────────────────────────────
|
||||
(er-test "integer: zero" (tok-values "0") (list "0" nil))
|
||||
|
||||
(er-test "integer: positive" (tok-values "42") (list "42" nil))
|
||||
|
||||
(er-test "integer: big" (tok-values "12345678") (list "12345678" nil))
|
||||
|
||||
(er-test "integer: hex" (tok-values "16#FF") (list "16#FF" nil))
|
||||
|
||||
(er-test
|
||||
"integer: type"
|
||||
(tok-types "1 2 3")
|
||||
(list "integer" "integer" "integer" "eof"))
|
||||
|
||||
(er-test "integer: char literal" (tok-types "$a") (list "integer" "eof"))
|
||||
|
||||
(er-test
|
||||
"integer: char literal escape"
|
||||
(tok-types "$\\n")
|
||||
(list "integer" "eof"))
|
||||
|
||||
;; ── floats ────────────────────────────────────────────────────────
|
||||
(er-test "float: simple" (tok-values "3.14") (list "3.14" nil))
|
||||
|
||||
(er-test "float: exponent" (tok-values "1.0e10") (list "1.0e10" nil))
|
||||
|
||||
(er-test "float: neg exponent" (tok-values "1.5e-3") (list "1.5e-3" nil))
|
||||
|
||||
(er-test "float: type" (tok-types "3.14") (list "float" "eof"))
|
||||
|
||||
;; ── strings ───────────────────────────────────────────────────────
|
||||
(er-test "string: simple" (tok-values "\"hello\"") (list "hello" nil))
|
||||
|
||||
(er-test "string: empty" (tok-values "\"\"") (list "" nil))
|
||||
|
||||
(er-test "string: escape newline" (tok-values "\"a\\nb\"") (list "a\nb" nil))
|
||||
|
||||
(er-test "string: type" (tok-types "\"hello\"") (list "string" "eof"))
|
||||
|
||||
;; ── keywords ──────────────────────────────────────────────────────
|
||||
(er-test "keyword: case" (tok-types "case") (list "keyword" "eof"))
|
||||
|
||||
(er-test
|
||||
"keyword: of end when"
|
||||
(tok-types "of end when")
|
||||
(list "keyword" "keyword" "keyword" "eof"))
|
||||
|
||||
(er-test
|
||||
"keyword: receive after"
|
||||
(tok-types "receive after")
|
||||
(list "keyword" "keyword" "eof"))
|
||||
|
||||
(er-test
|
||||
"keyword: fun try catch"
|
||||
(tok-types "fun try catch")
|
||||
(list "keyword" "keyword" "keyword" "eof"))
|
||||
|
||||
(er-test
|
||||
"keyword: andalso orelse not"
|
||||
(tok-types "andalso orelse not")
|
||||
(list "keyword" "keyword" "keyword" "eof"))
|
||||
|
||||
(er-test
|
||||
"keyword: div rem"
|
||||
(tok-types "div rem")
|
||||
(list "keyword" "keyword" "eof"))
|
||||
|
||||
;; ── punct ─────────────────────────────────────────────────────────
|
||||
(er-test "punct: parens" (tok-values "()") (list "(" ")" nil))
|
||||
|
||||
(er-test "punct: braces" (tok-values "{}") (list "{" "}" nil))
|
||||
|
||||
(er-test "punct: brackets" (tok-values "[]") (list "[" "]" nil))
|
||||
|
||||
(er-test
|
||||
"punct: commas"
|
||||
(tok-types "a,b")
|
||||
(list "atom" "punct" "atom" "eof"))
|
||||
|
||||
(er-test
|
||||
"punct: semicolon"
|
||||
(tok-types "a;b")
|
||||
(list "atom" "punct" "atom" "eof"))
|
||||
|
||||
(er-test "punct: period" (tok-types "a.") (list "atom" "punct" "eof"))
|
||||
|
||||
(er-test "punct: arrow" (tok-values "->") (list "->" nil))
|
||||
|
||||
(er-test "punct: backarrow" (tok-values "<-") (list "<-" nil))
|
||||
|
||||
(er-test "punct: binary brackets" (tok-values "<<>>") (list "<<" ">>" nil))
|
||||
|
||||
(er-test
|
||||
"punct: cons bar"
|
||||
(tok-values "[a|b]")
|
||||
(list "[" "a" "|" "b" "]" nil))
|
||||
|
||||
(er-test "punct: double-bar (list comp)" (tok-values "||") (list "||" nil))
|
||||
|
||||
(er-test "punct: double-colon" (tok-values "::") (list "::" nil))
|
||||
|
||||
(er-test
|
||||
"punct: module-colon"
|
||||
(tok-values "lists:map")
|
||||
(list "lists" ":" "map" nil))
|
||||
|
||||
;; ── operators ─────────────────────────────────────────────────────
|
||||
(er-test
|
||||
"op: plus minus times div"
|
||||
(tok-values "+ - * /")
|
||||
(list "+" "-" "*" "/" nil))
|
||||
|
||||
(er-test
|
||||
"op: eq/neq"
|
||||
(tok-values "== /= =:= =/=")
|
||||
(list "==" "/=" "=:=" "=/=" nil))
|
||||
|
||||
(er-test "op: compare" (tok-values "< > =< >=") (list "<" ">" "=<" ">=" nil))
|
||||
|
||||
(er-test "op: list ops" (tok-values "++ --") (list "++" "--" nil))
|
||||
|
||||
(er-test "op: send" (tok-values "!") (list "!" nil))
|
||||
|
||||
(er-test "op: match" (tok-values "=") (list "=" nil))
|
||||
|
||||
;; ── comments ──────────────────────────────────────────────────────
|
||||
(er-test
|
||||
"comment: ignored"
|
||||
(tok-values "x % this is a comment\ny")
|
||||
(list "x" "y" nil))
|
||||
|
||||
(er-test
|
||||
"comment: end-of-file"
|
||||
(tok-values "x % comment to eof")
|
||||
(list "x" nil))
|
||||
|
||||
;; ── combined ──────────────────────────────────────────────────────
|
||||
(er-test
|
||||
"combined: function head"
|
||||
(tok-values "foo(X, Y) -> X + Y.")
|
||||
(list "foo" "(" "X" "," "Y" ")" "->" "X" "+" "Y" "." nil))
|
||||
|
||||
(er-test
|
||||
"combined: case expression"
|
||||
(tok-values "case X of 1 -> ok; _ -> err end")
|
||||
(list "case" "X" "of" "1" "->" "ok" ";" "_" "->" "err" "end" nil))
|
||||
|
||||
(er-test
|
||||
"combined: tuple"
|
||||
(tok-values "{ok, 42}")
|
||||
(list "{" "ok" "," "42" "}" nil))
|
||||
|
||||
(er-test
|
||||
"combined: list cons"
|
||||
(tok-values "[H|T]")
|
||||
(list "[" "H" "|" "T" "]" nil))
|
||||
|
||||
(er-test
|
||||
"combined: receive"
|
||||
(tok-values "receive X -> X end")
|
||||
(list "receive" "X" "->" "X" "end" nil))
|
||||
|
||||
(er-test
|
||||
"combined: guard"
|
||||
(tok-values "when is_integer(X)")
|
||||
(list "when" "is_integer" "(" "X" ")" nil))
|
||||
|
||||
(er-test
|
||||
"combined: module attr"
|
||||
(tok-values "-module(foo).")
|
||||
(list "-" "module" "(" "foo" ")" "." nil))
|
||||
|
||||
(er-test
|
||||
"combined: send"
|
||||
(tok-values "Pid ! {self(), hello}")
|
||||
(list "Pid" "!" "{" "self" "(" ")" "," "hello" "}" nil))
|
||||
|
||||
(er-test
|
||||
"combined: whitespace skip"
|
||||
(tok-values " a \n b \t c ")
|
||||
(list "a" "b" "c" nil))
|
||||
|
||||
;; ── report ────────────────────────────────────────────────────────
|
||||
(define
|
||||
er-tokenize-test-summary
|
||||
(str "tokenizer " er-test-pass "/" er-test-count))
|
||||
334
lib/erlang/tokenizer.sx
Normal file
334
lib/erlang/tokenizer.sx
Normal file
@@ -0,0 +1,334 @@
|
||||
;; Erlang tokenizer — produces token stream from Erlang source
|
||||
;;
|
||||
;; Tokens: {:type T :value V :pos P}
|
||||
;; Types:
|
||||
;; "atom" — foo, 'Quoted Atom'
|
||||
;; "var" — X, Foo, _Bar, _ (wildcard)
|
||||
;; "integer" — 42, 16#FF, $c (char literal)
|
||||
;; "float" — 3.14, 1.0e10
|
||||
;; "string" — "..."
|
||||
;; "keyword" — case of end if when receive after fun try catch
|
||||
;; begin do let module export import define andalso orelse
|
||||
;; not div rem bnot band bor bxor bsl bsr
|
||||
;; "punct" — ( ) { } [ ] , ; . : :: -> <- <= => | ||
|
||||
;; << >>
|
||||
;; "op" — + - * / = == /= =:= =/= < > =< >= ++ -- ! ?
|
||||
;; "eof"
|
||||
|
||||
(define er-make-token (fn (type value pos) {:pos pos :value value :type type}))
|
||||
|
||||
(define er-digit? (fn (c) (and (>= c "0") (<= c "9"))))
|
||||
|
||||
(define
|
||||
er-hex-digit?
|
||||
(fn
|
||||
(c)
|
||||
(or
|
||||
(er-digit? c)
|
||||
(and (>= c "a") (<= c "f"))
|
||||
(and (>= c "A") (<= c "F")))))
|
||||
|
||||
(define er-lower? (fn (c) (and (>= c "a") (<= c "z"))))
|
||||
|
||||
(define er-upper? (fn (c) (and (>= c "A") (<= c "Z"))))
|
||||
|
||||
(define er-letter? (fn (c) (or (er-lower? c) (er-upper? c))))
|
||||
|
||||
(define
|
||||
er-ident-char?
|
||||
(fn (c) (or (er-letter? c) (er-digit? c) (= c "_") (= c "@"))))
|
||||
|
||||
(define er-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
|
||||
|
||||
;; Erlang reserved words — everything else starting lowercase is an atom
|
||||
(define
|
||||
er-keywords
|
||||
(list
|
||||
"after"
|
||||
"and"
|
||||
"andalso"
|
||||
"band"
|
||||
"begin"
|
||||
"bnot"
|
||||
"bor"
|
||||
"bsl"
|
||||
"bsr"
|
||||
"bxor"
|
||||
"case"
|
||||
"catch"
|
||||
"cond"
|
||||
"div"
|
||||
"end"
|
||||
"fun"
|
||||
"if"
|
||||
"let"
|
||||
"not"
|
||||
"of"
|
||||
"or"
|
||||
"orelse"
|
||||
"receive"
|
||||
"rem"
|
||||
"try"
|
||||
"when"
|
||||
"xor"))
|
||||
|
||||
(define er-keyword? (fn (word) (some (fn (k) (= k word)) er-keywords)))
|
||||
|
||||
(define
|
||||
er-tokenize
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (src-len (len src)))
|
||||
(define
|
||||
er-peek
|
||||
(fn
|
||||
(offset)
|
||||
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
||||
(define er-cur (fn () (er-peek 0)))
|
||||
(define er-advance! (fn (n) (set! pos (+ pos n))))
|
||||
(define
|
||||
skip-ws!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (er-ws? (er-cur)))
|
||||
(er-advance! 1)
|
||||
(skip-ws!))))
|
||||
(define
|
||||
skip-comment!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (not (= (er-cur) "\n")))
|
||||
(er-advance! 1)
|
||||
(skip-comment!))))
|
||||
(define
|
||||
read-ident-chars
|
||||
(fn
|
||||
(start)
|
||||
(when
|
||||
(and (< pos src-len) (er-ident-char? (er-cur)))
|
||||
(er-advance! 1)
|
||||
(read-ident-chars start))
|
||||
(slice src start pos)))
|
||||
(define
|
||||
read-integer-digits
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (er-digit? (er-cur)))
|
||||
(er-advance! 1)
|
||||
(read-integer-digits))))
|
||||
(define
|
||||
read-hex-digits
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (er-hex-digit? (er-cur)))
|
||||
(er-advance! 1)
|
||||
(read-hex-digits))))
|
||||
(define
|
||||
read-number
|
||||
(fn
|
||||
(start)
|
||||
(read-integer-digits)
|
||||
(cond
|
||||
(and
|
||||
(< pos src-len)
|
||||
(= (er-cur) "#")
|
||||
(< (+ pos 1) src-len)
|
||||
(er-hex-digit? (er-peek 1)))
|
||||
(do (er-advance! 1) (read-hex-digits) {:value (slice src start pos) :type "integer"})
|
||||
(and
|
||||
(< pos src-len)
|
||||
(= (er-cur) ".")
|
||||
(< (+ pos 1) src-len)
|
||||
(er-digit? (er-peek 1)))
|
||||
(do
|
||||
(er-advance! 1)
|
||||
(read-integer-digits)
|
||||
(when
|
||||
(and
|
||||
(< pos src-len)
|
||||
(or (= (er-cur) "e") (= (er-cur) "E")))
|
||||
(er-advance! 1)
|
||||
(when
|
||||
(and
|
||||
(< pos src-len)
|
||||
(or (= (er-cur) "+") (= (er-cur) "-")))
|
||||
(er-advance! 1))
|
||||
(read-integer-digits))
|
||||
{:value (slice src start pos) :type "float"})
|
||||
:else {:value (slice src start pos) :type "integer"})))
|
||||
(define
|
||||
read-string
|
||||
(fn
|
||||
(quote-char)
|
||||
(let
|
||||
((chars (list)))
|
||||
(er-advance! 1)
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
(>= pos src-len)
|
||||
nil
|
||||
(= (er-cur) "\\")
|
||||
(do
|
||||
(er-advance! 1)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((ch (er-cur)))
|
||||
(cond
|
||||
(= ch "n")
|
||||
(append! chars "\n")
|
||||
(= ch "t")
|
||||
(append! chars "\t")
|
||||
(= ch "r")
|
||||
(append! chars "\r")
|
||||
(= ch "\\")
|
||||
(append! chars "\\")
|
||||
(= ch "\"")
|
||||
(append! chars "\"")
|
||||
(= ch "'")
|
||||
(append! chars "'")
|
||||
:else (append! chars ch))
|
||||
(er-advance! 1)))
|
||||
(loop))
|
||||
(= (er-cur) quote-char)
|
||||
(er-advance! 1)
|
||||
:else (do (append! chars (er-cur)) (er-advance! 1) (loop)))))
|
||||
(loop)
|
||||
(join "" chars))))
|
||||
(define
|
||||
er-emit!
|
||||
(fn
|
||||
(type value start)
|
||||
(append! tokens (er-make-token type value start))))
|
||||
(define
|
||||
scan!
|
||||
(fn
|
||||
()
|
||||
(skip-ws!)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((ch (er-cur)) (start pos))
|
||||
(cond
|
||||
(= ch "%")
|
||||
(do (skip-comment!) (scan!))
|
||||
(er-digit? ch)
|
||||
(do
|
||||
(let
|
||||
((tok (read-number start)))
|
||||
(er-emit! (get tok :type) (get tok :value) start))
|
||||
(scan!))
|
||||
(= ch "$")
|
||||
(do
|
||||
(er-advance! 1)
|
||||
(if
|
||||
(and (< pos src-len) (= (er-cur) "\\"))
|
||||
(do
|
||||
(er-advance! 1)
|
||||
(when (< pos src-len) (er-advance! 1)))
|
||||
(when (< pos src-len) (er-advance! 1)))
|
||||
(er-emit! "integer" (slice src start pos) start)
|
||||
(scan!))
|
||||
(er-lower? ch)
|
||||
(do
|
||||
(let
|
||||
((word (read-ident-chars start)))
|
||||
(er-emit!
|
||||
(if (er-keyword? word) "keyword" "atom")
|
||||
word
|
||||
start))
|
||||
(scan!))
|
||||
(or (er-upper? ch) (= ch "_"))
|
||||
(do
|
||||
(let
|
||||
((word (read-ident-chars start)))
|
||||
(er-emit! "var" word start))
|
||||
(scan!))
|
||||
(= ch "'")
|
||||
(do (er-emit! "atom" (read-string "'") start) (scan!))
|
||||
(= ch "\"")
|
||||
(do (er-emit! "string" (read-string "\"") start) (scan!))
|
||||
(and (= ch "<") (= (er-peek 1) "<"))
|
||||
(do (er-emit! "punct" "<<" start) (er-advance! 2) (scan!))
|
||||
(and (= ch ">") (= (er-peek 1) ">"))
|
||||
(do (er-emit! "punct" ">>" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "-") (= (er-peek 1) ">"))
|
||||
(do (er-emit! "punct" "->" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "<") (= (er-peek 1) "-"))
|
||||
(do (er-emit! "punct" "<-" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "<") (= (er-peek 1) "="))
|
||||
(do (er-emit! "punct" "<=" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "=") (= (er-peek 1) ">"))
|
||||
(do (er-emit! "punct" "=>" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "=") (= (er-peek 1) ":") (= (er-peek 2) "="))
|
||||
(do (er-emit! "op" "=:=" start) (er-advance! 3) (scan!))
|
||||
(and (= ch "=") (= (er-peek 1) "/") (= (er-peek 2) "="))
|
||||
(do (er-emit! "op" "=/=" start) (er-advance! 3) (scan!))
|
||||
(and (= ch "=") (= (er-peek 1) "="))
|
||||
(do (er-emit! "op" "==" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "/") (= (er-peek 1) "="))
|
||||
(do (er-emit! "op" "/=" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "=") (= (er-peek 1) "<"))
|
||||
(do (er-emit! "op" "=<" start) (er-advance! 2) (scan!))
|
||||
(and (= ch ">") (= (er-peek 1) "="))
|
||||
(do (er-emit! "op" ">=" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "+") (= (er-peek 1) "+"))
|
||||
(do (er-emit! "op" "++" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "-") (= (er-peek 1) "-"))
|
||||
(do (er-emit! "op" "--" start) (er-advance! 2) (scan!))
|
||||
(and (= ch ":") (= (er-peek 1) ":"))
|
||||
(do (er-emit! "punct" "::" start) (er-advance! 2) (scan!))
|
||||
(and (= ch "|") (= (er-peek 1) "|"))
|
||||
(do (er-emit! "punct" "||" start) (er-advance! 2) (scan!))
|
||||
(= ch "(")
|
||||
(do (er-emit! "punct" "(" start) (er-advance! 1) (scan!))
|
||||
(= ch ")")
|
||||
(do (er-emit! "punct" ")" start) (er-advance! 1) (scan!))
|
||||
(= ch "{")
|
||||
(do (er-emit! "punct" "{" start) (er-advance! 1) (scan!))
|
||||
(= ch "}")
|
||||
(do (er-emit! "punct" "}" start) (er-advance! 1) (scan!))
|
||||
(= ch "[")
|
||||
(do (er-emit! "punct" "[" start) (er-advance! 1) (scan!))
|
||||
(= ch "]")
|
||||
(do (er-emit! "punct" "]" start) (er-advance! 1) (scan!))
|
||||
(= ch ",")
|
||||
(do (er-emit! "punct" "," start) (er-advance! 1) (scan!))
|
||||
(= ch ";")
|
||||
(do (er-emit! "punct" ";" start) (er-advance! 1) (scan!))
|
||||
(= ch ".")
|
||||
(do (er-emit! "punct" "." start) (er-advance! 1) (scan!))
|
||||
(= ch ":")
|
||||
(do (er-emit! "punct" ":" start) (er-advance! 1) (scan!))
|
||||
(= ch "|")
|
||||
(do (er-emit! "punct" "|" start) (er-advance! 1) (scan!))
|
||||
(= ch "+")
|
||||
(do (er-emit! "op" "+" start) (er-advance! 1) (scan!))
|
||||
(= ch "-")
|
||||
(do (er-emit! "op" "-" start) (er-advance! 1) (scan!))
|
||||
(= ch "*")
|
||||
(do (er-emit! "op" "*" start) (er-advance! 1) (scan!))
|
||||
(= ch "/")
|
||||
(do (er-emit! "op" "/" start) (er-advance! 1) (scan!))
|
||||
(= ch "=")
|
||||
(do (er-emit! "op" "=" start) (er-advance! 1) (scan!))
|
||||
(= ch "<")
|
||||
(do (er-emit! "op" "<" start) (er-advance! 1) (scan!))
|
||||
(= ch ">")
|
||||
(do (er-emit! "op" ">" start) (er-advance! 1) (scan!))
|
||||
(= ch "!")
|
||||
(do (er-emit! "op" "!" start) (er-advance! 1) (scan!))
|
||||
(= ch "?")
|
||||
(do (er-emit! "op" "?" start) (er-advance! 1) (scan!))
|
||||
:else (do (er-advance! 1) (scan!)))))))
|
||||
(scan!)
|
||||
(er-emit! "eof" nil pos)
|
||||
tokens)))
|
||||
Reference in New Issue
Block a user