diff --git a/lib/erlang/parser-core.sx b/lib/erlang/parser-core.sx new file mode 100644 index 00000000..e99507d5 --- /dev/null +++ b/lib/erlang/parser-core.sx @@ -0,0 +1,49 @@ +;; Core parser helpers — shared by er-parse-expr and er-parse-module. +;; Everything reads/mutates a parser state dict: +;; {:toks TOKS :idx INDEX} + +(define er-state-make (fn (toks) {:idx 0 :toks toks})) + +(define + er-peek + (fn + (st offset) + (let + ((toks (get st :toks)) (idx (+ (get st :idx) offset))) + (if (< idx (len toks)) (nth toks idx) (nth toks (- (len toks) 1)))))) + +(define er-cur (fn (st) (er-peek st 0))) + +(define er-cur-type (fn (st) (get (er-cur st) :type))) +(define er-cur-value (fn (st) (get (er-cur st) :value))) + +(define er-advance! (fn (st) (dict-set! st :idx (+ (get st :idx) 1)))) + +(define er-at-eof? (fn (st) (= (er-cur-type st) "eof"))) + +(define + er-is? + (fn + (st type value) + (and + (= (er-cur-type st) type) + (or (= value nil) (= (er-cur-value st) value))))) + +(define + er-expect! + (fn + (st type value) + (if + (er-is? st type value) + (let ((t (er-cur st))) (er-advance! st) t) + (error + (str + "Erlang parse: expected " + type + (if value (str " '" value "'") "") + " but got " + (er-cur-type st) + " '" + (er-cur-value st) + "' at pos " + (get (er-cur st) :pos)))))) diff --git a/lib/erlang/parser-expr.sx b/lib/erlang/parser-expr.sx new file mode 100644 index 00000000..afdf6094 --- /dev/null +++ b/lib/erlang/parser-expr.sx @@ -0,0 +1,534 @@ +;; Erlang expression parser — top-level fns operating on parser state. +;; Depends on parser-core.sx (er-state-*, er-cur-*, er-is?, er-expect!) +;; and parser.sx (er-is-binop?, er-any-binop?, er-build-cons, er-slice-list). + +;; ── entry point ─────────────────────────────────────────────────── +(define + er-parse-expr + (fn + (src) + (let + ((st (er-state-make (er-tokenize src)))) + (er-parse-expr-prec st 0)))) + +;; Pratt-like operator-precedence parser. +(define + er-parse-expr-prec + (fn + (st min-prec) + (let + ((left (er-parse-unary st))) + (er-parse-expr-loop st min-prec left)))) + +(define + er-parse-expr-loop + (fn + (st min-prec left) + (if + (er-any-binop? (er-cur st) min-prec) + (let + ((tok (er-cur st))) + (cond + (er-is-binop? tok 0) + (do (er-advance! st) (er-parse-expr-loop st min-prec {:rhs (er-parse-expr-prec st 0) :type "match" :lhs left})) + (er-is-binop? tok 1) + (do (er-advance! st) (er-parse-expr-loop st min-prec {:msg (er-parse-expr-prec st 1) :type "send" :to left})) + (er-is-binop? tok 2) + (let + ((op (get tok :value))) + (er-advance! st) + (er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 3)) :type "op" :op op})) + (er-is-binop? tok 3) + (let + ((op (get tok :value))) + (er-advance! st) + (er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 4)) :type "op" :op op})) + (er-is-binop? tok 4) + (let + ((op (get tok :value))) + (er-advance! st) + (er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 5)) :type "op" :op op})) + (er-is-binop? tok 5) + (let + ((op (get tok :value))) + (er-advance! st) + (er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 5)) :type "op" :op op})) + (er-is-binop? tok 6) + (let + ((op (get tok :value))) + (er-advance! st) + (er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 7)) :type "op" :op op})) + (er-is-binop? tok 7) + (let + ((op (get tok :value))) + (er-advance! st) + (er-parse-expr-loop st min-prec {:args (list left (er-parse-expr-prec st 8)) :type "op" :op op})) + :else left)) + left))) + +(define + er-parse-unary + (fn + (st) + (cond + (er-is? st "op" "-") + (do (er-advance! st) {:arg (er-parse-unary st) :type "unop" :op "-"}) + (er-is? st "op" "+") + (do (er-advance! st) (er-parse-unary st)) + (er-is? st "keyword" "not") + (do (er-advance! st) {:arg (er-parse-unary st) :type "unop" :op "not"}) + (er-is? st "keyword" "bnot") + (do (er-advance! st) {:arg (er-parse-unary st) :type "unop" :op "bnot"}) + :else (er-parse-postfix st)))) + +(define + er-parse-postfix + (fn (st) (er-parse-postfix-loop st (er-parse-primary st)))) + +(define + er-parse-postfix-loop + (fn + (st node) + (cond + (er-is? st "punct" ":") + (do + (er-advance! st) + (let + ((rhs (er-parse-primary st))) + (er-parse-postfix-loop st {:fun rhs :mod node :type "remote"}))) + (er-is? st "punct" "(") + (let + ((args (er-parse-call-args st))) + (er-parse-postfix-loop st {:args args :fun node :type "call"})) + :else node))) + +(define + er-parse-call-args + (fn + (st) + (er-expect! st "punct" "(") + (if + (er-is? st "punct" ")") + (do (er-advance! st) (list)) + (let + ((args (list (er-parse-expr-prec st 0)))) + (er-parse-args-tail st args))))) + +(define + er-parse-args-tail + (fn + (st args) + (cond + (er-is? st "punct" ",") + (do + (er-advance! st) + (append! args (er-parse-expr-prec st 0)) + (er-parse-args-tail st args)) + (er-is? st "punct" ")") + (do (er-advance! st) args) + :else (error + (str + "Erlang parse: expected ',' or ')' in args, got '" + (er-cur-value st) + "'"))))) + +;; A body is: Expr {, Expr} +(define + er-parse-body + (fn + (st) + (let + ((exprs (list (er-parse-expr-prec st 0)))) + (er-parse-body-tail st exprs)))) + +(define + er-parse-body-tail + (fn + (st exprs) + (if + (er-is? st "punct" ",") + (do + (er-advance! st) + (append! exprs (er-parse-expr-prec st 0)) + (er-parse-body-tail st exprs)) + exprs))) + +;; Guards: G1 ; G2 ; ... where each Gi is a guard-conj (T, T, ...) +(define + er-parse-guards + (fn + (st) + (let + ((alts (list (er-parse-guard-conj st)))) + (er-parse-guards-tail st alts)))) + +(define + er-parse-guards-tail + (fn + (st alts) + (if + (er-is? st "punct" ";") + (do + (er-advance! st) + (append! alts (er-parse-guard-conj st)) + (er-parse-guards-tail st alts)) + alts))) + +(define + er-parse-guard-conj + (fn + (st) + (let + ((ts (list (er-parse-expr-prec st 0)))) + (er-parse-guard-conj-tail st ts)))) + +(define + er-parse-guard-conj-tail + (fn + (st ts) + (if + (er-is? st "punct" ",") + (do + (er-advance! st) + (append! ts (er-parse-expr-prec st 0)) + (er-parse-guard-conj-tail st ts)) + ts))) + +(define er-parse-pattern (fn (st) (er-parse-expr-prec st 0))) + +;; ── primary expressions ────────────────────────────────────────── +(define + er-parse-primary + (fn + (st) + (let + ((tok (er-cur st))) + (cond + (= (er-cur-type st) "integer") + (do (er-advance! st) {:value (get tok :value) :type "integer"}) + (= (er-cur-type st) "float") + (do (er-advance! st) {:value (get tok :value) :type "float"}) + (= (er-cur-type st) "string") + (do (er-advance! st) {:value (get tok :value) :type "string"}) + (= (er-cur-type st) "atom") + (do (er-advance! st) {:value (get tok :value) :type "atom"}) + (= (er-cur-type st) "var") + (do (er-advance! st) {:type "var" :name (get tok :value)}) + (er-is? st "punct" "(") + (do + (er-advance! st) + (let + ((e (er-parse-expr-prec st 0))) + (er-expect! st "punct" ")") + e)) + (er-is? st "punct" "{") + (er-parse-tuple st) + (er-is? st "punct" "[") + (er-parse-list st) + (er-is? st "keyword" "if") + (er-parse-if st) + (er-is? st "keyword" "case") + (er-parse-case st) + (er-is? st "keyword" "receive") + (er-parse-receive st) + (er-is? st "keyword" "begin") + (er-parse-begin st) + (er-is? st "keyword" "fun") + (er-parse-fun-expr st) + (er-is? st "keyword" "try") + (er-parse-try st) + :else (error + (str + "Erlang parse: unexpected " + (er-cur-type st) + " '" + (get tok :value) + "' at pos " + (get tok :pos))))))) + +(define + er-parse-tuple + (fn + (st) + (er-expect! st "punct" "{") + (if + (er-is? st "punct" "}") + (do (er-advance! st) {:elements (list) :type "tuple"}) + (let + ((elems (list (er-parse-expr-prec st 0)))) + (er-parse-tuple-tail st elems))))) + +(define + er-parse-tuple-tail + (fn + (st elems) + (cond + (er-is? st "punct" ",") + (do + (er-advance! st) + (append! elems (er-parse-expr-prec st 0)) + (er-parse-tuple-tail st elems)) + (er-is? st "punct" "}") + (do (er-advance! st) {:elements elems :type "tuple"}) + :else (error + (str + "Erlang parse: expected ',' or '}' in tuple, got '" + (er-cur-value st) + "'"))))) + +(define + er-parse-list + (fn + (st) + (er-expect! st "punct" "[") + (if + (er-is? st "punct" "]") + (do (er-advance! st) {:type "nil"}) + (let + ((elems (list (er-parse-expr-prec st 0)))) + (er-parse-list-tail st elems))))) + +(define + er-parse-list-tail + (fn + (st elems) + (cond + (er-is? st "punct" ",") + (do + (er-advance! st) + (append! elems (er-parse-expr-prec st 0)) + (er-parse-list-tail st elems)) + (er-is? st "punct" "|") + (do + (er-advance! st) + (let + ((tail (er-parse-expr-prec st 0))) + (er-expect! st "punct" "]") + (er-build-cons elems tail))) + (er-is? st "punct" "]") + (do (er-advance! st) (er-build-cons elems {:type "nil"})) + :else (error + (str + "Erlang parse: expected ',' '|' or ']' in list, got '" + (er-cur-value st) + "'"))))) + +;; ── if ────────────────────────────────────────────────────────── +(define + er-parse-if + (fn + (st) + (er-expect! st "keyword" "if") + (let + ((clauses (list (er-parse-if-clause st)))) + (er-parse-if-tail st clauses)))) + +(define + er-parse-if-tail + (fn + (st clauses) + (if + (er-is? st "punct" ";") + (do + (er-advance! st) + (append! clauses (er-parse-if-clause st)) + (er-parse-if-tail st clauses)) + (do (er-expect! st "keyword" "end") {:clauses clauses :type "if"})))) + +(define + er-parse-if-clause + (fn + (st) + (let + ((guards (er-parse-guards st))) + (er-expect! st "punct" "->") + (let ((body (er-parse-body st))) {:body body :guards guards})))) + +;; ── case ──────────────────────────────────────────────────────── +(define + er-parse-case + (fn + (st) + (er-expect! st "keyword" "case") + (let + ((e (er-parse-expr-prec st 0))) + (er-expect! st "keyword" "of") + (let + ((clauses (list (er-parse-case-clause st)))) + (er-parse-case-tail st e clauses))))) + +(define + er-parse-case-tail + (fn + (st e clauses) + (if + (er-is? st "punct" ";") + (do + (er-advance! st) + (append! clauses (er-parse-case-clause st)) + (er-parse-case-tail st e clauses)) + (do (er-expect! st "keyword" "end") {:expr e :clauses clauses :type "case"})))) + +(define + er-parse-case-clause + (fn + (st) + (let + ((pat (er-parse-pattern st))) + (let + ((guards (if (er-is? st "keyword" "when") (do (er-advance! st) (er-parse-guards st)) (list)))) + (er-expect! st "punct" "->") + (let ((body (er-parse-body st))) {:pattern pat :body body :guards guards}))))) + +;; ── receive ───────────────────────────────────────────────────── +(define + er-parse-receive + (fn + (st) + (er-expect! st "keyword" "receive") + (let + ((clauses (if (er-is? st "keyword" "after") (list) (list (er-parse-case-clause st))))) + (er-parse-receive-clauses st clauses)))) + +(define + er-parse-receive-clauses + (fn + (st clauses) + (cond + (er-is? st "punct" ";") + (do + (er-advance! st) + (append! clauses (er-parse-case-clause st)) + (er-parse-receive-clauses st clauses)) + (er-is? st "keyword" "after") + (do + (er-advance! st) + (let + ((after-ms (er-parse-expr-prec st 0))) + (er-expect! st "punct" "->") + (let + ((after-body (er-parse-body st))) + (er-expect! st "keyword" "end") + {:clauses clauses :type "receive" :after-ms after-ms :after-body after-body}))) + :else (do (er-expect! st "keyword" "end") {:clauses clauses :type "receive" :after-ms nil :after-body (list)})))) + +(define + er-parse-begin + (fn + (st) + (er-expect! st "keyword" "begin") + (let + ((exprs (er-parse-body st))) + (er-expect! st "keyword" "end") + {:exprs exprs :type "block"}))) + +(define + er-parse-fun-expr + (fn + (st) + (er-expect! st "keyword" "fun") + (cond + (er-is? st "punct" "(") + (let + ((clauses (list (er-parse-fun-clause st nil)))) + (er-parse-fun-expr-tail st clauses)) + :else (error "Erlang parse: fun-ref syntax not yet supported")))) + +(define + er-parse-fun-expr-tail + (fn + (st clauses) + (if + (er-is? st "punct" ";") + (do + (er-advance! st) + (append! clauses (er-parse-fun-clause st nil)) + (er-parse-fun-expr-tail st clauses)) + (do (er-expect! st "keyword" "end") {:clauses clauses :type "fun"})))) + +(define + er-parse-fun-clause + (fn + (st named-name) + (er-expect! st "punct" "(") + (let + ((patterns (if (er-is? st "punct" ")") (list) (er-parse-pattern-list st (list (er-parse-pattern st)))))) + (er-expect! st "punct" ")") + (let + ((guards (if (er-is? st "keyword" "when") (do (er-advance! st) (er-parse-guards st)) (list)))) + (er-expect! st "punct" "->") + (let ((body (er-parse-body st))) {:patterns patterns :body body :guards guards :name named-name}))))) + +(define + er-parse-pattern-list + (fn + (st pats) + (if + (er-is? st "punct" ",") + (do + (er-advance! st) + (append! pats (er-parse-pattern st)) + (er-parse-pattern-list st pats)) + pats))) + +;; ── try ───────────────────────────────────────────────────────── +(define + er-parse-try + (fn + (st) + (er-expect! st "keyword" "try") + (let + ((exprs (er-parse-body st)) + (of-clauses (list)) + (catch-clauses (list)) + (after-body (list))) + (when + (er-is? st "keyword" "of") + (er-advance! st) + (append! of-clauses (er-parse-case-clause st)) + (er-parse-try-of-tail st of-clauses)) + (when + (er-is? st "keyword" "catch") + (er-advance! st) + (append! catch-clauses (er-parse-catch-clause st)) + (er-parse-try-catch-tail st catch-clauses)) + (when + (er-is? st "keyword" "after") + (er-advance! st) + (set! after-body (er-parse-body st))) + (er-expect! st "keyword" "end") + {:exprs exprs :catch-clauses catch-clauses :type "try" :of-clauses of-clauses :after after-body}))) + +(define + er-parse-try-of-tail + (fn + (st clauses) + (when + (er-is? st "punct" ";") + (er-advance! st) + (append! clauses (er-parse-case-clause st)) + (er-parse-try-of-tail st clauses)))) + +(define + er-parse-try-catch-tail + (fn + (st clauses) + (when + (er-is? st "punct" ";") + (er-advance! st) + (append! clauses (er-parse-catch-clause st)) + (er-parse-try-catch-tail st clauses)))) + +(define + er-parse-catch-clause + (fn + (st) + (let + ((p1 (er-parse-pattern st))) + (let + ((klass (if (= (get p1 :type) "remote") (get p1 :mod) {:value "throw" :type "atom"})) + (pat (if (= (get p1 :type) "remote") (get p1 :fun) p1))) + (let + ((guards (if (er-is? st "keyword" "when") (do (er-advance! st) (er-parse-guards st)) (list)))) + (er-expect! st "punct" "->") + (let ((body (er-parse-body st))) {:pattern pat :body body :class klass :guards guards})))))) diff --git a/lib/erlang/parser-module.sx b/lib/erlang/parser-module.sx new file mode 100644 index 00000000..00109abb --- /dev/null +++ b/lib/erlang/parser-module.sx @@ -0,0 +1,113 @@ +;; Erlang module parser — reads top-level forms and builds a module AST. +;; +;; Depends on parser-core.sx, parser.sx, parser-expr.sx. + +(define + er-parse-module + (fn + (src) + (let + ((st (er-state-make (er-tokenize src))) + (mod-ref (list nil)) + (attrs (list)) + (functions (list))) + (er-parse-module-loop st mod-ref attrs functions) + {:functions functions :type "module" :attrs attrs :name (nth mod-ref 0)}))) + +(define + er-parse-module-loop + (fn + (st mod-ref attrs functions) + (when + (not (er-at-eof? st)) + (er-parse-top-form st mod-ref attrs functions) + (er-parse-module-loop st mod-ref attrs functions)))) + +(define + er-parse-top-form + (fn + (st mod-ref attrs functions) + (cond + (er-is? st "op" "-") + (do + (er-advance! st) + (let + ((attr-name (er-cur-value st))) + (er-advance! st) + (let + ((args (er-parse-attr-args st))) + (er-expect! st "punct" ".") + (cond + (= attr-name "module") + (set-nth! mod-ref 0 (get (nth args 0) :value)) + :else (append! attrs {:args args :name attr-name}))))) + (= (er-cur-type st) "atom") + (append! functions (er-parse-function st)) + :else (error + (str + "Erlang parse (top): unexpected " + (er-cur-type st) + " '" + (er-cur-value st) + "' at pos " + (get (er-cur st) :pos)))))) + +(define + er-parse-attr-args + (fn + (st) + (er-expect! st "punct" "(") + (if + (er-is? st "punct" ")") + (do (er-advance! st) (list)) + (let + ((args (list (er-parse-attr-arg st)))) + (er-parse-attr-args-tail st args))))) + +(define + er-parse-attr-args-tail + (fn + (st args) + (cond + (er-is? st "punct" ",") + (do + (er-advance! st) + (append! args (er-parse-attr-arg st)) + (er-parse-attr-args-tail st args)) + (er-is? st "punct" ")") + (do (er-advance! st) args) + :else (error (str "Erlang parse attr: got '" (er-cur-value st) "'"))))) + +;; Attribute args often contain `Name/Arity` pairs — parse as a +;; general expression so the caller can interpret the shape. +(define er-parse-attr-arg (fn (st) (er-parse-expr-prec st 0))) + +(define + er-parse-function + (fn + (st) + (let + ((name (er-cur-value st))) + (er-advance! st) + (let + ((clauses (list (er-parse-fun-clause st name)))) + (er-parse-function-tail st name clauses) + (er-expect! st "punct" ".") + (let ((arity (len (get (nth clauses 0) :patterns)))) {:arity arity :clauses clauses :type "function" :name name}))))) + +(define + er-parse-function-tail + (fn + (st name clauses) + (when + (er-is? st "punct" ";") + (let + ((save (get st :idx))) + (er-advance! st) + (if + (and (= (er-cur-type st) "atom") (= (er-cur-value st) name)) + (do + (er-advance! st) + (append! clauses (er-parse-fun-clause st name)) + (er-parse-function-tail st name clauses)) + (dict-set! st :idx save)))))) diff --git a/lib/erlang/parser.sx b/lib/erlang/parser.sx new file mode 100644 index 00000000..a3ee9a07 --- /dev/null +++ b/lib/erlang/parser.sx @@ -0,0 +1,111 @@ +;; Erlang parser — turns a token list into an AST. +;; +;; Shared state lives in the surrounding `let` of `er-parse-*`. +;; All helpers use recursion (no `while` in SX). +;; +;; AST node shapes: +;; {:type "atom" :value "foo"} +;; {:type "integer" :value "42"} ; value kept as string +;; {:type "float" :value "3.14"} +;; {:type "string" :value "hi"} +;; {:type "var" :name "X"} ; "_" is wildcard +;; {:type "nil"} +;; {:type "tuple" :elements [...]} +;; {:type "cons" :head E :tail E} +;; {:type "call" :fun E :args [...]} +;; {:type "remote" :mod E :fun E} +;; {:type "op" :op OP :args [L R]} +;; {:type "unop" :op OP :arg E} +;; {:type "match" :lhs P :rhs E} +;; {:type "send" :to E :msg E} +;; {:type "if" :clauses [{:guards [...] :body [...]} ...]} +;; {:type "case" :expr E :clauses [{:pattern P :guards [...] :body [...]} ...]} +;; {:type "receive" :clauses [...] :after-ms E-or-nil :after-body [...]} +;; {:type "fun" :clauses [...]} +;; {:type "block" :exprs [...]} +;; {:type "try" :exprs [...] :of-clauses [...] :catch-clauses [...] :after [...]} +;; Top-level: {:type "module" :name A :attrs [{:name A :args [...]} ...] :functions [...]} +;; {:type "function" :name A :arity N :clauses [{:name :patterns :guards :body}]} + +(define + er-is-binop? + (fn + (tok prec) + (let + ((ty (get tok :type)) (v (get tok :value))) + (cond + (= prec 0) + (and (= ty "op") (= v "=")) + (= prec 1) + (and (= ty "op") (= v "!")) + (= prec 2) + (or + (and (= ty "keyword") (= v "orelse")) + (and (= ty "keyword") (= v "or")) + (and (= ty "keyword") (= v "xor"))) + (= prec 3) + (or + (and (= ty "keyword") (= v "andalso")) + (and (= ty "keyword") (= v "and"))) + (= prec 4) + (and + (= ty "op") + (or + (= v "==") + (= v "/=") + (= v "=:=") + (= v "=/=") + (= v "<") + (= v ">") + (= v "=<") + (= v ">="))) + (= prec 5) + (and (= ty "op") (or (= v "++") (= v "--"))) + (= prec 6) + (and (= ty "op") (or (= v "+") (= v "-"))) + (= prec 7) + (or + (and (= ty "op") (or (= v "*") (= v "/"))) + (and + (= ty "keyword") + (or + (= v "div") + (= v "rem") + (= v "band") + (= v "bor") + (= v "bxor") + (= v "bsl") + (= v "bsr")))) + :else false)))) + +(define + er-any-binop? + (fn + (tok min-prec) + (or + (and (>= 0 min-prec) (er-is-binop? tok 0)) + (and (>= 1 min-prec) (er-is-binop? tok 1)) + (and (>= 2 min-prec) (er-is-binop? tok 2)) + (and (>= 3 min-prec) (er-is-binop? tok 3)) + (and (>= 4 min-prec) (er-is-binop? tok 4)) + (and (>= 5 min-prec) (er-is-binop? tok 5)) + (and (>= 6 min-prec) (er-is-binop? tok 6)) + (and (>= 7 min-prec) (er-is-binop? tok 7))))) + +(define + er-slice-list + (fn + (xs from) + (if + (>= from (len xs)) + (list) + (let + ((out (list))) + (for-each + (fn (i) (append! out (nth xs i))) + (range from (len xs))) + out)))) + +(define + er-build-cons + (fn (elems tail) (if (= (len elems) 0) tail {:head (nth elems 0) :tail (er-build-cons (er-slice-list elems 1) tail) :type "cons"}))) diff --git a/lib/erlang/tests/parse.sx b/lib/erlang/tests/parse.sx new file mode 100644 index 00000000..697c3450 --- /dev/null +++ b/lib/erlang/tests/parse.sx @@ -0,0 +1,230 @@ +;; Erlang parser tests + +(define er-parse-test-count 0) +(define er-parse-test-pass 0) +(define er-parse-test-fails (list)) + +(define + deep= + (fn + (a b) + (cond + (and (= (type-of a) "dict") (= (type-of b) "dict")) + (let + ((ka (sort (keys a))) (kb (sort (keys b)))) + (and (= ka kb) (every? (fn (k) (deep= (get a k) (get b k))) ka))) + (and (= (type-of a) "list") (= (type-of b) "list")) + (and + (= (len a) (len b)) + (every? (fn (i) (deep= (nth a i) (nth b i))) (range 0 (len a)))) + :else (= a b)))) + +(define + er-parse-test + (fn + (name actual expected) + (set! er-parse-test-count (+ er-parse-test-count 1)) + (if + (deep= actual expected) + (set! er-parse-test-pass (+ er-parse-test-pass 1)) + (append! er-parse-test-fails {:actual actual :expected expected :name name})))) +(define pe er-parse-expr) + +;; ── literals ────────────────────────────────────────────────────── +(define pm er-parse-module) + +(er-parse-test "int" (pe "42") {:value "42" :type "integer"}) + +(er-parse-test "float" (pe "3.14") {:value "3.14" :type "float"}) + +(er-parse-test "atom" (pe "foo") {:value "foo" :type "atom"}) + +(er-parse-test "quoted atom" (pe "'Hello'") {:value "Hello" :type "atom"}) + +(er-parse-test "var" (pe "X") {:type "var" :name "X"}) + +(er-parse-test "wildcard" (pe "_") {:type "var" :name "_"}) + +(er-parse-test "string" (pe "\"hello\"") {:value "hello" :type "string"}) + +;; ── tuples ──────────────────────────────────────────────────────── +(er-parse-test "nil list" (pe "[]") {:type "nil"}) + +(er-parse-test "empty tuple" (pe "{}") {:elements (list) :type "tuple"}) + +(er-parse-test "pair" (pe "{ok, 1}") {:elements (list {:value "ok" :type "atom"} {:value "1" :type "integer"}) :type "tuple"}) + +;; ── lists ───────────────────────────────────────────────────────── +(er-parse-test "triple" (pe "{a, b, c}") {:elements (list {:value "a" :type "atom"} {:value "b" :type "atom"} {:value "c" :type "atom"}) :type "tuple"}) + +(er-parse-test "list [1]" (pe "[1]") {:head {:value "1" :type "integer"} :tail {:type "nil"} :type "cons"}) + +(er-parse-test "cons [H|T]" (pe "[H|T]") {:head {:type "var" :name "H"} :tail {:type "var" :name "T"} :type "cons"}) + +;; ── operators / precedence ──────────────────────────────────────── +(er-parse-test "list [1,2]" (pe "[1,2]") {:head {:value "1" :type "integer"} :tail {:head {:value "2" :type "integer"} :tail {:type "nil"} :type "cons"} :type "cons"}) + +(er-parse-test "add" (pe "1 + 2") {:args (list {:value "1" :type "integer"} {:value "2" :type "integer"}) :type "op" :op "+"}) + +(er-parse-test "mul binds tighter" (pe "1 + 2 * 3") {:args (list {:value "1" :type "integer"} {:args (list {:value "2" :type "integer"} {:value "3" :type "integer"}) :type "op" :op "*"}) :type "op" :op "+"}) + +(er-parse-test "parens" (pe "(1 + 2) * 3") {:args (list {:args (list {:value "1" :type "integer"} {:value "2" :type "integer"}) :type "op" :op "+"} {:value "3" :type "integer"}) :type "op" :op "*"}) + +(er-parse-test "neg unary" (pe "-5") {:arg {:value "5" :type "integer"} :type "unop" :op "-"}) + +(er-parse-test "not" (pe "not X") {:arg {:type "var" :name "X"} :type "unop" :op "not"}) + +(er-parse-test "match" (pe "X = 42") {:rhs {:value "42" :type "integer"} :type "match" :lhs {:type "var" :name "X"}}) + +(er-parse-test "cmp" (pe "X > 0") {:args (list {:type "var" :name "X"} {:value "0" :type "integer"}) :type "op" :op ">"}) + +(er-parse-test "eq =:=" (pe "X =:= 1") {:args (list {:type "var" :name "X"} {:value "1" :type "integer"}) :type "op" :op "=:="}) + +(er-parse-test "send" (pe "Pid ! hello") {:msg {:value "hello" :type "atom"} :type "send" :to {:type "var" :name "Pid"}}) + +(er-parse-test "andalso" (pe "X andalso Y") {:args (list {:type "var" :name "X"} {:type "var" :name "Y"}) :type "op" :op "andalso"}) + +(er-parse-test "orelse" (pe "X orelse Y") {:args (list {:type "var" :name "X"} {:type "var" :name "Y"}) :type "op" :op "orelse"}) + +(er-parse-test "++" (pe "A ++ B") {:args (list {:type "var" :name "A"} {:type "var" :name "B"}) :type "op" :op "++"}) + +(er-parse-test "div" (pe "10 div 3") {:args (list {:value "10" :type "integer"} {:value "3" :type "integer"}) :type "op" :op "div"}) + +;; ── calls ───────────────────────────────────────────────────────── +(er-parse-test "rem" (pe "10 rem 3") {:args (list {:value "10" :type "integer"} {:value "3" :type "integer"}) :type "op" :op "rem"}) + +(er-parse-test "local call 0-arity" (pe "self()") {:args (list) :fun {:value "self" :type "atom"} :type "call"}) + +(er-parse-test "local call 2-arg" (pe "foo(1, 2)") {:args (list {:value "1" :type "integer"} {:value "2" :type "integer"}) :fun {:value "foo" :type "atom"} :type "call"}) + +;; ── if / case / receive / fun / try ─────────────────────────────── +(er-parse-test "remote call" (pe "lists:map(F, L)") {:args (list {:type "var" :name "F"} {:type "var" :name "L"}) :fun {:fun {:value "map" :type "atom"} :mod {:value "lists" :type "atom"} :type "remote"} :type "call"}) + +(er-parse-test "if-else" (pe "if X > 0 -> pos; true -> neg end") {:clauses (list {:body (list {:value "pos" :type "atom"}) :guards (list (list {:args (list {:type "var" :name "X"} {:value "0" :type "integer"}) :type "op" :op ">"}))} {:body (list {:value "neg" :type "atom"}) :guards (list (list {:value "true" :type "atom"}))}) :type "if"}) + +(er-parse-test + "case 2-clause" + (pe "case X of 0 -> zero; _ -> nz end") + {:expr {:type "var" :name "X"} :clauses (list {:pattern {:value "0" :type "integer"} :body (list {:value "zero" :type "atom"}) :guards (list)} {:pattern {:type "var" :name "_"} :body (list {:value "nz" :type "atom"}) :guards (list)}) :type "case"}) + +(er-parse-test + "case with guard" + (pe "case X of N when N > 0 -> pos; _ -> other end") + {:expr {:type "var" :name "X"} :clauses (list {:pattern {:type "var" :name "N"} :body (list {:value "pos" :type "atom"}) :guards (list (list {:args (list {:type "var" :name "N"} {:value "0" :type "integer"}) :type "op" :op ">"}))} {:pattern {:type "var" :name "_"} :body (list {:value "other" :type "atom"}) :guards (list)}) :type "case"}) + +(er-parse-test "receive one clause" (pe "receive X -> X end") {:clauses (list {:pattern {:type "var" :name "X"} :body (list {:type "var" :name "X"}) :guards (list)}) :type "receive" :after-ms nil :after-body (list)}) + +(er-parse-test + "receive after" + (pe "receive X -> X after 1000 -> timeout end") + {:clauses (list {:pattern {:type "var" :name "X"} :body (list {:type "var" :name "X"}) :guards (list)}) :type "receive" :after-ms {:value "1000" :type "integer"} :after-body (list {:value "timeout" :type "atom"})}) + +(er-parse-test + "receive just after" + (pe "receive after 0 -> ok end") + {:clauses (list) :type "receive" :after-ms {:value "0" :type "integer"} :after-body (list {:value "ok" :type "atom"})}) + +(er-parse-test + "anonymous fun 1-clause" + (pe "fun (X) -> X * 2 end") + {:clauses (list {:patterns (list {:type "var" :name "X"}) :body (list {:args (list {:type "var" :name "X"} {:value "2" :type "integer"}) :type "op" :op "*"}) :guards (list) :name nil}) :type "fun"}) + +(er-parse-test "begin/end block" (pe "begin 1, 2, 3 end") {:exprs (list {:value "1" :type "integer"} {:value "2" :type "integer"} {:value "3" :type "integer"}) :type "block"}) + +(er-parse-test "try/catch" (pe "try foo() catch error:X -> X end") {:exprs (list {:args (list) :fun {:value "foo" :type "atom"} :type "call"}) :catch-clauses (list {:pattern {:type "var" :name "X"} :body (list {:type "var" :name "X"}) :class {:value "error" :type "atom"} :guards (list)}) :type "try" :of-clauses (list) :after (list)}) + +;; ── module-level ────────────────────────────────────────────────── +(er-parse-test + "try catch default class" + (pe "try foo() catch X -> X end") + {:exprs (list {:args (list) :fun {:value "foo" :type "atom"} :type "call"}) :catch-clauses (list {:pattern {:type "var" :name "X"} :body (list {:type "var" :name "X"}) :class {:value "throw" :type "atom"} :guards (list)}) :type "try" :of-clauses (list) :after (list)}) + +(er-parse-test "minimal module" (pm "-module(m).\nfoo(X) -> X.") {:functions (list {:arity 1 :clauses (list {:patterns (list {:type "var" :name "X"}) :body (list {:type "var" :name "X"}) :guards (list) :name "foo"}) :type "function" :name "foo"}) :type "module" :attrs (list) :name "m"}) + +(er-parse-test + "module with export" + (let + ((m (pm "-module(m).\n-export([foo/1]).\nfoo(X) -> X."))) + (list + (get m :name) + (len (get m :attrs)) + (get (nth (get m :attrs) 0) :name) + (len (get m :functions)))) + (list "m" 1 "export" 1)) + +(er-parse-test + "two-clause function" + (let + ((m (pm "-module(m).\nf(0) -> z; f(N) -> n."))) + (list (len (get (nth (get m :functions) 0) :clauses)))) + (list 2)) + +(er-parse-test + "multi-arg function" + (let + ((m (pm "-module(m).\nadd(X, Y) -> X + Y."))) + (list (get (nth (get m :functions) 0) :arity))) + (list 2)) + +(er-parse-test + "zero-arity" + (let + ((m (pm "-module(m).\npi() -> 3.14."))) + (list (get (nth (get m :functions) 0) :arity))) + (list 0)) + +(er-parse-test + "function with guard" + (let + ((m (pm "-module(m).\nabs(N) when N < 0 -> -N; abs(N) -> N."))) + (list + (len (get (nth (get m :functions) 0) :clauses)) + (len + (get (nth (get (nth (get m :functions) 0) :clauses) 0) :guards)))) + (list 2 1)) + +;; ── combined programs ──────────────────────────────────────────── +(er-parse-test + "three-function module" + (let + ((m (pm "-module(m).\na() -> 1.\nb() -> 2.\nc() -> 3."))) + (list + (len (get m :functions)) + (get (nth (get m :functions) 0) :name) + (get (nth (get m :functions) 1) :name) + (get (nth (get m :functions) 2) :name))) + (list 3 "a" "b" "c")) + +(er-parse-test + "factorial" + (let + ((m (pm "-module(fact).\n-export([fact/1]).\nfact(0) -> 1;\nfact(N) -> N * fact(N - 1)."))) + (list + (get m :name) + (get (nth (get m :functions) 0) :arity) + (len (get (nth (get m :functions) 0) :clauses)))) + (list "fact" 1 2)) + +(er-parse-test + "ping-pong snippet" + (let + ((e (pe "receive ping -> Sender ! pong end"))) + (list (get e :type) (len (get e :clauses)))) + (list "receive" 1)) + +(er-parse-test + "case with nested tuple" + (let + ((e (pe "case X of {ok, V} -> V; error -> 0 end"))) + (list (get e :type) (len (get e :clauses)))) + (list "case" 2)) + +;; ── summary ────────────────────────────────────────────────────── +(er-parse-test + "deep expression" + (let ((e (pe "A + B * C - D / E"))) (get e :op)) + "-") + +(define + er-parse-test-summary + (str "parser " er-parse-test-pass "/" er-parse-test-count)) diff --git a/lib/erlang/tests/tokenize.sx b/lib/erlang/tests/tokenize.sx new file mode 100644 index 00000000..6a340d92 --- /dev/null +++ b/lib/erlang/tests/tokenize.sx @@ -0,0 +1,245 @@ +;; Erlang tokenizer tests + +(define er-test-count 0) +(define er-test-pass 0) +(define er-test-fails (list)) + +(define tok-type (fn (t) (get t :type))) +(define tok-value (fn (t) (get t :value))) + +(define tok-types (fn (src) (map tok-type (er-tokenize src)))) + +(define tok-values (fn (src) (map tok-value (er-tokenize src)))) + +(define + er-test + (fn + (name actual expected) + (set! er-test-count (+ er-test-count 1)) + (if + (= actual expected) + (set! er-test-pass (+ er-test-pass 1)) + (append! er-test-fails {:actual actual :expected expected :name name})))) + +;; ── atoms ───────────────────────────────────────────────────────── +(er-test "atom: bare" (tok-values "foo") (list "foo" nil)) + +(er-test + "atom: snake_case" + (tok-values "hello_world") + (list "hello_world" nil)) + +(er-test + "atom: quoted" + (tok-values "'Hello World'") + (list "Hello World" nil)) + +(er-test + "atom: quoted with special chars" + (tok-values "'foo-bar'") + (list "foo-bar" nil)) + +(er-test "atom: with @" (tok-values "node@host") (list "node@host" nil)) + +(er-test + "atom: type is atom" + (tok-types "foo bar baz") + (list "atom" "atom" "atom" "eof")) + +;; ── variables ───────────────────────────────────────────────────── +(er-test "var: uppercase" (tok-values "X") (list "X" nil)) + +(er-test "var: camelcase" (tok-values "FooBar") (list "FooBar" nil)) + +(er-test "var: underscore" (tok-values "_") (list "_" nil)) + +(er-test "var: _prefixed" (tok-values "_ignored") (list "_ignored" nil)) + +(er-test "var: type" (tok-types "X Y _") (list "var" "var" "var" "eof")) + +;; ── integers ────────────────────────────────────────────────────── +(er-test "integer: zero" (tok-values "0") (list "0" nil)) + +(er-test "integer: positive" (tok-values "42") (list "42" nil)) + +(er-test "integer: big" (tok-values "12345678") (list "12345678" nil)) + +(er-test "integer: hex" (tok-values "16#FF") (list "16#FF" nil)) + +(er-test + "integer: type" + (tok-types "1 2 3") + (list "integer" "integer" "integer" "eof")) + +(er-test "integer: char literal" (tok-types "$a") (list "integer" "eof")) + +(er-test + "integer: char literal escape" + (tok-types "$\\n") + (list "integer" "eof")) + +;; ── floats ──────────────────────────────────────────────────────── +(er-test "float: simple" (tok-values "3.14") (list "3.14" nil)) + +(er-test "float: exponent" (tok-values "1.0e10") (list "1.0e10" nil)) + +(er-test "float: neg exponent" (tok-values "1.5e-3") (list "1.5e-3" nil)) + +(er-test "float: type" (tok-types "3.14") (list "float" "eof")) + +;; ── strings ─────────────────────────────────────────────────────── +(er-test "string: simple" (tok-values "\"hello\"") (list "hello" nil)) + +(er-test "string: empty" (tok-values "\"\"") (list "" nil)) + +(er-test "string: escape newline" (tok-values "\"a\\nb\"") (list "a\nb" nil)) + +(er-test "string: type" (tok-types "\"hello\"") (list "string" "eof")) + +;; ── keywords ────────────────────────────────────────────────────── +(er-test "keyword: case" (tok-types "case") (list "keyword" "eof")) + +(er-test + "keyword: of end when" + (tok-types "of end when") + (list "keyword" "keyword" "keyword" "eof")) + +(er-test + "keyword: receive after" + (tok-types "receive after") + (list "keyword" "keyword" "eof")) + +(er-test + "keyword: fun try catch" + (tok-types "fun try catch") + (list "keyword" "keyword" "keyword" "eof")) + +(er-test + "keyword: andalso orelse not" + (tok-types "andalso orelse not") + (list "keyword" "keyword" "keyword" "eof")) + +(er-test + "keyword: div rem" + (tok-types "div rem") + (list "keyword" "keyword" "eof")) + +;; ── punct ───────────────────────────────────────────────────────── +(er-test "punct: parens" (tok-values "()") (list "(" ")" nil)) + +(er-test "punct: braces" (tok-values "{}") (list "{" "}" nil)) + +(er-test "punct: brackets" (tok-values "[]") (list "[" "]" nil)) + +(er-test + "punct: commas" + (tok-types "a,b") + (list "atom" "punct" "atom" "eof")) + +(er-test + "punct: semicolon" + (tok-types "a;b") + (list "atom" "punct" "atom" "eof")) + +(er-test "punct: period" (tok-types "a.") (list "atom" "punct" "eof")) + +(er-test "punct: arrow" (tok-values "->") (list "->" nil)) + +(er-test "punct: backarrow" (tok-values "<-") (list "<-" nil)) + +(er-test "punct: binary brackets" (tok-values "<<>>") (list "<<" ">>" nil)) + +(er-test + "punct: cons bar" + (tok-values "[a|b]") + (list "[" "a" "|" "b" "]" nil)) + +(er-test "punct: double-bar (list comp)" (tok-values "||") (list "||" nil)) + +(er-test "punct: double-colon" (tok-values "::") (list "::" nil)) + +(er-test + "punct: module-colon" + (tok-values "lists:map") + (list "lists" ":" "map" nil)) + +;; ── operators ───────────────────────────────────────────────────── +(er-test + "op: plus minus times div" + (tok-values "+ - * /") + (list "+" "-" "*" "/" nil)) + +(er-test + "op: eq/neq" + (tok-values "== /= =:= =/=") + (list "==" "/=" "=:=" "=/=" nil)) + +(er-test "op: compare" (tok-values "< > =< >=") (list "<" ">" "=<" ">=" nil)) + +(er-test "op: list ops" (tok-values "++ --") (list "++" "--" nil)) + +(er-test "op: send" (tok-values "!") (list "!" nil)) + +(er-test "op: match" (tok-values "=") (list "=" nil)) + +;; ── comments ────────────────────────────────────────────────────── +(er-test + "comment: ignored" + (tok-values "x % this is a comment\ny") + (list "x" "y" nil)) + +(er-test + "comment: end-of-file" + (tok-values "x % comment to eof") + (list "x" nil)) + +;; ── combined ────────────────────────────────────────────────────── +(er-test + "combined: function head" + (tok-values "foo(X, Y) -> X + Y.") + (list "foo" "(" "X" "," "Y" ")" "->" "X" "+" "Y" "." nil)) + +(er-test + "combined: case expression" + (tok-values "case X of 1 -> ok; _ -> err end") + (list "case" "X" "of" "1" "->" "ok" ";" "_" "->" "err" "end" nil)) + +(er-test + "combined: tuple" + (tok-values "{ok, 42}") + (list "{" "ok" "," "42" "}" nil)) + +(er-test + "combined: list cons" + (tok-values "[H|T]") + (list "[" "H" "|" "T" "]" nil)) + +(er-test + "combined: receive" + (tok-values "receive X -> X end") + (list "receive" "X" "->" "X" "end" nil)) + +(er-test + "combined: guard" + (tok-values "when is_integer(X)") + (list "when" "is_integer" "(" "X" ")" nil)) + +(er-test + "combined: module attr" + (tok-values "-module(foo).") + (list "-" "module" "(" "foo" ")" "." nil)) + +(er-test + "combined: send" + (tok-values "Pid ! {self(), hello}") + (list "Pid" "!" "{" "self" "(" ")" "," "hello" "}" nil)) + +(er-test + "combined: whitespace skip" + (tok-values " a \n b \t c ") + (list "a" "b" "c" nil)) + +;; ── report ──────────────────────────────────────────────────────── +(define + er-tokenize-test-summary + (str "tokenizer " er-test-pass "/" er-test-count)) diff --git a/lib/erlang/tokenizer.sx b/lib/erlang/tokenizer.sx new file mode 100644 index 00000000..c46e7bc6 --- /dev/null +++ b/lib/erlang/tokenizer.sx @@ -0,0 +1,334 @@ +;; Erlang tokenizer — produces token stream from Erlang source +;; +;; Tokens: {:type T :value V :pos P} +;; Types: +;; "atom" — foo, 'Quoted Atom' +;; "var" — X, Foo, _Bar, _ (wildcard) +;; "integer" — 42, 16#FF, $c (char literal) +;; "float" — 3.14, 1.0e10 +;; "string" — "..." +;; "keyword" — case of end if when receive after fun try catch +;; begin do let module export import define andalso orelse +;; not div rem bnot band bor bxor bsl bsr +;; "punct" — ( ) { } [ ] , ; . : :: -> <- <= => | || +;; << >> +;; "op" — + - * / = == /= =:= =/= < > =< >= ++ -- ! ? +;; "eof" + +(define er-make-token (fn (type value pos) {:pos pos :value value :type type})) + +(define er-digit? (fn (c) (and (>= c "0") (<= c "9")))) + +(define + er-hex-digit? + (fn + (c) + (or + (er-digit? c) + (and (>= c "a") (<= c "f")) + (and (>= c "A") (<= c "F"))))) + +(define er-lower? (fn (c) (and (>= c "a") (<= c "z")))) + +(define er-upper? (fn (c) (and (>= c "A") (<= c "Z")))) + +(define er-letter? (fn (c) (or (er-lower? c) (er-upper? c)))) + +(define + er-ident-char? + (fn (c) (or (er-letter? c) (er-digit? c) (= c "_") (= c "@")))) + +(define er-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) + +;; Erlang reserved words — everything else starting lowercase is an atom +(define + er-keywords + (list + "after" + "and" + "andalso" + "band" + "begin" + "bnot" + "bor" + "bsl" + "bsr" + "bxor" + "case" + "catch" + "cond" + "div" + "end" + "fun" + "if" + "let" + "not" + "of" + "or" + "orelse" + "receive" + "rem" + "try" + "when" + "xor")) + +(define er-keyword? (fn (word) (some (fn (k) (= k word)) er-keywords))) + +(define + er-tokenize + (fn + (src) + (let + ((tokens (list)) (pos 0) (src-len (len src))) + (define + er-peek + (fn + (offset) + (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) + (define er-cur (fn () (er-peek 0))) + (define er-advance! (fn (n) (set! pos (+ pos n)))) + (define + skip-ws! + (fn + () + (when + (and (< pos src-len) (er-ws? (er-cur))) + (er-advance! 1) + (skip-ws!)))) + (define + skip-comment! + (fn + () + (when + (and (< pos src-len) (not (= (er-cur) "\n"))) + (er-advance! 1) + (skip-comment!)))) + (define + read-ident-chars + (fn + (start) + (when + (and (< pos src-len) (er-ident-char? (er-cur))) + (er-advance! 1) + (read-ident-chars start)) + (slice src start pos))) + (define + read-integer-digits + (fn + () + (when + (and (< pos src-len) (er-digit? (er-cur))) + (er-advance! 1) + (read-integer-digits)))) + (define + read-hex-digits + (fn + () + (when + (and (< pos src-len) (er-hex-digit? (er-cur))) + (er-advance! 1) + (read-hex-digits)))) + (define + read-number + (fn + (start) + (read-integer-digits) + (cond + (and + (< pos src-len) + (= (er-cur) "#") + (< (+ pos 1) src-len) + (er-hex-digit? (er-peek 1))) + (do (er-advance! 1) (read-hex-digits) {:value (slice src start pos) :type "integer"}) + (and + (< pos src-len) + (= (er-cur) ".") + (< (+ pos 1) src-len) + (er-digit? (er-peek 1))) + (do + (er-advance! 1) + (read-integer-digits) + (when + (and + (< pos src-len) + (or (= (er-cur) "e") (= (er-cur) "E"))) + (er-advance! 1) + (when + (and + (< pos src-len) + (or (= (er-cur) "+") (= (er-cur) "-"))) + (er-advance! 1)) + (read-integer-digits)) + {:value (slice src start pos) :type "float"}) + :else {:value (slice src start pos) :type "integer"}))) + (define + read-string + (fn + (quote-char) + (let + ((chars (list))) + (er-advance! 1) + (define + loop + (fn + () + (cond + (>= pos src-len) + nil + (= (er-cur) "\\") + (do + (er-advance! 1) + (when + (< pos src-len) + (let + ((ch (er-cur))) + (cond + (= ch "n") + (append! chars "\n") + (= ch "t") + (append! chars "\t") + (= ch "r") + (append! chars "\r") + (= ch "\\") + (append! chars "\\") + (= ch "\"") + (append! chars "\"") + (= ch "'") + (append! chars "'") + :else (append! chars ch)) + (er-advance! 1))) + (loop)) + (= (er-cur) quote-char) + (er-advance! 1) + :else (do (append! chars (er-cur)) (er-advance! 1) (loop))))) + (loop) + (join "" chars)))) + (define + er-emit! + (fn + (type value start) + (append! tokens (er-make-token type value start)))) + (define + scan! + (fn + () + (skip-ws!) + (when + (< pos src-len) + (let + ((ch (er-cur)) (start pos)) + (cond + (= ch "%") + (do (skip-comment!) (scan!)) + (er-digit? ch) + (do + (let + ((tok (read-number start))) + (er-emit! (get tok :type) (get tok :value) start)) + (scan!)) + (= ch "$") + (do + (er-advance! 1) + (if + (and (< pos src-len) (= (er-cur) "\\")) + (do + (er-advance! 1) + (when (< pos src-len) (er-advance! 1))) + (when (< pos src-len) (er-advance! 1))) + (er-emit! "integer" (slice src start pos) start) + (scan!)) + (er-lower? ch) + (do + (let + ((word (read-ident-chars start))) + (er-emit! + (if (er-keyword? word) "keyword" "atom") + word + start)) + (scan!)) + (or (er-upper? ch) (= ch "_")) + (do + (let + ((word (read-ident-chars start))) + (er-emit! "var" word start)) + (scan!)) + (= ch "'") + (do (er-emit! "atom" (read-string "'") start) (scan!)) + (= ch "\"") + (do (er-emit! "string" (read-string "\"") start) (scan!)) + (and (= ch "<") (= (er-peek 1) "<")) + (do (er-emit! "punct" "<<" start) (er-advance! 2) (scan!)) + (and (= ch ">") (= (er-peek 1) ">")) + (do (er-emit! "punct" ">>" start) (er-advance! 2) (scan!)) + (and (= ch "-") (= (er-peek 1) ">")) + (do (er-emit! "punct" "->" start) (er-advance! 2) (scan!)) + (and (= ch "<") (= (er-peek 1) "-")) + (do (er-emit! "punct" "<-" start) (er-advance! 2) (scan!)) + (and (= ch "<") (= (er-peek 1) "=")) + (do (er-emit! "punct" "<=" start) (er-advance! 2) (scan!)) + (and (= ch "=") (= (er-peek 1) ">")) + (do (er-emit! "punct" "=>" start) (er-advance! 2) (scan!)) + (and (= ch "=") (= (er-peek 1) ":") (= (er-peek 2) "=")) + (do (er-emit! "op" "=:=" start) (er-advance! 3) (scan!)) + (and (= ch "=") (= (er-peek 1) "/") (= (er-peek 2) "=")) + (do (er-emit! "op" "=/=" start) (er-advance! 3) (scan!)) + (and (= ch "=") (= (er-peek 1) "=")) + (do (er-emit! "op" "==" start) (er-advance! 2) (scan!)) + (and (= ch "/") (= (er-peek 1) "=")) + (do (er-emit! "op" "/=" start) (er-advance! 2) (scan!)) + (and (= ch "=") (= (er-peek 1) "<")) + (do (er-emit! "op" "=<" start) (er-advance! 2) (scan!)) + (and (= ch ">") (= (er-peek 1) "=")) + (do (er-emit! "op" ">=" start) (er-advance! 2) (scan!)) + (and (= ch "+") (= (er-peek 1) "+")) + (do (er-emit! "op" "++" start) (er-advance! 2) (scan!)) + (and (= ch "-") (= (er-peek 1) "-")) + (do (er-emit! "op" "--" start) (er-advance! 2) (scan!)) + (and (= ch ":") (= (er-peek 1) ":")) + (do (er-emit! "punct" "::" start) (er-advance! 2) (scan!)) + (and (= ch "|") (= (er-peek 1) "|")) + (do (er-emit! "punct" "||" start) (er-advance! 2) (scan!)) + (= ch "(") + (do (er-emit! "punct" "(" start) (er-advance! 1) (scan!)) + (= ch ")") + (do (er-emit! "punct" ")" start) (er-advance! 1) (scan!)) + (= ch "{") + (do (er-emit! "punct" "{" start) (er-advance! 1) (scan!)) + (= ch "}") + (do (er-emit! "punct" "}" start) (er-advance! 1) (scan!)) + (= ch "[") + (do (er-emit! "punct" "[" start) (er-advance! 1) (scan!)) + (= ch "]") + (do (er-emit! "punct" "]" start) (er-advance! 1) (scan!)) + (= ch ",") + (do (er-emit! "punct" "," start) (er-advance! 1) (scan!)) + (= ch ";") + (do (er-emit! "punct" ";" start) (er-advance! 1) (scan!)) + (= ch ".") + (do (er-emit! "punct" "." start) (er-advance! 1) (scan!)) + (= ch ":") + (do (er-emit! "punct" ":" start) (er-advance! 1) (scan!)) + (= ch "|") + (do (er-emit! "punct" "|" start) (er-advance! 1) (scan!)) + (= ch "+") + (do (er-emit! "op" "+" start) (er-advance! 1) (scan!)) + (= ch "-") + (do (er-emit! "op" "-" start) (er-advance! 1) (scan!)) + (= ch "*") + (do (er-emit! "op" "*" start) (er-advance! 1) (scan!)) + (= ch "/") + (do (er-emit! "op" "/" start) (er-advance! 1) (scan!)) + (= ch "=") + (do (er-emit! "op" "=" start) (er-advance! 1) (scan!)) + (= ch "<") + (do (er-emit! "op" "<" start) (er-advance! 1) (scan!)) + (= ch ">") + (do (er-emit! "op" ">" start) (er-advance! 1) (scan!)) + (= ch "!") + (do (er-emit! "op" "!" start) (er-advance! 1) (scan!)) + (= ch "?") + (do (er-emit! "op" "?" start) (er-advance! 1) (scan!)) + :else (do (er-advance! 1) (scan!))))))) + (scan!) + (er-emit! "eof" nil pos) + tokens))) diff --git a/lib/forth/compiler.sx b/lib/forth/compiler.sx new file mode 100644 index 00000000..43e8edd0 --- /dev/null +++ b/lib/forth/compiler.sx @@ -0,0 +1,274 @@ +;; Phase 2 — colon definitions, compile mode, VARIABLE/CONSTANT/VALUE/TO, @/!/+!. +;; +;; Compile-mode representation: +;; A colon-definition body is a list of "ops", each an SX lambda (fn (s) ...). +;; : FOO 1 2 + ; -> body = (push-1 push-2 call-plus) +;; References to other words are compiled as late-binding thunks so that +;; self-reference works and redefinitions take effect for future runs. +;; +;; State additions used in Phase 2: +;; "compiling" : bool — are we inside :..; ? +;; "current-def" : dict {:name "..." :body (list)} during compile +;; "vars" : dict {"addr-name" -> cell-value} for VARIABLE storage + +(define + forth-compile-token + (fn + (state tok) + (let + ((w (forth-lookup state tok))) + (if + (not (nil? w)) + (if + (get w "immediate?") + (forth-execute-word state w) + (forth-compile-call state tok)) + (let + ((n (forth-parse-number tok (get state "base")))) + (if + (not (nil? n)) + (forth-compile-lit state n) + (forth-error state (str tok " ?")))))))) + +(define + forth-compile-call + (fn + (state name) + (let + ((op (fn (s) (let ((w (forth-lookup s name))) (if (nil? w) (forth-error s (str name " ? (compiled)")) (forth-execute-word s w)))))) + (forth-def-append! state op)))) + +(define + forth-compile-lit + (fn + (state n) + (let ((op (fn (s) (forth-push s n)))) (forth-def-append! state op)))) + +(define + forth-def-append! + (fn + (state op) + (let + ((def (get state "current-def"))) + (dict-set! def "body" (concat (get def "body") (list op)))))) + +(define + forth-make-colon-body + (fn (ops) (fn (s) (for-each (fn (op) (op s)) ops)))) + +;; Override forth-interpret-token to branch on compile mode. +(define + forth-interpret-token + (fn + (state tok) + (if + (get state "compiling") + (forth-compile-token state tok) + (let + ((w (forth-lookup state tok))) + (if + (not (nil? w)) + (forth-execute-word state w) + (let + ((n (forth-parse-number tok (get state "base")))) + (if + (not (nil? n)) + (forth-push state n) + (forth-error state (str tok " ?"))))))))) + +;; Install `:` and `;` plus VARIABLE, CONSTANT, VALUE, TO, @, !, +!, RECURSE. +(define + forth-install-compiler! + (fn + (state) + (forth-def-prim! + state + ":" + (fn + (s) + (let + ((name (forth-next-token! s))) + (when (nil? name) (forth-error s ": expects name")) + (let + ((def (dict))) + (dict-set! def "name" name) + (dict-set! def "body" (list)) + (dict-set! s "current-def" def) + (dict-set! s "compiling" true))))) + (forth-def-prim-imm! + state + ";" + (fn + (s) + (let + ((def (get s "current-def"))) + (when (nil? def) (forth-error s "; outside definition")) + (let + ((ops (get def "body"))) + (let + ((body-fn (forth-make-colon-body ops))) + (dict-set! + (get s "dict") + (downcase (get def "name")) + (forth-make-word "colon-def" body-fn false)) + (dict-set! s "current-def" nil) + (dict-set! s "compiling" false)))))) + (forth-def-prim-imm! + state + "IMMEDIATE" + (fn + (s) + (let + ((def-name (get (get s "current-def") "name")) + (target + (if + (nil? (get s "current-def")) + (forth-last-defined s) + (get (get s "current-def") "name")))) + (let + ((w (forth-lookup s target))) + (when (not (nil? w)) (dict-set! w "immediate?" true)))))) + (forth-def-prim-imm! + state + "RECURSE" + (fn + (s) + (when + (not (get s "compiling")) + (forth-error s "RECURSE only in definition")) + (let + ((name (get (get s "current-def") "name"))) + (forth-compile-call s name)))) + (forth-def-prim! + state + "VARIABLE" + (fn + (s) + (let + ((name (forth-next-token! s))) + (when (nil? name) (forth-error s "VARIABLE expects name")) + (dict-set! (get s "vars") (downcase name) 0) + (forth-def-prim! + s + name + (fn (ss) (forth-push ss (downcase name))))))) + (forth-def-prim! + state + "CONSTANT" + (fn + (s) + (let + ((name (forth-next-token! s)) (v (forth-pop s))) + (when (nil? name) (forth-error s "CONSTANT expects name")) + (forth-def-prim! s name (fn (ss) (forth-push ss v)))))) + (forth-def-prim! + state + "VALUE" + (fn + (s) + (let + ((name (forth-next-token! s)) (v (forth-pop s))) + (when (nil? name) (forth-error s "VALUE expects name")) + (dict-set! (get s "vars") (downcase name) v) + (forth-def-prim! + s + name + (fn + (ss) + (forth-push ss (get (get ss "vars") (downcase name)))))))) + (forth-def-prim! + state + "TO" + (fn + (s) + (let + ((name (forth-next-token! s)) (v (forth-pop s))) + (when (nil? name) (forth-error s "TO expects name")) + (dict-set! (get s "vars") (downcase name) v)))) + (forth-def-prim! + state + "@" + (fn + (s) + (let + ((addr (forth-pop s))) + (forth-push s (or (get (get s "vars") addr) 0))))) + (forth-def-prim! + state + "!" + (fn + (s) + (let + ((addr (forth-pop s)) (v (forth-pop s))) + (dict-set! (get s "vars") addr v)))) + (forth-def-prim! + state + "+!" + (fn + (s) + (let + ((addr (forth-pop s)) (v (forth-pop s))) + (let + ((cur (or (get (get s "vars") addr) 0))) + (dict-set! (get s "vars") addr (+ cur v)))))) + state)) + +;; Track the most recently defined word name for IMMEDIATE. +(define forth-last-defined (fn (state) (get state "last-defined"))) + +;; forth-next-token!: during `:`, VARIABLE, CONSTANT, etc. we need to pull +;; the next token from the *input stream* (not the dict/stack). Phase-1 +;; interpreter fed tokens one at a time via for-each, so a parsing word +;; can't reach ahead. We rework `forth-interpret` to keep the remaining +;; token list on the state so parsing words can consume from it. + +(define + forth-next-token! + (fn + (state) + (let + ((rest (get state "input"))) + (if + (or (nil? rest) (= (len rest) 0)) + nil + (let + ((tok (first rest))) + (dict-set! state "input" (rest-of rest)) + tok))))) + +(define rest-of (fn (l) (rest l))) + +;; Rewritten forth-interpret: drives a token list stored in state so that +;; parsing words like `:`, `VARIABLE`, `CONSTANT`, `TO` can consume the +;; following token. +(define + forth-interpret + (fn + (state src) + (dict-set! state "input" (forth-tokens src)) + (forth-interpret-loop state) + state)) + +(define + forth-interpret-loop + (fn + (state) + (let + ((tok (forth-next-token! state))) + (if + (nil? tok) + state + (begin + (forth-interpret-token state tok) + (forth-interpret-loop state)))))) + +;; Re-export forth-boot to include the compiler primitives too. +(define + forth-boot + (fn + () + (let + ((s (forth-make-state))) + (forth-install-primitives! s) + (forth-install-compiler! s) + s))) diff --git a/lib/forth/interpreter.sx b/lib/forth/interpreter.sx new file mode 100644 index 00000000..d019993e --- /dev/null +++ b/lib/forth/interpreter.sx @@ -0,0 +1,48 @@ +;; Forth interpreter loop — interpret mode only (Phase 1). +;; Reads whitespace-delimited words, looks them up, executes. +;; Numbers (parsed via BASE) push onto the data stack. +;; Unknown words raise "?". + +(define + forth-execute-word + (fn (state word) (let ((body (get word "body"))) (body state)))) + +(define + forth-interpret-token + (fn + (state tok) + (let + ((w (forth-lookup state tok))) + (if + (not (nil? w)) + (forth-execute-word state w) + (let + ((n (forth-parse-number tok (get state "base")))) + (if + (not (nil? n)) + (forth-push state n) + (forth-error state (str tok " ?")))))))) + +(define + forth-interpret + (fn + (state src) + (for-each + (fn (tok) (forth-interpret-token state tok)) + (forth-tokens src)) + state)) + +;; Convenience: build a fresh state with primitives loaded. +(define + forth-boot + (fn () (let ((s (forth-make-state))) (forth-install-primitives! s) s))) + +;; Run source on a fresh state and return (state, output, stack-top-to-bottom). +(define + forth-run + (fn + (src) + (let + ((s (forth-boot))) + (forth-interpret s src) + (list s (get s "output") (reverse (get s "dstack")))))) diff --git a/lib/forth/reader.sx b/lib/forth/reader.sx new file mode 100644 index 00000000..24234f56 --- /dev/null +++ b/lib/forth/reader.sx @@ -0,0 +1,104 @@ +;; Forth reader — whitespace-delimited tokens. + +(define + forth-whitespace? + (fn (ch) (or (= ch " ") (or (= ch "\t") (or (= ch "\n") (= ch "\r")))))) + +(define + forth-tokens-loop + (fn + (src n i buf out) + (if + (>= i n) + (if (> (len buf) 0) (concat out (list buf)) out) + (let + ((ch (char-at src i))) + (if + (forth-whitespace? ch) + (if + (> (len buf) 0) + (forth-tokens-loop src n (+ i 1) "" (concat out (list buf))) + (forth-tokens-loop src n (+ i 1) buf out)) + (forth-tokens-loop src n (+ i 1) (str buf ch) out)))))) + +(define + forth-tokens + (fn (src) (forth-tokens-loop src (len src) 0 "" (list)))) + +(define + forth-digit-value + (fn + (ch base) + (let + ((code (char-code ch)) (cc (char-code (downcase ch)))) + (let + ((v (if (and (>= code 48) (<= code 57)) (- code 48) (if (and (>= cc 97) (<= cc 122)) (+ 10 (- cc 97)) -1)))) + (if (and (>= v 0) (< v base)) v nil))))) + +(define + forth-parse-digits-loop + (fn + (src n i base acc) + (if + (>= i n) + acc + (let + ((d (forth-digit-value (char-at src i) base))) + (if + (nil? d) + nil + (forth-parse-digits-loop src n (+ i 1) base (+ (* acc base) d))))))) + +(define + forth-parse-digits + (fn + (src base) + (if + (= (len src) 0) + nil + (forth-parse-digits-loop src (len src) 0 base 0)))) + +(define + forth-strip-prefix + (fn + (s) + (if + (<= (len s) 1) + (list s 0) + (let + ((c (char-at s 0))) + (if + (= c "$") + (list (substring s 1 (len s)) 16) + (if + (= c "%") + (list (substring s 1 (len s)) 2) + (if (= c "#") (list (substring s 1 (len s)) 10) (list s 0)))))))) + +(define + forth-parse-number + (fn + (tok base) + (let + ((n (len tok))) + (if + (= n 0) + nil + (if + (and + (= n 3) + (and (= (char-at tok 0) "'") (= (char-at tok 2) "'"))) + (char-code (char-at tok 1)) + (let + ((neg? (and (> n 1) (= (char-at tok 0) "-")))) + (let + ((s1 (if neg? (substring tok 1 n) tok))) + (let + ((pair (forth-strip-prefix s1))) + (let + ((s (first pair)) (b-override (nth pair 1))) + (let + ((b (if (= b-override 0) base b-override))) + (let + ((v (forth-parse-digits s b))) + (if (nil? v) nil (if neg? (- 0 v) v))))))))))))) diff --git a/lib/forth/runtime.sx b/lib/forth/runtime.sx new file mode 100644 index 00000000..54078477 --- /dev/null +++ b/lib/forth/runtime.sx @@ -0,0 +1,433 @@ +;; Forth runtime — state, stacks, dictionary, output buffer. +;; Data stack: mutable SX list, TOS = first. +;; Return stack: separate mutable list. +;; Dictionary: SX dict {lowercased-name -> word-record}. +;; Word record: {"kind" "body" "immediate?"}; kind is "primitive" or "colon-def". +;; Output buffer: mutable string appended to by `.`, `EMIT`, `CR`, etc. +;; Compile-mode flag: "compiling" on the state. + +(define + forth-make-state + (fn + () + (let + ((s (dict))) + (dict-set! s "dstack" (list)) + (dict-set! s "rstack" (list)) + (dict-set! s "dict" (dict)) + (dict-set! s "output" "") + (dict-set! s "compiling" false) + (dict-set! s "current-def" nil) + (dict-set! s "base" 10) + (dict-set! s "vars" (dict)) + s))) + +(define + forth-error + (fn (state msg) (dict-set! state "error" msg) (raise msg))) + +(define + forth-push + (fn (state v) (dict-set! state "dstack" (cons v (get state "dstack"))))) + +(define + forth-pop + (fn + (state) + (let + ((st (get state "dstack"))) + (if + (= (len st) 0) + (forth-error state "stack underflow") + (let ((top (first st))) (dict-set! state "dstack" (rest st)) top))))) + +(define + forth-peek + (fn + (state) + (let + ((st (get state "dstack"))) + (if (= (len st) 0) (forth-error state "stack underflow") (first st))))) + +(define forth-depth (fn (state) (len (get state "dstack")))) + +(define + forth-rpush + (fn (state v) (dict-set! state "rstack" (cons v (get state "rstack"))))) + +(define + forth-rpop + (fn + (state) + (let + ((st (get state "rstack"))) + (if + (= (len st) 0) + (forth-error state "return stack underflow") + (let ((top (first st))) (dict-set! state "rstack" (rest st)) top))))) + +(define + forth-rpeek + (fn + (state) + (let + ((st (get state "rstack"))) + (if + (= (len st) 0) + (forth-error state "return stack underflow") + (first st))))) + +(define + forth-emit-str + (fn (state s) (dict-set! state "output" (str (get state "output") s)))) + +(define + forth-make-word + (fn + (kind body immediate?) + (let + ((w (dict))) + (dict-set! w "kind" kind) + (dict-set! w "body" body) + (dict-set! w "immediate?" immediate?) + w))) + +(define + forth-def-prim! + (fn + (state name body) + (dict-set! + (get state "dict") + (downcase name) + (forth-make-word "primitive" body false)))) + +(define + forth-def-prim-imm! + (fn + (state name body) + (dict-set! + (get state "dict") + (downcase name) + (forth-make-word "primitive" body true)))) + +(define + forth-lookup + (fn (state name) (get (get state "dict") (downcase name)))) + +(define + forth-binop + (fn + (op) + (fn + (state) + (let + ((b (forth-pop state)) (a (forth-pop state))) + (forth-push state (op a b)))))) + +(define + forth-unop + (fn + (op) + (fn (state) (let ((a (forth-pop state))) (forth-push state (op a)))))) + +(define + forth-cmp + (fn + (op) + (fn + (state) + (let + ((b (forth-pop state)) (a (forth-pop state))) + (forth-push state (if (op a b) -1 0)))))) + +(define + forth-cmp0 + (fn + (op) + (fn + (state) + (let ((a (forth-pop state))) (forth-push state (if (op a) -1 0)))))) + +(define + forth-trunc + (fn (x) (if (< x 0) (- 0 (floor (- 0 x))) (floor x)))) + +(define + forth-div + (fn + (a b) + (if (= b 0) (raise "division by zero") (forth-trunc (/ a b))))) + +(define + forth-mod + (fn + (a b) + (if (= b 0) (raise "division by zero") (- a (* b (forth-div a b)))))) + +(define forth-bits-width 32) + +(define + forth-to-unsigned + (fn (n w) (let ((m (pow 2 w))) (mod (+ (mod n m) m) m)))) + +(define + forth-from-unsigned + (fn + (n w) + (let ((half (pow 2 (- w 1)))) (if (>= n half) (- n (pow 2 w)) n)))) + +(define + forth-bitwise-step + (fn + (op ua ub out place i w) + (if + (>= i w) + out + (let + ((da (mod ua 2)) (db (mod ub 2))) + (forth-bitwise-step + op + (floor (/ ua 2)) + (floor (/ ub 2)) + (+ out (* place (op da db))) + (* place 2) + (+ i 1) + w))))) + +(define + forth-bitwise-uu + (fn + (op) + (fn + (a b) + (let + ((ua (forth-to-unsigned a forth-bits-width)) + (ub (forth-to-unsigned b forth-bits-width))) + (forth-from-unsigned + (forth-bitwise-step op ua ub 0 1 0 forth-bits-width) + forth-bits-width))))) + +(define + forth-bit-and + (forth-bitwise-uu (fn (x y) (if (and (= x 1) (= y 1)) 1 0)))) + +(define + forth-bit-or + (forth-bitwise-uu (fn (x y) (if (or (= x 1) (= y 1)) 1 0)))) + +(define forth-bit-xor (forth-bitwise-uu (fn (x y) (if (= x y) 0 1)))) + +(define forth-bit-invert (fn (a) (- 0 (+ a 1)))) + +(define + forth-install-primitives! + (fn + (state) + (forth-def-prim! state "DUP" (fn (s) (forth-push s (forth-peek s)))) + (forth-def-prim! state "DROP" (fn (s) (forth-pop s))) + (forth-def-prim! + state + "SWAP" + (fn + (s) + (let + ((b (forth-pop s)) (a (forth-pop s))) + (forth-push s b) + (forth-push s a)))) + (forth-def-prim! + state + "OVER" + (fn + (s) + (let + ((b (forth-pop s)) (a (forth-pop s))) + (forth-push s a) + (forth-push s b) + (forth-push s a)))) + (forth-def-prim! + state + "ROT" + (fn + (s) + (let + ((c (forth-pop s)) (b (forth-pop s)) (a (forth-pop s))) + (forth-push s b) + (forth-push s c) + (forth-push s a)))) + (forth-def-prim! + state + "-ROT" + (fn + (s) + (let + ((c (forth-pop s)) (b (forth-pop s)) (a (forth-pop s))) + (forth-push s c) + (forth-push s a) + (forth-push s b)))) + (forth-def-prim! + state + "NIP" + (fn (s) (let ((b (forth-pop s))) (forth-pop s) (forth-push s b)))) + (forth-def-prim! + state + "TUCK" + (fn + (s) + (let + ((b (forth-pop s)) (a (forth-pop s))) + (forth-push s b) + (forth-push s a) + (forth-push s b)))) + (forth-def-prim! + state + "?DUP" + (fn + (s) + (let ((a (forth-peek s))) (when (not (= a 0)) (forth-push s a))))) + (forth-def-prim! state "DEPTH" (fn (s) (forth-push s (forth-depth s)))) + (forth-def-prim! + state + "PICK" + (fn + (s) + (let + ((n (forth-pop s)) (st (get s "dstack"))) + (if + (or (< n 0) (>= n (len st))) + (forth-error s "PICK out of range") + (forth-push s (nth st n)))))) + (forth-def-prim! + state + "ROLL" + (fn + (s) + (let + ((n (forth-pop s)) (st (get s "dstack"))) + (if + (or (< n 0) (>= n (len st))) + (forth-error s "ROLL out of range") + (let + ((taken (nth st n)) + (before (take st n)) + (after (drop st (+ n 1)))) + (dict-set! s "dstack" (concat before after)) + (forth-push s taken)))))) + (forth-def-prim! + state + "2DUP" + (fn + (s) + (let + ((b (forth-pop s)) (a (forth-pop s))) + (forth-push s a) + (forth-push s b) + (forth-push s a) + (forth-push s b)))) + (forth-def-prim! state "2DROP" (fn (s) (forth-pop s) (forth-pop s))) + (forth-def-prim! + state + "2SWAP" + (fn + (s) + (let + ((d (forth-pop s)) + (c (forth-pop s)) + (b (forth-pop s)) + (a (forth-pop s))) + (forth-push s c) + (forth-push s d) + (forth-push s a) + (forth-push s b)))) + (forth-def-prim! + state + "2OVER" + (fn + (s) + (let + ((d (forth-pop s)) + (c (forth-pop s)) + (b (forth-pop s)) + (a (forth-pop s))) + (forth-push s a) + (forth-push s b) + (forth-push s c) + (forth-push s d) + (forth-push s a) + (forth-push s b)))) + (forth-def-prim! state "+" (forth-binop (fn (a b) (+ a b)))) + (forth-def-prim! state "-" (forth-binop (fn (a b) (- a b)))) + (forth-def-prim! state "*" (forth-binop (fn (a b) (* a b)))) + (forth-def-prim! state "/" (forth-binop forth-div)) + (forth-def-prim! state "MOD" (forth-binop forth-mod)) + (forth-def-prim! + state + "/MOD" + (fn + (s) + (let + ((b (forth-pop s)) (a (forth-pop s))) + (forth-push s (forth-mod a b)) + (forth-push s (forth-div a b))))) + (forth-def-prim! state "NEGATE" (forth-unop (fn (a) (- 0 a)))) + (forth-def-prim! state "ABS" (forth-unop abs)) + (forth-def-prim! + state + "MIN" + (forth-binop (fn (a b) (if (< a b) a b)))) + (forth-def-prim! + state + "MAX" + (forth-binop (fn (a b) (if (> a b) a b)))) + (forth-def-prim! state "1+" (forth-unop (fn (a) (+ a 1)))) + (forth-def-prim! state "1-" (forth-unop (fn (a) (- a 1)))) + (forth-def-prim! state "2+" (forth-unop (fn (a) (+ a 2)))) + (forth-def-prim! state "2-" (forth-unop (fn (a) (- a 2)))) + (forth-def-prim! state "2*" (forth-unop (fn (a) (* a 2)))) + (forth-def-prim! state "2/" (forth-unop (fn (a) (floor (/ a 2))))) + (forth-def-prim! state "=" (forth-cmp (fn (a b) (= a b)))) + (forth-def-prim! state "<>" (forth-cmp (fn (a b) (not (= a b))))) + (forth-def-prim! state "<" (forth-cmp (fn (a b) (< a b)))) + (forth-def-prim! state ">" (forth-cmp (fn (a b) (> a b)))) + (forth-def-prim! state "<=" (forth-cmp (fn (a b) (<= a b)))) + (forth-def-prim! state ">=" (forth-cmp (fn (a b) (>= a b)))) + (forth-def-prim! state "0=" (forth-cmp0 (fn (a) (= a 0)))) + (forth-def-prim! state "0<>" (forth-cmp0 (fn (a) (not (= a 0))))) + (forth-def-prim! state "0<" (forth-cmp0 (fn (a) (< a 0)))) + (forth-def-prim! state "0>" (forth-cmp0 (fn (a) (> a 0)))) + (forth-def-prim! state "AND" (forth-binop forth-bit-and)) + (forth-def-prim! state "OR" (forth-binop forth-bit-or)) + (forth-def-prim! state "XOR" (forth-binop forth-bit-xor)) + (forth-def-prim! state "INVERT" (forth-unop forth-bit-invert)) + (forth-def-prim! + state + "." + (fn (s) (forth-emit-str s (str (forth-pop s) " ")))) + (forth-def-prim! + state + ".S" + (fn + (s) + (let + ((st (reverse (get s "dstack")))) + (forth-emit-str s "<") + (forth-emit-str s (str (len st))) + (forth-emit-str s "> ") + (for-each (fn (v) (forth-emit-str s (str v " "))) st)))) + (forth-def-prim! + state + "EMIT" + (fn (s) (forth-emit-str s (code-char (forth-pop s))))) + (forth-def-prim! state "CR" (fn (s) (forth-emit-str s "\n"))) + (forth-def-prim! state "SPACE" (fn (s) (forth-emit-str s " "))) + (forth-def-prim! + state + "SPACES" + (fn + (s) + (let + ((n (forth-pop s))) + (when + (> n 0) + (for-each (fn (_) (forth-emit-str s " ")) (range 0 n)))))) + (forth-def-prim! state "BL" (fn (s) (forth-push s 32))) + state)) diff --git a/lib/forth/tests/test-phase1.sx b/lib/forth/tests/test-phase1.sx new file mode 100644 index 00000000..3d4b5c25 --- /dev/null +++ b/lib/forth/tests/test-phase1.sx @@ -0,0 +1,224 @@ +;; Phase 1 — reader + interpret mode + core words. +;; Simple assertion driver: (forth-test label input expected-stack) +;; forth-run returns (state, output, stack-bottom-to-top). + +(define forth-tests-passed 0) +(define forth-tests-failed 0) +(define forth-tests-failures (list)) + +(define + forth-assert + (fn + (label expected actual) + (if + (= expected actual) + (set! forth-tests-passed (+ forth-tests-passed 1)) + (begin + (set! forth-tests-failed (+ forth-tests-failed 1)) + (set! + forth-tests-failures + (concat + forth-tests-failures + (list + (str label ": expected " (str expected) " got " (str actual))))))))) + +(define + forth-check-stack + (fn + (label src expected) + (let ((r (forth-run src))) (forth-assert label expected (nth r 2))))) + +(define + forth-check-output + (fn + (label src expected) + (let ((r (forth-run src))) (forth-assert label expected (nth r 1))))) + +(define + forth-reader-tests + (fn + () + (forth-assert + "tokens split" + (list "1" "2" "+") + (forth-tokens " 1 2 + ")) + (forth-assert "tokens empty" (list) (forth-tokens "")) + (forth-assert + "tokens tab/newline" + (list "a" "b" "c") + (forth-tokens "a\tb\nc")) + (forth-assert "number decimal" 42 (forth-parse-number "42" 10)) + (forth-assert "number negative" -7 (forth-parse-number "-7" 10)) + (forth-assert "number hex prefix" 255 (forth-parse-number "$ff" 10)) + (forth-assert "number binary prefix" 10 (forth-parse-number "%1010" 10)) + (forth-assert + "number decimal override under hex base" + 123 + (forth-parse-number "#123" 16)) + (forth-assert "number none" nil (forth-parse-number "abc" 10)) + (forth-assert "number in hex base" 255 (forth-parse-number "ff" 16)) + (forth-assert + "number negative hex prefix" + -16 + (forth-parse-number "-$10" 10)) + (forth-assert "char literal" 65 (forth-parse-number "'A'" 10)) + (forth-assert + "mixed-case digit in base 10" + nil + (forth-parse-number "1A" 10)) + (forth-assert + "mixed-case digit in base 16" + 26 + (forth-parse-number "1a" 16)))) + +(define + forth-stack-tests + (fn + () + (forth-check-stack "push literal" "42" (list 42)) + (forth-check-stack "push multiple" "1 2 3" (list 1 2 3)) + (forth-check-stack "DUP" "7 DUP" (list 7 7)) + (forth-check-stack "DROP" "1 2 DROP" (list 1)) + (forth-check-stack "SWAP" "1 2 SWAP" (list 2 1)) + (forth-check-stack "OVER" "1 2 OVER" (list 1 2 1)) + (forth-check-stack "ROT" "1 2 3 ROT" (list 2 3 1)) + (forth-check-stack "-ROT" "1 2 3 -ROT" (list 3 1 2)) + (forth-check-stack "NIP" "1 2 NIP" (list 2)) + (forth-check-stack "TUCK" "1 2 TUCK" (list 2 1 2)) + (forth-check-stack "?DUP non-zero" "5 ?DUP" (list 5 5)) + (forth-check-stack "?DUP zero" "0 ?DUP" (list 0)) + (forth-check-stack "DEPTH empty" "DEPTH" (list 0)) + (forth-check-stack "DEPTH non-empty" "1 2 3 DEPTH" (list 1 2 3 3)) + (forth-check-stack "PICK 0" "10 20 30 0 PICK" (list 10 20 30 30)) + (forth-check-stack "PICK 1" "10 20 30 1 PICK" (list 10 20 30 20)) + (forth-check-stack "PICK 2" "10 20 30 2 PICK" (list 10 20 30 10)) + (forth-check-stack "ROLL 0 is no-op" "10 20 30 0 ROLL" (list 10 20 30)) + (forth-check-stack "ROLL 2" "10 20 30 2 ROLL" (list 20 30 10)) + (forth-check-stack "2DUP" "1 2 2DUP" (list 1 2 1 2)) + (forth-check-stack "2DROP" "1 2 3 4 2DROP" (list 1 2)) + (forth-check-stack "2SWAP" "1 2 3 4 2SWAP" (list 3 4 1 2)) + (forth-check-stack "2OVER" "1 2 3 4 2OVER" (list 1 2 3 4 1 2)))) + +(define + forth-arith-tests + (fn + () + (forth-check-stack "+" "3 4 +" (list 7)) + (forth-check-stack "-" "10 3 -" (list 7)) + (forth-check-stack "*" "6 7 *" (list 42)) + (forth-check-stack "/ positive" "7 2 /" (list 3)) + (forth-check-stack "/ negative numerator" "-7 2 /" (list -3)) + (forth-check-stack "/ both negative" "-7 -2 /" (list 3)) + (forth-check-stack "MOD positive" "7 3 MOD" (list 1)) + (forth-check-stack "MOD negative" "-7 3 MOD" (list -1)) + (forth-check-stack "/MOD positive" "7 3 /MOD" (list 1 2)) + (forth-check-stack "NEGATE" "5 NEGATE" (list -5)) + (forth-check-stack "ABS negative" "-5 ABS" (list 5)) + (forth-check-stack "ABS positive" "5 ABS" (list 5)) + (forth-check-stack "MIN ab" "5 3 MIN" (list 3)) + (forth-check-stack "MAX ab" "5 3 MAX" (list 5)) + (forth-check-stack "1+" "5 1+" (list 6)) + (forth-check-stack "1-" "5 1-" (list 4)) + (forth-check-stack "2+" "5 2+" (list 7)) + (forth-check-stack "2-" "5 2-" (list 3)) + (forth-check-stack "2*" "5 2*" (list 10)) + (forth-check-stack "2/" "7 2/" (list 3)))) + +(define + forth-cmp-tests + (fn + () + (forth-check-stack "= true" "5 5 =" (list -1)) + (forth-check-stack "= false" "5 6 =" (list 0)) + (forth-check-stack "<> true" "5 6 <>" (list -1)) + (forth-check-stack "<> false" "5 5 <>" (list 0)) + (forth-check-stack "< true" "3 5 <" (list -1)) + (forth-check-stack "< false" "5 3 <" (list 0)) + (forth-check-stack "> true" "5 3 >" (list -1)) + (forth-check-stack "> false" "3 5 >" (list 0)) + (forth-check-stack "<= equal" "5 5 <=" (list -1)) + (forth-check-stack "<= less" "3 5 <=" (list -1)) + (forth-check-stack ">= equal" "5 5 >=" (list -1)) + (forth-check-stack ">= greater" "5 3 >=" (list -1)) + (forth-check-stack "0= true" "0 0=" (list -1)) + (forth-check-stack "0= false" "1 0=" (list 0)) + (forth-check-stack "0<> true" "1 0<>" (list -1)) + (forth-check-stack "0<> false" "0 0<>" (list 0)) + (forth-check-stack "0< true" "-5 0<" (list -1)) + (forth-check-stack "0< false" "5 0<" (list 0)) + (forth-check-stack "0> true" "5 0>" (list -1)) + (forth-check-stack "0> false" "-5 0>" (list 0)))) + +(define + forth-bitwise-tests + (fn + () + (forth-check-stack "AND flags" "-1 0 AND" (list 0)) + (forth-check-stack "AND flags 2" "-1 -1 AND" (list -1)) + (forth-check-stack "AND 12 10" "12 10 AND" (list 8)) + (forth-check-stack "OR flags" "-1 0 OR" (list -1)) + (forth-check-stack "OR 12 10" "12 10 OR" (list 14)) + (forth-check-stack "XOR 12 10" "12 10 XOR" (list 6)) + (forth-check-stack "XOR same" "15 15 XOR" (list 0)) + (forth-check-stack "INVERT 0" "0 INVERT" (list -1)) + (forth-check-stack "INVERT 5" "5 INVERT" (list -6)) + (forth-check-stack "double INVERT" "7 INVERT INVERT" (list 7)))) + +(define + forth-io-tests + (fn + () + (forth-check-output "." "42 ." "42 ") + (forth-check-output ". two values" "1 2 . ." "2 1 ") + (forth-check-output ".S empty" ".S" "<0> ") + (forth-check-output ".S three" "1 2 3 .S" "<3> 1 2 3 ") + (forth-check-output "EMIT A" "65 EMIT" "A") + (forth-check-output "CR" "CR" "\n") + (forth-check-output "SPACE" "SPACE" " ") + (forth-check-output "SPACES 3" "3 SPACES" " ") + (forth-check-output "SPACES 0" "0 SPACES" "") + (forth-check-stack "BL" "BL" (list 32)))) + +(define + forth-case-tests + (fn + () + (forth-check-stack "case-insensitive DUP" "5 dup" (list 5 5)) + (forth-check-stack "case-insensitive SWAP" "1 2 Swap" (list 2 1)))) + +(define + forth-mixed-tests + (fn + () + (forth-check-stack "chained arith" "1 2 3 + +" (list 6)) + (forth-check-stack "(3+4)*2" "3 4 + 2 *" (list 14)) + (forth-check-stack "max of three" "5 3 MAX 7 MAX" (list 7)) + (forth-check-stack "abs chain" "-5 ABS 1+" (list 6)) + (forth-check-stack "swap then add" "5 7 SWAP -" (list 2)) + (forth-check-stack "hex literal" "$10 $20 +" (list 48)) + (forth-check-stack "binary literal" "%1010 %0011 +" (list 13)))) + +(define + forth-run-all-phase1-tests + (fn + () + (set! forth-tests-passed 0) + (set! forth-tests-failed 0) + (set! forth-tests-failures (list)) + (forth-reader-tests) + (forth-stack-tests) + (forth-arith-tests) + (forth-cmp-tests) + (forth-bitwise-tests) + (forth-io-tests) + (forth-case-tests) + (forth-mixed-tests) + (dict + "passed" + forth-tests-passed + "failed" + forth-tests-failed + "failures" + forth-tests-failures))) diff --git a/lib/forth/tests/test-phase2.sx b/lib/forth/tests/test-phase2.sx new file mode 100644 index 00000000..a4d7a841 --- /dev/null +++ b/lib/forth/tests/test-phase2.sx @@ -0,0 +1,146 @@ +;; Phase 2 — colon definitions + compile mode + variables/values/fetch/store. + +(define forth-p2-passed 0) +(define forth-p2-failed 0) +(define forth-p2-failures (list)) + +(define + forth-p2-assert + (fn + (label expected actual) + (if + (= expected actual) + (set! forth-p2-passed (+ forth-p2-passed 1)) + (begin + (set! forth-p2-failed (+ forth-p2-failed 1)) + (set! + forth-p2-failures + (concat + forth-p2-failures + (list + (str label ": expected " (str expected) " got " (str actual))))))))) + +(define + forth-p2-check-stack + (fn + (label src expected) + (let ((r (forth-run src))) (forth-p2-assert label expected (nth r 2))))) + +(define + forth-p2-check-output + (fn + (label src expected) + (let ((r (forth-run src))) (forth-p2-assert label expected (nth r 1))))) + +(define + forth-p2-colon-tests + (fn + () + (forth-p2-check-stack "simple colon" ": DOUBLE 2 * ; 7 DOUBLE" (list 14)) + (forth-p2-check-stack "three-op body" ": ADD3 + + ; 1 2 3 ADD3" (list 6)) + (forth-p2-check-stack + "nested call" + ": SQR DUP * ; : SOS SQR SWAP SQR + ; 3 4 SOS" + (list 25)) + (forth-p2-check-stack + "deep chain" + ": D 2 ; : B D ; : A B D * ; A" + (list 4)) + (forth-p2-check-stack + "colon uses literal" + ": FOO 1 2 + ; FOO FOO +" + (list 6)) + (forth-p2-check-stack "case-insensitive def" ": BAR 9 ; bar" (list 9)) + (forth-p2-check-stack + "redefinition picks newest" + ": F 1 ; : F 2 ; F" + (list 2)) + (forth-p2-check-stack + "negative literal in def" + ": NEG5 -5 ; NEG5" + (list -5)) + (forth-p2-check-stack "hex literal in def" ": X $10 ; X" (list 16)))) + +(define + forth-p2-var-tests + (fn + () + (forth-p2-check-stack "VARIABLE + !, @" "VARIABLE X 42 X ! X @" (list 42)) + (forth-p2-check-stack "uninitialised @ is 0" "VARIABLE Y Y @" (list 0)) + (forth-p2-check-stack + "two variables" + "VARIABLE A VARIABLE B 1 A ! 2 B ! A @ B @ +" + (list 3)) + (forth-p2-check-stack + "+! increments" + "VARIABLE X 10 X ! 5 X +! X @" + (list 15)) + (forth-p2-check-stack + "+! multiple" + "VARIABLE X 0 X ! 1 X +! 2 X +! 3 X +! X @" + (list 6)))) + +(define + forth-p2-const-tests + (fn + () + (forth-p2-check-stack "CONSTANT" "100 CONSTANT C C" (list 100)) + (forth-p2-check-stack + "CONSTANT used twice" + "5 CONSTANT FIVE FIVE FIVE *" + (list 25)) + (forth-p2-check-stack + "CONSTANT in colon" + "3 CONSTANT T : TRIPLE T * ; 7 TRIPLE" + (list 21)))) + +(define + forth-p2-value-tests + (fn + () + (forth-p2-check-stack "VALUE initial" "50 VALUE V V" (list 50)) + (forth-p2-check-stack "TO overwrites" "50 VALUE V 99 TO V V" (list 99)) + (forth-p2-check-stack "TO twice" "1 VALUE V 2 TO V 3 TO V V" (list 3)) + (forth-p2-check-stack "VALUE in arithmetic" "7 VALUE V V 3 +" (list 10)))) + +(define + forth-p2-io-tests + (fn + () + (forth-p2-check-output + "colon prints" + ": HELLO 72 EMIT 73 EMIT ; HELLO" + "HI") + (forth-p2-check-output "colon CR" ": LINE 42 . CR ; LINE" "42 \n"))) + +(define + forth-p2-mode-tests + (fn + () + (forth-p2-check-stack "empty colon body" ": NOP ; 5 NOP" (list 5)) + (forth-p2-check-stack + "colon using DUP" + ": TWICE DUP ; 9 TWICE" + (list 9 9)) + (forth-p2-check-stack "IMMEDIATE NOP" ": X ; X" (list)))) + +(define + forth-p2-run-all + (fn + () + (set! forth-p2-passed 0) + (set! forth-p2-failed 0) + (set! forth-p2-failures (list)) + (forth-p2-colon-tests) + (forth-p2-var-tests) + (forth-p2-const-tests) + (forth-p2-value-tests) + (forth-p2-io-tests) + (forth-p2-mode-tests) + (dict + "passed" + forth-p2-passed + "failed" + forth-p2-failed + "failures" + forth-p2-failures))) diff --git a/lib/haskell/test.sh b/lib/haskell/test.sh new file mode 100755 index 00000000..892194d4 --- /dev/null +++ b/lib/haskell/test.sh @@ -0,0 +1,104 @@ +#!/usr/bin/env bash +# Fast Haskell-on-SX test runner — pipes directly to sx_server.exe. +# No MCP, no Docker. All tests live in lib/haskell/tests/*.sx and +# produce a summary dict at the end of each file. +# +# Usage: +# bash lib/haskell/test.sh # run all tests +# bash lib/haskell/test.sh -v # verbose — show each file's pass/fail +# bash lib/haskell/test.sh tests/parse.sx # run one file + +set -euo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="hosts/ocaml/_build/default/bin/sx_server.exe" +if [ ! -x "$SX_SERVER" ]; then + # Fall back to the main-repo build if we're in a worktree. + MAIN_ROOT=$(git worktree list | head -1 | awk '{print $1}') + if [ -x "$MAIN_ROOT/$SX_SERVER" ]; then + SX_SERVER="$MAIN_ROOT/$SX_SERVER" + else + echo "ERROR: sx_server.exe not found. Run: cd hosts/ocaml && dune build" + exit 1 + fi +fi + +VERBOSE="" +FILES=() +for arg in "$@"; do + case "$arg" in + -v|--verbose) VERBOSE=1 ;; + *) FILES+=("$arg") ;; + esac +done + +if [ ${#FILES[@]} -eq 0 ]; then + mapfile -t FILES < <(find lib/haskell/tests -maxdepth 2 -name '*.sx' | sort) +fi + +TOTAL_PASS=0 +TOTAL_FAIL=0 +FAILED_FILES=() + +for FILE in "${FILES[@]}"; do + [ -f "$FILE" ] || { echo "skip $FILE (not found)"; continue; } + TMPFILE=$(mktemp) + cat > "$TMPFILE" <&1 || true) + rm -f "$TMPFILE" + + # Output format: either "(ok 3 (P F))" on one line (short result) or + # "(ok-len 3 N)\n(P F)" where the value appears on the following line. + LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}') + if [ -z "$LINE" ]; then + LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \ + | sed -E 's/^\(ok 3 //; s/\)$//') + fi + if [ -z "$LINE" ]; then + echo "✗ $FILE: could not extract summary" + echo "$OUTPUT" | tail -20 + TOTAL_FAIL=$((TOTAL_FAIL + 1)) + FAILED_FILES+=("$FILE") + continue + fi + P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/') + F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/') + TOTAL_PASS=$((TOTAL_PASS + P)) + TOTAL_FAIL=$((TOTAL_FAIL + F)) + if [ "$F" -gt 0 ]; then + FAILED_FILES+=("$FILE") + printf '✗ %-40s %d/%d\n' "$FILE" "$P" "$((P+F))" + # Print failure names + TMPFILE2=$(mktemp) + cat > "$TMPFILE2" <&1 | grep -E '^\(ok 3 ' || true) + rm -f "$TMPFILE2" + echo " $FAILS" + elif [ "$VERBOSE" = "1" ]; then + printf '✓ %-40s %d passed\n' "$FILE" "$P" + fi +done + +TOTAL=$((TOTAL_PASS + TOTAL_FAIL)) +if [ $TOTAL_FAIL -eq 0 ]; then + echo "✓ $TOTAL_PASS/$TOTAL haskell-on-sx tests passed" +else + echo "✗ $TOTAL_PASS/$TOTAL passed, $TOTAL_FAIL failed in: ${FAILED_FILES[*]}" +fi + +[ $TOTAL_FAIL -eq 0 ] diff --git a/lib/haskell/tests/parse.sx b/lib/haskell/tests/parse.sx new file mode 100644 index 00000000..7b9c9da1 --- /dev/null +++ b/lib/haskell/tests/parse.sx @@ -0,0 +1,251 @@ +;; Haskell parser / tokenizer tests. +;; +;; Lightweight runner: each test checks actual vs expected with +;; structural (deep) equality and accumulates pass/fail counters. +;; Final value of this file is a summary dict with :pass :fail :fails. + +(define + hk-deep=? + (fn + (a b) + (cond + ((= a b) true) + ((and (dict? a) (dict? b)) + (let + ((ak (keys a)) (bk (keys b))) + (if + (not (= (len ak) (len bk))) + false + (every? + (fn + (k) + (and (has-key? b k) (hk-deep=? (get a k) (get b k)))) + ak)))) + ((and (list? a) (list? b)) + (if + (not (= (len a) (len b))) + false + (let + ((i 0) (ok true)) + (define + hk-de-loop + (fn + () + (when + (and ok (< i (len a))) + (do + (when + (not (hk-deep=? (nth a i) (nth b i))) + (set! ok false)) + (set! i (+ i 1)) + (hk-de-loop))))) + (hk-de-loop) + ok))) + (:else false)))) + +(define hk-test-pass 0) +(define hk-test-fail 0) +(define hk-test-fails (list)) + +(define + hk-test + (fn + (name actual expected) + (if + (hk-deep=? actual expected) + (set! hk-test-pass (+ hk-test-pass 1)) + (do + (set! hk-test-fail (+ hk-test-fail 1)) + (append! hk-test-fails {:actual actual :expected expected :name name}))))) + +;; Convenience: tokenize and drop newline + eof tokens so tests focus +;; on meaningful content. Returns list of {:type :value} pairs. +(define + hk-toks + (fn + (src) + (map + (fn (tok) {:value (get tok "value") :type (get tok "type")}) + (filter + (fn + (tok) + (let + ((ty (get tok "type"))) + (not (or (= ty "newline") (= ty "eof"))))) + (hk-tokenize src))))) + +;; ── 1. Identifiers & reserved words ── +(hk-test "varid simple" (hk-toks "foo") (list {:value "foo" :type "varid"})) + +(hk-test + "varid with digits and prime" + (hk-toks "foo123' bar2") + (list {:value "foo123'" :type "varid"} {:value "bar2" :type "varid"})) + +(hk-test "conid" (hk-toks "Maybe") (list {:value "Maybe" :type "conid"})) + +(hk-test "reserved: where" (hk-toks "where") (list {:value "where" :type "reserved"})) + +(hk-test + "reserved: case of" + (hk-toks "case of") + (list {:value "case" :type "reserved"} {:value "of" :type "reserved"})) + +(hk-test "underscore is reserved" (hk-toks "_") (list {:value "_" :type "reserved"})) + +;; ── 2. Qualified names ── +(hk-test "qvarid" (hk-toks "Data.Map.lookup") (list {:value "Data.Map.lookup" :type "qvarid"})) + +(hk-test "qconid" (hk-toks "Data.Map") (list {:value "Data.Map" :type "qconid"})) + +(hk-test "qualified operator" (hk-toks "Prelude.+") (list {:value "Prelude.+" :type "varsym"})) + +;; ── 3. Numbers ── +(hk-test "integer" (hk-toks "42") (list {:value 42 :type "integer"})) + +(hk-test "hex" (hk-toks "0x2A") (list {:value 42 :type "integer"})) + +(hk-test "octal" (hk-toks "0o17") (list {:value 15 :type "integer"})) + +(hk-test "float" (hk-toks "3.14") (list {:value 3.14 :type "float"})) + +(hk-test "float with exp" (hk-toks "1.5e-3") (list {:value 0.0015 :type "float"})) + +;; ── 4. Strings / chars ── +(hk-test "string" (hk-toks "\"hello\"") (list {:value "hello" :type "string"})) + +(hk-test "char" (hk-toks "'a'") (list {:value "a" :type "char"})) + +(hk-test "char escape newline" (hk-toks "'\\n'") (list {:value "\n" :type "char"})) + +(hk-test "string escape" (hk-toks "\"a\\nb\"") (list {:value "a\nb" :type "string"})) + +;; ── 5. Operators ── +(hk-test "operator +" (hk-toks "+") (list {:value "+" :type "varsym"})) + +(hk-test "operator >>=" (hk-toks ">>=") (list {:value ">>=" :type "varsym"})) + +(hk-test "consym" (hk-toks ":+:") (list {:value ":+:" :type "consym"})) + +(hk-test "reservedop ->" (hk-toks "->") (list {:value "->" :type "reservedop"})) + +(hk-test "reservedop =>" (hk-toks "=>") (list {:value "=>" :type "reservedop"})) + +(hk-test "reservedop .. (range)" (hk-toks "..") (list {:value ".." :type "reservedop"})) + +(hk-test "reservedop backslash" (hk-toks "\\") (list {:value "\\" :type "reservedop"})) + +;; ── 6. Punctuation ── +(hk-test "parens" (hk-toks "( )") (list {:value "(" :type "lparen"} {:value ")" :type "rparen"})) + +(hk-test "brackets" (hk-toks "[]") (list {:value "[" :type "lbracket"} {:value "]" :type "rbracket"})) + +(hk-test "braces" (hk-toks "{}") (list {:value "{" :type "lbrace"} {:value "}" :type "rbrace"})) + +(hk-test + "backtick" + (hk-toks "`mod`") + (list {:value "`" :type "backtick"} {:value "mod" :type "varid"} {:value "`" :type "backtick"})) + +(hk-test "comma and semi" (hk-toks ",;") (list {:value "," :type "comma"} {:value ";" :type "semi"})) + +;; ── 7. Comments ── +(hk-test "line comment stripped" (hk-toks "-- a comment") (list)) + +(hk-test "line comment before code" (hk-toks "-- c\nfoo") (list {:value "foo" :type "varid"})) + +(hk-test + "block comment stripped" + (hk-toks "{- block -} foo") + (list {:value "foo" :type "varid"})) + +(hk-test + "nested block comment" + (hk-toks "{- {- nested -} -} x") + (list {:value "x" :type "varid"})) + +(hk-test + "-- inside operator is comment in Haskell" + (hk-toks "-->") + (list {:value "-->" :type "varsym"})) + +;; ── 8. Mixed declarations ── +(hk-test + "type signature" + (hk-toks "main :: IO ()") + (list {:value "main" :type "varid"} {:value "::" :type "reservedop"} {:value "IO" :type "conid"} {:value "(" :type "lparen"} {:value ")" :type "rparen"})) + +(hk-test + "data declaration" + (hk-toks "data Maybe a = Nothing | Just a") + (list + {:value "data" :type "reserved"} + {:value "Maybe" :type "conid"} + {:value "a" :type "varid"} + {:value "=" :type "reservedop"} + {:value "Nothing" :type "conid"} + {:value "|" :type "reservedop"} + {:value "Just" :type "conid"} + {:value "a" :type "varid"})) + +(hk-test + "lambda" + (hk-toks "\\x -> x + 1") + (list {:value "\\" :type "reservedop"} {:value "x" :type "varid"} {:value "->" :type "reservedop"} {:value "x" :type "varid"} {:value "+" :type "varsym"} {:value 1 :type "integer"})) + +(hk-test + "let expression" + (hk-toks "let x = 1 in x + x") + (list + {:value "let" :type "reserved"} + {:value "x" :type "varid"} + {:value "=" :type "reservedop"} + {:value 1 :type "integer"} + {:value "in" :type "reserved"} + {:value "x" :type "varid"} + {:value "+" :type "varsym"} + {:value "x" :type "varid"})) + +(hk-test + "case expr" + (hk-toks "case x of Just y -> y") + (list + {:value "case" :type "reserved"} + {:value "x" :type "varid"} + {:value "of" :type "reserved"} + {:value "Just" :type "conid"} + {:value "y" :type "varid"} + {:value "->" :type "reservedop"} + {:value "y" :type "varid"})) + +(hk-test + "list literal" + (hk-toks "[1, 2, 3]") + (list + {:value "[" :type "lbracket"} + {:value 1 :type "integer"} + {:value "," :type "comma"} + {:value 2 :type "integer"} + {:value "," :type "comma"} + {:value 3 :type "integer"} + {:value "]" :type "rbracket"})) + +(hk-test + "range syntax" + (hk-toks "[1..10]") + (list {:value "[" :type "lbracket"} {:value 1 :type "integer"} {:value ".." :type "reservedop"} {:value 10 :type "integer"} {:value "]" :type "rbracket"})) + +;; ── 9. Positions ── +(hk-test + "line/col positions" + (let + ((toks (hk-tokenize "foo\n bar"))) + (list + (get (nth toks 0) "line") + (get (nth toks 0) "col") + (get (nth toks 2) "line") + (get (nth toks 2) "col"))) + (list 1 1 2 3)) + +;; ── Summary — final value of this file ── +{:fails hk-test-fails :pass hk-test-pass :fail hk-test-fail} diff --git a/lib/haskell/tokenizer.sx b/lib/haskell/tokenizer.sx new file mode 100644 index 00000000..fe2608ff --- /dev/null +++ b/lib/haskell/tokenizer.sx @@ -0,0 +1,628 @@ +;; Haskell tokenizer — produces a token stream from Haskell 98 source. +;; +;; Tokens: {:type T :value V :line L :col C} +;; +;; Types: +;; "varid" lowercase ident, e.g. fmap, x, myFunc +;; "conid" uppercase ident, e.g. Nothing, Just, Map +;; "qvarid" qualified varid, value holds raw "A.B.foo" +;; "qconid" qualified conid, e.g. "Data.Map" +;; "reserved" reserved word — value is the word +;; "varsym" operator symbol, e.g. +, ++, >>= +;; "consym" constructor operator (starts with :), e.g. :, :+ +;; "reservedop" reserved operator ("::", "=", "->", "<-", "=>", "|", "\\", "@", "~", "..") +;; "integer" integer literal (number) +;; "float" float literal (number) +;; "char" char literal (string of length 1) +;; "string" string literal +;; "lparen" "rparen" "lbracket" "rbracket" "lbrace" "rbrace" +;; "vlbrace" "vrbrace" "vsemi" virtual layout tokens (inserted later) +;; "comma" "semi" "backtick" +;; "newline" a logical line break (used by layout pass; stripped afterwards) +;; "eof" +;; +;; Note: SX `cond`/`when` clauses evaluate ONLY their last expression. +;; Multi-expression bodies must be wrapped in (do ...). All helpers use +;; the hk- prefix to avoid clashing with SX evaluator special forms. + +;; ── Char-code table ─────────────────────────────────────────────── +(define + hk-ord-table + (let + ((t (dict)) (i 0)) + (define + hk-build-table + (fn + () + (when + (< i 128) + (do + (dict-set! t (char-from-code i) i) + (set! i (+ i 1)) + (hk-build-table))))) + (hk-build-table) + t)) + +(define hk-ord (fn (c) (or (get hk-ord-table c) 0))) + +;; ── Character predicates ────────────────────────────────────────── +(define + hk-digit? + (fn (c) (and (string? c) (>= (hk-ord c) 48) (<= (hk-ord c) 57)))) + +(define + hk-hex-digit? + (fn + (c) + (and + (string? c) + (or + (and (>= (hk-ord c) 48) (<= (hk-ord c) 57)) + (and (>= (hk-ord c) 97) (<= (hk-ord c) 102)) + (and (>= (hk-ord c) 65) (<= (hk-ord c) 70)))))) + +(define + hk-octal-digit? + (fn (c) (and (string? c) (>= (hk-ord c) 48) (<= (hk-ord c) 55)))) + +(define + hk-lower? + (fn + (c) + (and + (string? c) + (or (and (>= (hk-ord c) 97) (<= (hk-ord c) 122)) (= c "_"))))) + +(define + hk-upper? + (fn (c) (and (string? c) (>= (hk-ord c) 65) (<= (hk-ord c) 90)))) + +(define hk-alpha? (fn (c) (or (hk-lower? c) (hk-upper? c)))) + +(define + hk-ident-char? + (fn (c) (or (hk-alpha? c) (hk-digit? c) (= c "'")))) + +(define + hk-symbol-char? + (fn + (c) + (or + (= c "!") + (= c "#") + (= c "$") + (= c "%") + (= c "&") + (= c "*") + (= c "+") + (= c ".") + (= c "/") + (= c "<") + (= c "=") + (= c ">") + (= c "?") + (= c "@") + (= c "\\") + (= c "^") + (= c "|") + (= c "-") + (= c "~") + (= c ":")))) + +(define hk-space? (fn (c) (or (= c " ") (= c "\t")))) + +;; ── Hex / oct parser (parse-int is decimal only) ────────────────── +(define + hk-parse-radix + (fn + (s radix) + (let + ((n-len (len s)) (idx 0) (acc 0)) + (define + hk-rad-loop + (fn + () + (when + (< idx n-len) + (do + (let + ((c (substring s idx (+ idx 1)))) + (cond + ((and (>= (hk-ord c) 48) (<= (hk-ord c) 57)) + (set! acc (+ (* acc radix) (- (hk-ord c) 48)))) + ((and (>= (hk-ord c) 97) (<= (hk-ord c) 102)) + (set! acc (+ (* acc radix) (+ 10 (- (hk-ord c) 97))))) + ((and (>= (hk-ord c) 65) (<= (hk-ord c) 70)) + (set! acc (+ (* acc radix) (+ 10 (- (hk-ord c) 65))))))) + (set! idx (+ idx 1)) + (hk-rad-loop))))) + (hk-rad-loop) + acc))) + +(define + hk-parse-float + (fn + (s) + (let + ((n-len (len s)) + (idx 0) + (sign 1) + (int-part 0) + (frac-part 0) + (frac-div 1) + (exp-sign 1) + (exp-val 0) + (has-exp false)) + (when + (and (< idx n-len) (= (substring s idx (+ idx 1)) "-")) + (do (set! sign -1) (set! idx (+ idx 1)))) + (when + (and (< idx n-len) (= (substring s idx (+ idx 1)) "+")) + (set! idx (+ idx 1))) + (define + hk-int-loop + (fn + () + (when + (and (< idx n-len) (hk-digit? (substring s idx (+ idx 1)))) + (do + (set! + int-part + (+ (* int-part 10) (parse-int (substring s idx (+ idx 1))))) + (set! idx (+ idx 1)) + (hk-int-loop))))) + (hk-int-loop) + (when + (and (< idx n-len) (= (substring s idx (+ idx 1)) ".")) + (do + (set! idx (+ idx 1)) + (define + hk-frac-loop + (fn + () + (when + (and (< idx n-len) (hk-digit? (substring s idx (+ idx 1)))) + (do + (set! frac-div (* frac-div 10)) + (set! + frac-part + (+ + frac-part + (/ (parse-int (substring s idx (+ idx 1))) frac-div))) + (set! idx (+ idx 1)) + (hk-frac-loop))))) + (hk-frac-loop))) + (when + (and + (< idx n-len) + (let + ((c (substring s idx (+ idx 1)))) + (or (= c "e") (= c "E")))) + (do + (set! has-exp true) + (set! idx (+ idx 1)) + (cond + ((and (< idx n-len) (= (substring s idx (+ idx 1)) "-")) + (do (set! exp-sign -1) (set! idx (+ idx 1)))) + ((and (< idx n-len) (= (substring s idx (+ idx 1)) "+")) + (set! idx (+ idx 1)))) + (define + hk-exp-loop + (fn + () + (when + (and (< idx n-len) (hk-digit? (substring s idx (+ idx 1)))) + (do + (set! + exp-val + (+ + (* exp-val 10) + (parse-int (substring s idx (+ idx 1))))) + (set! idx (+ idx 1)) + (hk-exp-loop))))) + (hk-exp-loop))) + (let + ((base (* sign (+ int-part frac-part)))) + (if has-exp (* base (pow 10 (* exp-sign exp-val))) base))))) + +;; ── Reserved words / ops ────────────────────────────────────────── +(define + hk-reserved-words + (list + "case" + "class" + "data" + "default" + "deriving" + "do" + "else" + "foreign" + "if" + "import" + "in" + "infix" + "infixl" + "infixr" + "instance" + "let" + "module" + "newtype" + "of" + "then" + "type" + "where" + "_")) + +(define hk-reserved? (fn (w) (contains? hk-reserved-words w))) + +(define + hk-reserved-ops + (list ".." ":" "::" "=" "\\" "|" "<-" "->" "@" "~" "=>")) + +(define hk-reserved-op? (fn (w) (contains? hk-reserved-ops w))) + +;; ── Token constructor ───────────────────────────────────────────── +(define hk-make-token (fn (type value line col) {:line line :value value :col col :type type})) + +;; ── Main tokenizer ──────────────────────────────────────────────── +(define + hk-tokenize + (fn + (src) + (let + ((tokens (list)) (pos 0) (line 1) (col 1) (src-len (len src))) + (define + hk-peek + (fn + (offset) + (if + (< (+ pos offset) src-len) + (substring src (+ pos offset) (+ pos offset 1)) + nil))) + (define hk-cur (fn () (hk-peek 0))) + (define + hk-advance! + (fn + () + (let + ((c (hk-cur))) + (set! pos (+ pos 1)) + (if + (= c "\n") + (do (set! line (+ line 1)) (set! col 1)) + (set! col (+ col 1)))))) + (define + hk-advance-n! + (fn + (n) + (when (> n 0) (do (hk-advance!) (hk-advance-n! (- n 1)))))) + (define + hk-push! + (fn + (type value tok-line tok-col) + (append! tokens (hk-make-token type value tok-line tok-col)))) + (define + hk-read-while + (fn + (pred) + (let + ((start pos)) + (define + hk-rw-loop + (fn + () + (when + (and (< pos src-len) (pred (hk-cur))) + (do (hk-advance!) (hk-rw-loop))))) + (hk-rw-loop) + (substring src start pos)))) + (define + hk-skip-line-comment! + (fn + () + (define + hk-slc-loop + (fn + () + (when + (and (< pos src-len) (not (= (hk-cur) "\n"))) + (do (hk-advance!) (hk-slc-loop))))) + (hk-slc-loop))) + (define + hk-skip-block-comment! + (fn + () + (hk-advance-n! 2) + (let + ((depth 1)) + (define + hk-sbc-loop + (fn + () + (cond + ((>= pos src-len) nil) + ((and (= (hk-cur) "{") (= (hk-peek 1) "-")) + (do + (hk-advance-n! 2) + (set! depth (+ depth 1)) + (hk-sbc-loop))) + ((and (= (hk-cur) "-") (= (hk-peek 1) "}")) + (do + (hk-advance-n! 2) + (set! depth (- depth 1)) + (when (> depth 0) (hk-sbc-loop)))) + (:else (do (hk-advance!) (hk-sbc-loop)))))) + (hk-sbc-loop)))) + (define + hk-read-escape + (fn + () + (hk-advance!) + (let + ((c (hk-cur))) + (cond + ((= c "n") (do (hk-advance!) "\n")) + ((= c "t") (do (hk-advance!) "\t")) + ((= c "r") (do (hk-advance!) "\r")) + ((= c "\\") (do (hk-advance!) "\\")) + ((= c "'") (do (hk-advance!) "'")) + ((= c "\"") (do (hk-advance!) "\"")) + ((= c "0") (do (hk-advance!) (char-from-code 0))) + ((= c "a") (do (hk-advance!) (char-from-code 7))) + ((= c "b") (do (hk-advance!) (char-from-code 8))) + ((= c "f") (do (hk-advance!) (char-from-code 12))) + ((= c "v") (do (hk-advance!) (char-from-code 11))) + ((hk-digit? c) + (let + ((digits (hk-read-while hk-digit?))) + (char-from-code (parse-int digits)))) + (:else (do (hk-advance!) (str "\\" c))))))) + (define + hk-read-string + (fn + () + (let + ((parts (list))) + (hk-advance!) + (define + hk-rs-loop + (fn + () + (cond + ((>= pos src-len) nil) + ((= (hk-cur) "\"") (hk-advance!)) + ((= (hk-cur) "\\") + (do (append! parts (hk-read-escape)) (hk-rs-loop))) + (:else + (do + (append! parts (hk-cur)) + (hk-advance!) + (hk-rs-loop)))))) + (hk-rs-loop) + (join "" parts)))) + (define + hk-read-char-lit + (fn + () + (hk-advance!) + (let + ((c (if (= (hk-cur) "\\") (hk-read-escape) (let ((ch (hk-cur))) (hk-advance!) ch)))) + (when (= (hk-cur) "'") (hk-advance!)) + c))) + (define + hk-read-number + (fn + (tok-line tok-col) + (let + ((start pos)) + (cond + ((and (= (hk-cur) "0") (or (= (hk-peek 1) "x") (= (hk-peek 1) "X"))) + (do + (hk-advance-n! 2) + (let + ((hex-start pos)) + (hk-read-while hk-hex-digit?) + (hk-push! + "integer" + (hk-parse-radix (substring src hex-start pos) 16) + tok-line + tok-col)))) + ((and (= (hk-cur) "0") (or (= (hk-peek 1) "o") (= (hk-peek 1) "O"))) + (do + (hk-advance-n! 2) + (let + ((oct-start pos)) + (hk-read-while hk-octal-digit?) + (hk-push! + "integer" + (hk-parse-radix (substring src oct-start pos) 8) + tok-line + tok-col)))) + (:else + (do + (hk-read-while hk-digit?) + (let + ((is-float false)) + (when + (and (= (hk-cur) ".") (hk-digit? (hk-peek 1))) + (do + (set! is-float true) + (hk-advance!) + (hk-read-while hk-digit?))) + (when + (or (= (hk-cur) "e") (= (hk-cur) "E")) + (do + (set! is-float true) + (hk-advance!) + (when + (or (= (hk-cur) "+") (= (hk-cur) "-")) + (hk-advance!)) + (hk-read-while hk-digit?))) + (let + ((num-str (substring src start pos))) + (if + is-float + (hk-push! + "float" + (hk-parse-float num-str) + tok-line + tok-col) + (hk-push! + "integer" + (parse-int num-str) + tok-line + tok-col)))))))))) + (define + hk-read-qualified! + (fn + (tok-line tok-col) + (let + ((parts (list)) (w (hk-read-while hk-ident-char?))) + (append! parts w) + (let + ((emitted false)) + (define + hk-rq-loop + (fn + () + (when + (and + (not emitted) + (= (hk-cur) ".") + (or + (hk-upper? (hk-peek 1)) + (hk-lower? (hk-peek 1)) + (hk-symbol-char? (hk-peek 1)))) + (let + ((next (hk-peek 1))) + (cond + ((hk-upper? next) + (do + (hk-advance!) + (append! parts ".") + (append! parts (hk-read-while hk-ident-char?)) + (hk-rq-loop))) + ((hk-lower? next) + (do + (hk-advance!) + (set! emitted true) + (hk-push! + "qvarid" + (str + (join "" parts) + "." + (hk-read-while hk-ident-char?)) + tok-line + tok-col))) + ((hk-symbol-char? next) + (do + (hk-advance!) + (set! emitted true) + (hk-push! + "varsym" + (str + (join "" parts) + "." + (hk-read-while hk-symbol-char?)) + tok-line + tok-col)))))))) + (hk-rq-loop) + (when + (not emitted) + (let + ((full (join "" parts))) + (if + (string-contains? full ".") + (hk-push! "qconid" full tok-line tok-col) + (hk-push! "conid" full tok-line tok-col)))))))) + (define + hk-scan! + (fn + () + (cond + ((>= pos src-len) nil) + ((hk-space? (hk-cur)) (do (hk-advance!) (hk-scan!))) + ((= (hk-cur) "\n") + (do + (let + ((l line) (c col)) + (hk-advance!) + (hk-push! "newline" nil l c)) + (hk-scan!))) + ((and (= (hk-cur) "{") (= (hk-peek 1) "-")) + (do (hk-skip-block-comment!) (hk-scan!))) + ((and (= (hk-cur) "-") (= (hk-peek 1) "-") (let ((p2 (hk-peek 2))) (or (nil? p2) (= p2 "\n") (not (hk-symbol-char? p2))))) + (do (hk-skip-line-comment!) (hk-scan!))) + ((= (hk-cur) "\"") + (do + (let + ((l line) (c col)) + (hk-push! "string" (hk-read-string) l c)) + (hk-scan!))) + ((= (hk-cur) "'") + (do + (let + ((l line) (c col)) + (hk-push! "char" (hk-read-char-lit) l c)) + (hk-scan!))) + ((hk-digit? (hk-cur)) + (do (hk-read-number line col) (hk-scan!))) + ((hk-lower? (hk-cur)) + (do + (let + ((l line) (c col)) + (let + ((w (hk-read-while hk-ident-char?))) + (if + (hk-reserved? w) + (hk-push! "reserved" w l c) + (hk-push! "varid" w l c)))) + (hk-scan!))) + ((hk-upper? (hk-cur)) + (do + (let ((l line) (c col)) (hk-read-qualified! l c)) + (hk-scan!))) + ((= (hk-cur) "(") + (do (hk-push! "lparen" "(" line col) (hk-advance!) (hk-scan!))) + ((= (hk-cur) ")") + (do (hk-push! "rparen" ")" line col) (hk-advance!) (hk-scan!))) + ((= (hk-cur) "[") + (do + (hk-push! "lbracket" "[" line col) + (hk-advance!) + (hk-scan!))) + ((= (hk-cur) "]") + (do + (hk-push! "rbracket" "]" line col) + (hk-advance!) + (hk-scan!))) + ((= (hk-cur) "{") + (do (hk-push! "lbrace" "{" line col) (hk-advance!) (hk-scan!))) + ((= (hk-cur) "}") + (do (hk-push! "rbrace" "}" line col) (hk-advance!) (hk-scan!))) + ((= (hk-cur) ",") + (do (hk-push! "comma" "," line col) (hk-advance!) (hk-scan!))) + ((= (hk-cur) ";") + (do (hk-push! "semi" ";" line col) (hk-advance!) (hk-scan!))) + ((= (hk-cur) "`") + (do + (hk-push! "backtick" "`" line col) + (hk-advance!) + (hk-scan!))) + ((hk-symbol-char? (hk-cur)) + (do + (let + ((l line) (c col)) + (let + ((first (hk-cur))) + (let + ((w (hk-read-while hk-symbol-char?))) + (cond + ((hk-reserved-op? w) (hk-push! "reservedop" w l c)) + ((= first ":") (hk-push! "consym" w l c)) + (:else (hk-push! "varsym" w l c)))))) + (hk-scan!))) + (:else (do (hk-advance!) (hk-scan!)))))) + (hk-scan!) + (hk-push! "eof" nil line col) + tokens))) diff --git a/lib/lua/parser.sx b/lib/lua/parser.sx new file mode 100644 index 00000000..d604224b --- /dev/null +++ b/lib/lua/parser.sx @@ -0,0 +1,802 @@ +(define lua-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) + +(define lua-tok-value (fn (t) (if (= t nil) nil (get t :value)))) + +(define + lua-binop-prec + (fn + (op) + (cond + ((= op "or") 1) + ((= op "and") 2) + ((= op "<") 3) + ((= op ">") 3) + ((= op "<=") 3) + ((= op ">=") 3) + ((= op "==") 3) + ((= op "~=") 3) + ((= op "..") 5) + ((= op "+") 6) + ((= op "-") 6) + ((= op "*") 7) + ((= op "/") 7) + ((= op "%") 7) + ((= op "^") 10) + (else 0)))) + +(define lua-binop-right? (fn (op) (or (= op "..") (= op "^")))) + +(define + lua-parse + (fn + (src) + (let + ((tokens (lua-tokenize src)) (idx 0) (tok-len 0)) + (begin + (set! tok-len (len tokens)) + (define peek-tok (fn () (nth tokens idx))) + (define + peek-tok-at + (fn (n) (if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil))) + (define advance-tok! (fn () (set! idx (+ idx 1)))) + (define + check-tok? + (fn + (type value) + (let + ((t (peek-tok))) + (and + (= (lua-tok-type t) type) + (or (= value nil) (= (lua-tok-value t) value)))))) + (define + consume! + (fn + (type value) + (if + (check-tok? type value) + (let ((t (peek-tok))) (begin (advance-tok!) t)) + (error + (str + "lua-parse: expected " + type + " " + value + " got " + (lua-tok-type (peek-tok)) + " " + (lua-tok-value (peek-tok))))))) + (define at-keyword? (fn (kw) (check-tok? "keyword" kw))) + (define at-op? (fn (op) (check-tok? "op" op))) + (define + at-binop? + (fn + () + (let + ((t (peek-tok))) + (cond + ((and (= (lua-tok-type t) "keyword") (or (= (lua-tok-value t) "and") (= (lua-tok-value t) "or"))) + true) + ((and (= (lua-tok-type t) "op") (> (lua-binop-prec (lua-tok-value t)) 0)) + true) + (else false))))) + (define parse-expr nil) + (define parse-block nil) + (define parse-prefixexp nil) + (define parse-table-ctor nil) + (define parse-unary nil) + (define parse-args nil) + (define parse-funcbody nil) + (set! + parse-args + (fn + () + (cond + ((at-op? "(") + (begin + (advance-tok!) + (if + (at-op? ")") + (begin (advance-tok!) (list)) + (let + ((args (list (parse-expr)))) + (begin + (define + more + (fn + () + (when + (at-op? ",") + (begin + (advance-tok!) + (append! args (parse-expr)) + (more))))) + (more) + (consume! "op" ")") + args))))) + ((at-op? "{") (list (parse-table-ctor))) + ((check-tok? "string" nil) + (let + ((t (peek-tok))) + (begin + (advance-tok!) + (list (list (quote lua-str) (lua-tok-value t)))))) + (else (error "lua-parse: expected args"))))) + (set! + parse-funcbody + (fn + () + (begin + (consume! "op" "(") + (let + ((params (list)) (is-vararg false)) + (begin + (when + (not (at-op? ")")) + (begin + (cond + ((at-op? "...") + (begin (advance-tok!) (set! is-vararg true))) + (else + (let + ((t (peek-tok))) + (begin + (when + (not (= (lua-tok-type t) "ident")) + (error "lua-parse: expected name in params")) + (append! params (lua-tok-value t)) + (advance-tok!))))) + (define + more-params + (fn + () + (when + (and (at-op? ",") (not is-vararg)) + (begin + (advance-tok!) + (cond + ((at-op? "...") + (begin + (advance-tok!) + (set! is-vararg true))) + (else + (let + ((t (peek-tok))) + (begin + (when + (not (= (lua-tok-type t) "ident")) + (error "lua-parse: expected name")) + (append! params (lua-tok-value t)) + (advance-tok!))))) + (more-params))))) + (more-params))) + (consume! "op" ")") + (let + ((body (parse-block))) + (begin + (consume! "keyword" "end") + (list (quote lua-function) params is-vararg body)))))))) + (define + parse-primary + (fn + () + (let + ((t (peek-tok))) + (cond + ((= (lua-tok-type t) "number") + (begin + (advance-tok!) + (list (quote lua-num) (lua-tok-value t)))) + ((= (lua-tok-type t) "string") + (begin + (advance-tok!) + (list (quote lua-str) (lua-tok-value t)))) + ((and (= (lua-tok-type t) "keyword") (= (lua-tok-value t) "nil")) + (begin (advance-tok!) (list (quote lua-nil)))) + ((and (= (lua-tok-type t) "keyword") (= (lua-tok-value t) "true")) + (begin (advance-tok!) (list (quote lua-true)))) + ((and (= (lua-tok-type t) "keyword") (= (lua-tok-value t) "false")) + (begin (advance-tok!) (list (quote lua-false)))) + ((and (= (lua-tok-type t) "op") (= (lua-tok-value t) "...")) + (begin (advance-tok!) (list (quote lua-vararg)))) + ((and (= (lua-tok-type t) "keyword") (= (lua-tok-value t) "function")) + (begin (advance-tok!) (parse-funcbody))) + ((and (= (lua-tok-type t) "op") (= (lua-tok-value t) "{")) + (parse-table-ctor)) + ((or (= (lua-tok-type t) "ident") (and (= (lua-tok-type t) "op") (= (lua-tok-value t) "("))) + (parse-prefixexp)) + (else + (error + (str + "lua-parse: unexpected " + (lua-tok-type t) + " " + (lua-tok-value t)))))))) + (set! + parse-unary + (fn + () + (cond + ((at-op? "-") + (begin + (advance-tok!) + (list (quote lua-unop) "-" (parse-unary)))) + ((at-op? "#") + (begin + (advance-tok!) + (list (quote lua-unop) "#" (parse-unary)))) + ((at-keyword? "not") + (begin + (advance-tok!) + (list (quote lua-unop) "not" (parse-unary)))) + (else (parse-primary))))) + (define + parse-binop-rhs + (fn + (min-prec lhs) + (begin + (define + loop + (fn + (lhs-acc) + (if + (at-binop?) + (let + ((op (lua-tok-value (peek-tok)))) + (let + ((prec (lua-binop-prec op))) + (if + (< prec min-prec) + lhs-acc + (begin + (advance-tok!) + (let + ((rhs (parse-unary))) + (let + ((next-min (if (lua-binop-right? op) prec (+ prec 1)))) + (let + ((rhs2 (parse-binop-rhs next-min rhs))) + (loop + (list (quote lua-binop) op lhs-acc rhs2))))))))) + lhs-acc))) + (loop lhs))))) + (set! + parse-expr + (fn () (let ((lhs (parse-unary))) (parse-binop-rhs 1 lhs)))) + (set! + parse-prefixexp + (fn + () + (let + ((base nil)) + (begin + (cond + ((check-tok? "ident" nil) + (let + ((t (peek-tok))) + (begin + (advance-tok!) + (set! base (list (quote lua-name) (lua-tok-value t)))))) + ((at-op? "(") + (begin + (advance-tok!) + (set! base (parse-expr)) + (consume! "op" ")"))) + (else (error "lua-parse: expected prefixexp"))) + (define + more + (fn + () + (cond + ((at-op? ".") + (begin + (advance-tok!) + (let + ((t (peek-tok))) + (begin + (when + (not (= (lua-tok-type t) "ident")) + (error "lua-parse: expected name after .")) + (set! + base + (list (quote lua-field) base (lua-tok-value t))) + (advance-tok!) + (more))))) + ((at-op? "[") + (begin + (advance-tok!) + (let + ((e (parse-expr))) + (begin + (consume! "op" "]") + (set! base (list (quote lua-index) base e)) + (more))))) + ((at-op? ":") + (begin + (advance-tok!) + (let + ((t (peek-tok))) + (begin + (when + (not (= (lua-tok-type t) "ident")) + (error "lua-parse: expected name after :")) + (let + ((name (lua-tok-value t))) + (begin + (advance-tok!) + (let + ((args (parse-args))) + (begin + (set! + base + (list + (quote lua-method-call) + base + name + args)) + (more))))))))) + ((or (at-op? "(") (at-op? "{") (check-tok? "string" nil)) + (let + ((args (parse-args))) + (begin + (set! base (list (quote lua-call) base args)) + (more)))) + (else nil)))) + (more) + base)))) + (set! + parse-table-ctor + (fn + () + (begin + (consume! "op" "{") + (let + ((fields (list))) + (begin + (define + parse-field + (fn + () + (cond + ((at-op? "[") + (begin + (advance-tok!) + (let + ((k (parse-expr))) + (begin + (consume! "op" "]") + (consume! "op" "=") + (let + ((v (parse-expr))) + (append! fields (list (quote lua-kv) k v))))))) + ((and (check-tok? "ident" nil) (let ((nxt (peek-tok-at 1))) (and (not (= nxt nil)) (= (lua-tok-type nxt) "op") (= (lua-tok-value nxt) "=")))) + (let + ((t (peek-tok))) + (begin + (advance-tok!) + (advance-tok!) + (let + ((v (parse-expr))) + (append! + fields + (list + (quote lua-kv) + (list (quote lua-str) (lua-tok-value t)) + v)))))) + (else + (let + ((v (parse-expr))) + (append! fields (list (quote lua-pos) v))))))) + (when + (not (at-op? "}")) + (begin + (parse-field) + (define + more-fields + (fn + () + (when + (and + (or (at-op? ",") (at-op? ";")) + (not (at-op? "}"))) + (begin + (advance-tok!) + (when + (not (at-op? "}")) + (begin (parse-field) (more-fields))))))) + (more-fields))) + (consume! "op" "}") + (cons (quote lua-table) fields)))))) + (define + parse-explist + (fn + () + (let + ((exps (list (parse-expr)))) + (begin + (define + more + (fn + () + (when + (at-op? ",") + (begin + (advance-tok!) + (append! exps (parse-expr)) + (more))))) + (more) + exps)))) + (define + parse-namelist + (fn + () + (let + ((names (list))) + (begin + (let + ((t (peek-tok))) + (begin + (when + (not (= (lua-tok-type t) "ident")) + (error "lua-parse: expected name")) + (append! names (lua-tok-value t)) + (advance-tok!))) + (define + more + (fn + () + (when + (at-op? ",") + (begin + (advance-tok!) + (let + ((t (peek-tok))) + (begin + (when + (not (= (lua-tok-type t) "ident")) + (error "lua-parse: expected name")) + (append! names (lua-tok-value t)) + (advance-tok!))) + (more))))) + (more) + names)))) + (define + parse-if + (fn + () + (begin + (consume! "keyword" "if") + (let + ((cnd (parse-expr))) + (begin + (consume! "keyword" "then") + (let + ((then-body (parse-block)) + (elseifs (list)) + (else-body nil)) + (begin + (define + elseif-loop + (fn + () + (when + (at-keyword? "elseif") + (begin + (advance-tok!) + (let + ((c (parse-expr))) + (begin + (consume! "keyword" "then") + (let + ((b (parse-block))) + (begin + (append! elseifs (list c b)) + (elseif-loop))))))))) + (elseif-loop) + (when + (at-keyword? "else") + (begin (advance-tok!) (set! else-body (parse-block)))) + (consume! "keyword" "end") + (list (quote lua-if) cnd then-body elseifs else-body)))))))) + (define + parse-while + (fn + () + (begin + (consume! "keyword" "while") + (let + ((cnd (parse-expr))) + (begin + (consume! "keyword" "do") + (let + ((body (parse-block))) + (begin + (consume! "keyword" "end") + (list (quote lua-while) cnd body)))))))) + (define + parse-repeat + (fn + () + (begin + (consume! "keyword" "repeat") + (let + ((body (parse-block))) + (begin + (consume! "keyword" "until") + (let + ((cnd (parse-expr))) + (list (quote lua-repeat) body cnd))))))) + (define + parse-do + (fn + () + (begin + (consume! "keyword" "do") + (let + ((body (parse-block))) + (begin (consume! "keyword" "end") (list (quote lua-do) body)))))) + (define + parse-for + (fn + () + (begin + (consume! "keyword" "for") + (let + ((t (peek-tok))) + (begin + (when + (not (= (lua-tok-type t) "ident")) + (error "lua-parse: expected name in for")) + (let + ((name (lua-tok-value t))) + (begin + (advance-tok!) + (when + (not (at-op? "=")) + (error "lua-parse: only numeric for supported")) + (consume! "op" "=") + (let + ((start (parse-expr))) + (begin + (consume! "op" ",") + (let + ((stop (parse-expr)) (step nil)) + (begin + (when + (at-op? ",") + (begin + (advance-tok!) + (set! step (parse-expr)))) + (consume! "keyword" "do") + (let + ((body (parse-block))) + (begin + (consume! "keyword" "end") + (list + (quote lua-for-num) + name + start + stop + step + body)))))))))))))) + (define + parse-funcname + (fn + () + (let + ((t (peek-tok))) + (begin + (when + (not (= (lua-tok-type t) "ident")) + (error "lua-parse: expected name after function")) + (let + ((base (list (quote lua-name) (lua-tok-value t)))) + (begin + (advance-tok!) + (define + dots + (fn + () + (when + (at-op? ".") + (begin + (advance-tok!) + (let + ((tt (peek-tok))) + (begin + (when + (not (= (lua-tok-type tt) "ident")) + (error "lua-parse: expected name after .")) + (set! + base + (list + (quote lua-field) + base + (lua-tok-value tt))) + (advance-tok!) + (dots))))))) + (dots) + (let + ((is-method false) (method-name nil)) + (begin + (when + (at-op? ":") + (begin + (advance-tok!) + (let + ((tt (peek-tok))) + (begin + (when + (not (= (lua-tok-type tt) "ident")) + (error "lua-parse: expected name after :")) + (set! is-method true) + (set! method-name (lua-tok-value tt)) + (advance-tok!))))) + (list base is-method method-name))))))))) + (define + parse-function-decl + (fn + () + (begin + (consume! "keyword" "function") + (let + ((parts (parse-funcname))) + (let + ((base (nth parts 0)) + (is-method (nth parts 1)) + (m-name (nth parts 2))) + (let + ((func (parse-funcbody))) + (if + is-method + (let + ((target (list (quote lua-field) base m-name)) + (new-params (cons "self" (nth func 1)))) + (list + (quote lua-function-decl) + target + (list + (quote lua-function) + new-params + (nth func 2) + (nth func 3)))) + (list (quote lua-function-decl) base func)))))))) + (define + parse-local + (fn + () + (begin + (consume! "keyword" "local") + (cond + ((at-keyword? "function") + (begin + (advance-tok!) + (let + ((t (peek-tok))) + (begin + (when + (not (= (lua-tok-type t) "ident")) + (error "lua-parse: expected name after local fn")) + (let + ((name (lua-tok-value t))) + (begin + (advance-tok!) + (let + ((func (parse-funcbody))) + (list (quote lua-local-function) name func)))))))) + (else + (let + ((names (parse-namelist)) (exps (list))) + (begin + (when + (at-op? "=") + (begin (advance-tok!) (set! exps (parse-explist)))) + (list (quote lua-local) names exps)))))))) + (define + parse-return + (fn + () + (begin + (consume! "keyword" "return") + (let + ((exps (list))) + (begin + (when + (not + (or + (at-keyword? "end") + (at-keyword? "else") + (at-keyword? "elseif") + (at-keyword? "until") + (check-tok? "eof" nil) + (at-op? ";"))) + (set! exps (parse-explist))) + (list (quote lua-return) exps)))))) + (define + parse-assign-or-call + (fn + () + (let + ((pexpr (parse-prefixexp))) + (cond + ((or (at-op? "=") (at-op? ",")) + (let + ((lhs (list pexpr))) + (begin + (define + more + (fn + () + (when + (at-op? ",") + (begin + (advance-tok!) + (append! lhs (parse-prefixexp)) + (more))))) + (more) + (consume! "op" "=") + (let + ((rhs (parse-explist))) + (list (quote lua-assign) lhs rhs))))) + ((or (= (first pexpr) (quote lua-call)) (= (first pexpr) (quote lua-method-call))) + (list (quote lua-call-stmt) pexpr)) + (else (error "lua-parse: expected '=' or call")))))) + (define + parse-stmt + (fn + () + (cond + ((at-op? ";") (begin (advance-tok!) nil)) + ((at-keyword? "if") (parse-if)) + ((at-keyword? "while") (parse-while)) + ((at-keyword? "repeat") (parse-repeat)) + ((at-keyword? "do") (parse-do)) + ((at-keyword? "for") (parse-for)) + ((at-keyword? "function") (parse-function-decl)) + ((at-keyword? "local") (parse-local)) + ((at-keyword? "return") (parse-return)) + ((at-keyword? "break") + (begin (advance-tok!) (list (quote lua-break)))) + (else (parse-assign-or-call))))) + (define + block-end? + (fn + () + (or + (at-keyword? "end") + (at-keyword? "else") + (at-keyword? "elseif") + (at-keyword? "until") + (check-tok? "eof" nil)))) + (set! + parse-block + (fn + () + (let + ((stmts (list))) + (begin + (define + loop + (fn + () + (when + (not (block-end?)) + (let + ((s (parse-stmt))) + (begin + (when (not (= s nil)) (append! stmts s)) + (cond + ((and (not (= s nil)) (= (first s) (quote lua-return))) + nil) + (else (loop)))))))) + (loop) + (cons (quote lua-block) stmts))))) + (parse-block)))) + + +(define + lua-parse-expr + (fn + (src) + (let + ((tree (lua-parse (str "return " src)))) + (let ((ret (nth tree 1))) (first (nth ret 1)))))) diff --git a/lib/lua/runtime.sx b/lib/lua/runtime.sx new file mode 100644 index 00000000..71b37373 --- /dev/null +++ b/lib/lua/runtime.sx @@ -0,0 +1,171 @@ +(define lua-truthy? (fn (v) (and (not (= v nil)) (not (= v false))))) + +(define + lua-to-number + (fn + (v) + (cond + ((= (type-of v) "number") v) + ((= (type-of v) "string") + (let ((n (parse-number v))) (if (= n nil) nil n))) + (else nil)))) + +(define + lua-to-string + (fn + (v) + (cond + ((= v nil) "nil") + ((= v true) "true") + ((= v false) "false") + ((= (type-of v) "number") (str v)) + ((= (type-of v) "string") v) + (else (str v))))) + +(define + lua-num-op + (fn + (op a b) + (let + ((na (lua-to-number a)) (nb (lua-to-number b))) + (begin + (when + (or (= na nil) (= nb nil)) + (error (str "lua: arith on non-number: " a " " op " " b))) + (cond + ((= op "+") (+ na nb)) + ((= op "-") (- na nb)) + ((= op "*") (* na nb)) + ((= op "/") (/ na nb)) + ((= op "%") (- na (* nb (floor (/ na nb))))) + ((= op "^") (pow na nb)) + (else (error (str "lua: unknown arith op " op)))))))) + +(define lua-add (fn (a b) (lua-num-op "+" a b))) + +(define lua-sub (fn (a b) (lua-num-op "-" a b))) + +(define lua-mul (fn (a b) (lua-num-op "*" a b))) + +(define lua-div (fn (a b) (lua-num-op "/" a b))) + +(define lua-mod (fn (a b) (lua-num-op "%" a b))) + +(define lua-pow (fn (a b) (lua-num-op "^" a b))) + +(define + lua-neg + (fn + (a) + (let + ((na (lua-to-number a))) + (begin + (when (= na nil) (error (str "lua: neg on non-number: " a))) + (- 0 na))))) + +(define + lua-concat-coerce + (fn + (v) + (cond + ((= (type-of v) "string") v) + ((= (type-of v) "number") (str v)) + (else (error (str "lua: cannot concat " v)))))) + +(define + lua-concat + (fn + (a b) + (let + ((sa (lua-concat-coerce a)) (sb (lua-concat-coerce b))) + (str sa sb)))) + +(define + lua-eq + (fn + (a b) + (cond + ((and (= a nil) (= b nil)) true) + ((or (= a nil) (= b nil)) false) + ((and (= (type-of a) (type-of b)) (= a b)) true) + (else false)))) + +(define lua-neq (fn (a b) (not (lua-eq a b)))) + +(define + lua-lt + (fn + (a b) + (cond + ((and (= (type-of a) "number") (= (type-of b) "number")) (< a b)) + ((and (= (type-of a) "string") (= (type-of b) "string")) (< a b)) + (else (error "lua: attempt to compare incompatible types"))))) + +(define lua-le (fn (a b) (or (lua-lt a b) (lua-eq a b)))) + +(define lua-gt (fn (a b) (lua-lt b a))) + +(define lua-ge (fn (a b) (lua-le b a))) + +(define + lua-len + (fn + (a) + (cond + ((= (type-of a) "string") (len a)) + ((= (type-of a) "list") (len a)) + ((= (type-of a) "dict") + (let + ((n 0)) + (begin + (define + count-loop + (fn + (i) + (if + (has? a (str i)) + (begin (set! n i) (count-loop (+ i 1))) + n))) + (count-loop 1)))) + (else (error (str "lua: len on non-len type: " (type-of a))))))) + +(define + lua-for-continue? + (fn (i stop step) (if (> step 0) (<= i stop) (>= i stop)))) + +(define + lua-make-table + (fn + (&rest fields) + (let + ((t {}) (array-idx 1)) + (begin + (define + process + (fn + (fs) + (when + (> (len fs) 0) + (begin + (let + ((f (first fs))) + (cond + ((= (first f) "pos") + (begin + (set! t (assoc t (str array-idx) (nth f 1))) + (set! array-idx (+ array-idx 1)))) + ((= (first f) "kv") + (let + ((k (nth f 1)) (v (nth f 2))) + (set! t (assoc t (str k) v)))))) + (process (rest fs)))))) + (process fields) + t)))) + +(define + lua-get + (fn + (t k) + (if (= t nil) nil (let ((v (get t (str k)))) (if (= v nil) nil v))))) + +(define lua-set! (fn (t k v) (assoc t (str k) v))) diff --git a/lib/lua/test.sh b/lib/lua/test.sh new file mode 100755 index 00000000..96a2e495 --- /dev/null +++ b/lib/lua/test.sh @@ -0,0 +1,645 @@ +#!/usr/bin/env bash +# Fast Lua-on-SX test runner — epoch protocol direct to sx_server.exe. +# Mirrors lib/js/test.sh. +# +# Usage: +# bash lib/lua/test.sh # run all tests +# bash lib/lua/test.sh -v # verbose + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +if [ ! -x "$SX_SERVER" ]; then + # fallback to main repo binary when running from a worktree without _build + SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +fi +if [ ! -x "$SX_SERVER" ]; then + echo "ERROR: sx_server.exe not found. Run: cd hosts/ocaml && dune build" + exit 1 +fi + +VERBOSE="${1:-}" +PASS=0 +FAIL=0 +ERRORS="" +TMPFILE=$(mktemp) +trap "rm -f $TMPFILE" EXIT + +cat > "$TMPFILE" << 'EPOCHS' +(epoch 1) +(load "lib/lua/tokenizer.sx") +(epoch 2) +(load "lib/lua/parser.sx") +(epoch 3) +(load "lib/lua/runtime.sx") +(epoch 4) +(load "lib/lua/transpile.sx") + +;; ── Phase 1: tokenizer ────────────────────────────────────────── +(epoch 100) +(eval "(len (lua-tokenize \"\"))") +(epoch 101) +(eval "(get (nth (lua-tokenize \"\") 0) :type)") + +;; Numbers +(epoch 110) +(eval "(get (nth (lua-tokenize \"42\") 0) :type)") +(epoch 111) +(eval "(get (nth (lua-tokenize \"42\") 0) :value)") +(epoch 112) +(eval "(get (nth (lua-tokenize \"3.14\") 0) :value)") +(epoch 113) +(eval "(get (nth (lua-tokenize \"0xff\") 0) :value)") +(epoch 114) +(eval "(get (nth (lua-tokenize \"1e3\") 0) :value)") +(epoch 115) +(eval "(get (nth (lua-tokenize \"1.5e-2\") 0) :value)") +(epoch 116) +(eval "(get (nth (lua-tokenize \".5\") 0) :value)") + +;; Identifiers and keywords +(epoch 120) +(eval "(get (nth (lua-tokenize \"foo\") 0) :type)") +(epoch 121) +(eval "(get (nth (lua-tokenize \"foo\") 0) :value)") +(epoch 122) +(eval "(get (nth (lua-tokenize \"_bar1\") 0) :value)") +(epoch 123) +(eval "(get (nth (lua-tokenize \"local\") 0) :type)") +(epoch 124) +(eval "(get (nth (lua-tokenize \"function\") 0) :value)") +(epoch 125) +(eval "(get (nth (lua-tokenize \"nil\") 0) :type)") +(epoch 126) +(eval "(get (nth (lua-tokenize \"true\") 0) :value)") +(epoch 127) +(eval "(get (nth (lua-tokenize \"false\") 0) :type)") + +;; Short strings +(epoch 130) +(eval "(get (nth (lua-tokenize \"\\\"hi\\\"\") 0) :type)") +(epoch 131) +(eval "(get (nth (lua-tokenize \"\\\"hi\\\"\") 0) :value)") +(epoch 132) +(eval "(get (nth (lua-tokenize \"'ab'\") 0) :value)") +(epoch 133) +(eval "(get (nth (lua-tokenize \"\\\"a\\\\nb\\\"\") 0) :value)") + +;; Long strings +(epoch 140) +(eval "(get (nth (lua-tokenize \"[[hello]]\") 0) :type)") +(epoch 141) +(eval "(get (nth (lua-tokenize \"[[hello]]\") 0) :value)") +(epoch 142) +(eval "(get (nth (lua-tokenize \"[==[level 2]==]\") 0) :value)") + +;; Operators (multi-char) +(epoch 150) +(eval "(get (nth (lua-tokenize \"==\") 0) :value)") +(epoch 151) +(eval "(get (nth (lua-tokenize \"~=\") 0) :value)") +(epoch 152) +(eval "(get (nth (lua-tokenize \"<=\") 0) :value)") +(epoch 153) +(eval "(get (nth (lua-tokenize \">=\") 0) :value)") +(epoch 154) +(eval "(get (nth (lua-tokenize \"..\") 0) :value)") +(epoch 155) +(eval "(get (nth (lua-tokenize \"...\") 0) :value)") +(epoch 156) +(eval "(get (nth (lua-tokenize \"::\") 0) :value)") + +;; Single-char operators / punctuation +(epoch 160) +(eval "(get (nth (lua-tokenize \"+\") 0) :value)") +(epoch 161) +(eval "(get (nth (lua-tokenize \"-\") 0) :value)") +(epoch 162) +(eval "(get (nth (lua-tokenize \"*\") 0) :value)") +(epoch 163) +(eval "(get (nth (lua-tokenize \"/\") 0) :value)") +(epoch 164) +(eval "(get (nth (lua-tokenize \"%\") 0) :value)") +(epoch 165) +(eval "(get (nth (lua-tokenize \"^\") 0) :value)") +(epoch 166) +(eval "(get (nth (lua-tokenize \"#\") 0) :value)") +(epoch 167) +(eval "(get (nth (lua-tokenize \"(\") 0) :value)") +(epoch 168) +(eval "(get (nth (lua-tokenize \"{\") 0) :value)") +(epoch 169) +(eval "(get (nth (lua-tokenize \";\") 0) :value)") + +;; Comments are stripped +(epoch 170) +(eval "(len (lua-tokenize \"-- comment\\n\"))") +(epoch 171) +(eval "(len (lua-tokenize \"-- comment\\n1\"))") +(epoch 172) +(eval "(get (nth (lua-tokenize \"-- c\\n42\") 0) :value)") +(epoch 173) +(eval "(len (lua-tokenize \"--[[ block ]] 1\"))") +(epoch 174) +(eval "(get (nth (lua-tokenize \"--[[ c ]] 42\") 0) :value)") +(epoch 175) +(eval "(get (nth (lua-tokenize \"--[==[ x ]==] 7\") 0) :value)") + +;; Compound expressions +(epoch 180) +(eval "(len (lua-tokenize \"local x = 1\"))") +(epoch 181) +(eval "(get (nth (lua-tokenize \"local x = 1\") 0) :type)") +(epoch 182) +(eval "(get (nth (lua-tokenize \"local x = 1\") 0) :value)") +(epoch 183) +(eval "(get (nth (lua-tokenize \"local x = 1\") 1) :type)") +(epoch 184) +(eval "(get (nth (lua-tokenize \"local x = 1\") 2) :value)") +(epoch 185) +(eval "(get (nth (lua-tokenize \"local x = 1\") 3) :value)") + +(epoch 190) +(eval "(len (lua-tokenize \"a.b:c()\"))") +(epoch 191) +(eval "(get (nth (lua-tokenize \"a.b:c()\") 1) :value)") +(epoch 192) +(eval "(get (nth (lua-tokenize \"a.b:c()\") 3) :value)") + +;; ── Phase 1.parse: parser ──────────────────────────────────── +;; Literals +(epoch 200) +(eval "(lua-parse-expr \"42\")") +(epoch 201) +(eval "(lua-parse-expr \"3.14\")") +(epoch 202) +(eval "(lua-parse-expr \"\\\"hi\\\"\")") +(epoch 203) +(eval "(lua-parse-expr \"true\")") +(epoch 204) +(eval "(lua-parse-expr \"false\")") +(epoch 205) +(eval "(lua-parse-expr \"nil\")") +(epoch 206) +(eval "(lua-parse-expr \"foo\")") +(epoch 207) +(eval "(lua-parse-expr \"...\")") + +;; Binops with precedence +(epoch 210) +(eval "(lua-parse-expr \"1+2\")") +(epoch 211) +(eval "(lua-parse-expr \"a+b*c\")") +(epoch 212) +(eval "(lua-parse-expr \"a*b+c\")") +(epoch 213) +(eval "(lua-parse-expr \"a and b or c\")") +(epoch 214) +(eval "(lua-parse-expr \"a==b\")") +(epoch 215) +(eval "(lua-parse-expr \"a..b..c\")") +(epoch 216) +(eval "(lua-parse-expr \"a^b^c\")") +(epoch 217) +(eval "(lua-parse-expr \"(a+b)*c\")") + +;; Unary +(epoch 220) +(eval "(lua-parse-expr \"-x\")") +(epoch 221) +(eval "(lua-parse-expr \"not x\")") +(epoch 222) +(eval "(lua-parse-expr \"#a\")") + +;; Member/index/call +(epoch 230) +(eval "(lua-parse-expr \"a.b\")") +(epoch 231) +(eval "(lua-parse-expr \"a.b.c\")") +(epoch 232) +(eval "(lua-parse-expr \"a[0]\")") +(epoch 233) +(eval "(lua-parse-expr \"f()\")") +(epoch 234) +(eval "(lua-parse-expr \"f(1,2)\")") +(epoch 235) +(eval "(lua-parse-expr \"a:b()\")") + +;; Table constructors +(epoch 240) +(eval "(lua-parse-expr \"{1,2,3}\")") +(epoch 241) +(eval "(lua-parse-expr \"{x=1,y=2}\")") +(epoch 242) +(eval "(lua-parse-expr \"{[1+1]=\\\"a\\\"}\")") +(epoch 243) +(eval "(lua-parse-expr \"{}\")") + +;; Anonymous function +(epoch 250) +(eval "(lua-parse-expr \"function() return 1 end\")") +(epoch 251) +(eval "(lua-parse-expr \"function(a,b) return a+b end\")") +(epoch 252) +(eval "(lua-parse-expr \"function(...) return 1 end\")") + +;; Statements +(epoch 260) +(eval "(lua-parse \"local x = 1\")") +(epoch 261) +(eval "(lua-parse \"local a, b = 1, 2\")") +(epoch 262) +(eval "(lua-parse \"x = 1\")") +(epoch 263) +(eval "(lua-parse \"a, b = 1, 2\")") +(epoch 264) +(eval "(lua-parse \"if x then y = 1 end\")") +(epoch 265) +(eval "(lua-parse \"if x then y = 1 else y = 2 end\")") +(epoch 266) +(eval "(lua-parse \"if x then y = 1 elseif z then y = 2 else y = 3 end\")") +(epoch 267) +(eval "(lua-parse \"while x < 10 do x = x + 1 end\")") +(epoch 268) +(eval "(lua-parse \"repeat x = x + 1 until x > 10\")") +(epoch 269) +(eval "(lua-parse \"for i = 1, 10 do x = i end\")") +(epoch 270) +(eval "(lua-parse \"for i = 1, 10, 2 do x = i end\")") +(epoch 271) +(eval "(lua-parse \"do local x = 1 end\")") +(epoch 272) +(eval "(lua-parse \"break\")") +(epoch 273) +(eval "(lua-parse \"return 42\")") +(epoch 274) +(eval "(lua-parse \"return 1, 2\")") +(epoch 275) +(eval "(lua-parse \"return\")") + +;; Function declarations +(epoch 280) +(eval "(lua-parse \"function f() return 1 end\")") +(epoch 281) +(eval "(lua-parse \"local function f(x) return x * 2 end\")") +(epoch 282) +(eval "(lua-parse \"function t.m(x) return x end\")") +(epoch 283) +(eval "(lua-parse \"function t:m(x) return self end\")") + +;; Calls as statements +(epoch 290) +(eval "(lua-parse \"print(42)\")") +(epoch 291) +(eval "(lua-parse \"a:b()\")") +(epoch 292) +(eval "(lua-parse \"t.f()\")") + +;; Multi-statement chunks +(epoch 300) +(eval "(len (lua-parse \"local x = 1 x = x + 1 return x\"))") + +;; ── Phase 2: transpile + eval ───────────────────────────────── +;; Literals via return +(epoch 400) +(eval "(lua-eval-ast \"return 1\")") +(epoch 401) +(eval "(lua-eval-ast \"return true\")") +(epoch 402) +(eval "(lua-eval-ast \"return false\")") +(epoch 403) +(eval "(lua-eval-ast \"return nil\")") +(epoch 404) +(eval "(lua-eval-ast \"return \\\"hi\\\"\")") + +;; Arithmetic +(epoch 410) +(eval "(lua-eval-ast \"return 1 + 2\")") +(epoch 411) +(eval "(lua-eval-ast \"return 10 - 3\")") +(epoch 412) +(eval "(lua-eval-ast \"return 4 * 5\")") +(epoch 413) +(eval "(lua-eval-ast \"return 10 / 4\")") +(epoch 414) +(eval "(lua-eval-ast \"return 10 % 3\")") +(epoch 415) +(eval "(lua-eval-ast \"return 2 ^ 10\")") +(epoch 416) +(eval "(lua-eval-ast \"return (1 + 2) * 3\")") +(epoch 417) +(eval "(lua-eval-ast \"return 1 + 2 * 3\")") +(epoch 418) +(eval "(lua-eval-ast \"return -5 + 10\")") + +;; String +(epoch 420) +(eval "(lua-eval-ast \"return \\\"a\\\" .. \\\"b\\\"\")") +(epoch 421) +(eval "(lua-eval-ast \"return \\\"count: \\\" .. 42\")") + +;; Comparison +(epoch 430) +(eval "(lua-eval-ast \"return 1 < 2\")") +(epoch 431) +(eval "(lua-eval-ast \"return 3 > 2\")") +(epoch 432) +(eval "(lua-eval-ast \"return 2 == 2\")") +(epoch 433) +(eval "(lua-eval-ast \"return 1 ~= 2\")") +(epoch 434) +(eval "(lua-eval-ast \"return 1 <= 1\")") +(epoch 435) +(eval "(lua-eval-ast \"return 3 >= 2\")") + +;; Logical (short-circuit, return value) +(epoch 440) +(eval "(lua-eval-ast \"return true and 42\")") +(epoch 441) +(eval "(lua-eval-ast \"return false or 99\")") +(epoch 442) +(eval "(lua-eval-ast \"return nil or 7\")") +(epoch 443) +(eval "(lua-eval-ast \"return 1 and 2\")") +(epoch 444) +(eval "(lua-eval-ast \"return false and 999\")") +(epoch 445) +(eval "(lua-eval-ast \"return not true\")") +(epoch 446) +(eval "(lua-eval-ast \"return not nil\")") +(epoch 447) +(eval "(lua-eval-ast \"return not 0\")") + +;; Truthy +(epoch 450) +(eval "(lua-truthy? 0)") +(epoch 451) +(eval "(lua-truthy? nil)") +(epoch 452) +(eval "(lua-truthy? false)") +(epoch 453) +(eval "(lua-truthy? \"\")") + +;; Control flow +(epoch 460) +(eval "(lua-eval-ast \"if true then return 1 else return 2 end\")") +(epoch 461) +(eval "(lua-eval-ast \"if 1 > 2 then return 100 else return 200 end\")") +(epoch 462) +(eval "(lua-eval-ast \"local x = 1 if x > 0 then x = x * 10 elseif x < 0 then x = 999 else x = 42 end return x\")") + +;; Local and assignment +(epoch 470) +(eval "(lua-eval-ast \"local x = 5 return x * 2\")") +(epoch 471) +(eval "(lua-eval-ast \"local x = 0 x = x + 1 x = x + 1 return x\")") +(epoch 472) +(eval "(lua-eval-ast \"local a, b = 1, 2 return a + b\")") + +;; Loops +(epoch 480) +(eval "(lua-eval-ast \"local sum = 0 for i = 1, 5 do sum = sum + i end return sum\")") +(epoch 481) +(eval "(lua-eval-ast \"local n = 0 for i = 10, 1, -1 do n = n + 1 end return n\")") +(epoch 482) +(eval "(lua-eval-ast \"local i = 0 while i < 5 do i = i + 1 end return i\")") +(epoch 483) +(eval "(lua-eval-ast \"local i = 0 repeat i = i + 1 until i >= 3 return i\")") +(epoch 484) +(eval "(lua-eval-ast \"local s = 0 for i = 1, 100 do s = s + i end return s\")") + +EPOCHS + +OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) + +check() { + local epoch="$1" desc="$2" expected="$3" + local actual + actual=$(echo "$OUTPUT" | grep -A1 "^(ok-len $epoch " | tail -1) + if [ -z "$actual" ]; then + actual=$(echo "$OUTPUT" | grep "^(ok $epoch " || true) + fi + if [ -z "$actual" ]; then + actual=$(echo "$OUTPUT" | grep "^(error $epoch " || true) + fi + if [ -z "$actual" ]; then + actual="" + fi + + if echo "$actual" | grep -qF -- "$expected"; then + PASS=$((PASS + 1)) + [ "$VERBOSE" = "-v" ] && echo " ok $desc" + else + FAIL=$((FAIL + 1)) + ERRORS+=" FAIL $desc (epoch $epoch) + expected: $expected + actual: $actual +" + fi +} + +# ── Empty / EOF ──────────────────────────────────────────────── +check 100 "empty tokens length" '1' +check 101 "empty first token is eof" '"eof"' + +# ── Numbers ──────────────────────────────────────────────────── +check 110 "int token type" '"number"' +check 111 "int value" '42' +check 112 "float value" '3.14' +check 113 "hex value" '255' +check 114 "exponent" '1000' +check 115 "neg exponent" '0.015' +check 116 "leading-dot" '0.5' + +# ── Identifiers / keywords ───────────────────────────────────── +check 120 "ident type" '"ident"' +check 121 "ident value" '"foo"' +check 122 "underscore ident" '"_bar1"' +check 123 "local is keyword" '"keyword"' +check 124 "function keyword" '"function"' +check 125 "nil is keyword" '"keyword"' +check 126 "true value" '"true"' +check 127 "false type" '"keyword"' + +# ── Strings ──────────────────────────────────────────────────── +check 130 "string type" '"string"' +check 131 "string value" '"hi"' +check 132 "single-quote string" '"ab"' +check 133 "escape sequence" '"a' +check 140 "long string type" '"string"' +check 141 "long string value" '"hello"' +check 142 "level-2 long string" '"level 2"' + +# ── Operators ────────────────────────────────────────────────── +check 150 "==" '"=="' +check 151 "~=" '"~="' +check 152 "<=" '"<="' +check 153 ">=" '">="' +check 154 ".." '".."' +check 155 "..." '"..."' +check 156 "::" '"::"' + +check 160 "+" '"+"' +check 161 "-" '"-"' +check 162 "*" '"*"' +check 163 "/" '"/"' +check 164 "%" '"%"' +check 165 "^" '"^"' +check 166 "#" '"#"' +check 167 "(" '"("' +check 168 "{" '"{"' +check 169 ";" '";"' + +# ── Comments ─────────────────────────────────────────────────── +check 170 "line comment only → eof" '1' +check 171 "line comment + num" '2' +check 172 "num after line comment" '42' +check 173 "block comment → eof" '2' +check 174 "num after block comment" '42' +check 175 "num after level-2 block comment" '7' + +# ── Compound ─────────────────────────────────────────────────── +check 180 "local x = 1 token count" '5' +check 181 "local is keyword" '"keyword"' +check 182 "local value" '"local"' +check 183 "x is ident" '"ident"' +check 184 "= value" '"="' +check 185 "1 value" '1' + +check 190 "a.b:c() token count" '8' +check 191 "dot after ident" '"."' +check 192 "colon after ident" '":"' + +# ── Phase 1.parse: parser ──────────────────────────────────── +check 200 "parse int" '(lua-num 42)' +check 201 "parse float" '(lua-num 3.14)' +check 202 "parse string" '(lua-str "hi")' +check 203 "parse true" '(lua-true)' +check 204 "parse false" '(lua-false)' +check 205 "parse nil" '(lua-nil)' +check 206 "parse ident" '(lua-name "foo")' +check 207 "parse vararg" '(lua-vararg)' + +check 210 "parse 1+2" '(lua-binop "+" (lua-num 1) (lua-num 2))' +check 211 "parse a+b*c prec" '(lua-binop "+" (lua-name "a") (lua-binop "*"' +check 212 "parse a*b+c prec" '(lua-binop "+" (lua-binop "*"' +check 213 "parse and/or prec" '(lua-binop "or" (lua-binop "and"' +check 214 "parse ==" '(lua-binop "==" (lua-name "a") (lua-name "b"))' +check 215 "parse .. right-assoc" '(lua-binop ".." (lua-name "a") (lua-binop ".."' +check 216 "parse ^ right-assoc" '(lua-binop "^" (lua-name "a") (lua-binop "^"' +check 217 "parse paren override" '(lua-binop "*" (lua-binop "+"' + +check 220 "parse -x" '(lua-unop "-" (lua-name "x"))' +check 221 "parse not x" '(lua-unop "not" (lua-name "x"))' +check 222 "parse #a" '(lua-unop "#" (lua-name "a"))' + +check 230 "parse a.b" '(lua-field (lua-name "a") "b")' +check 231 "parse a.b.c" '(lua-field (lua-field (lua-name "a") "b") "c")' +check 232 "parse a[0]" '(lua-index (lua-name "a") (lua-num 0))' +check 233 "parse f()" '(lua-call (lua-name "f") ())' +check 234 "parse f(1,2)" '(lua-call (lua-name "f") ((lua-num 1) (lua-num 2)))' +check 235 "parse a:b()" '(lua-method-call (lua-name "a") "b" ())' + +check 240 "parse {1,2,3}" '(lua-table (lua-pos (lua-num 1)) (lua-pos (lua-num 2))' +check 241 "parse {x=1,y=2}" '(lua-table (lua-kv (lua-str "x") (lua-num 1))' +check 242 "parse {[k]=v}" '(lua-table (lua-kv (lua-binop "+"' +check 243 "parse empty table" '(lua-table)' + +check 250 "parse function() 1 end" '(lua-function () false' +check 251 "parse function(a,b)" '(lua-function ("a" "b") false' +check 252 "parse function(...)" '(lua-function () true' + +check 260 "parse local x = 1" '(lua-block (lua-local ("x") ((lua-num 1))))' +check 261 "parse local a,b = 1,2" '(lua-block (lua-local ("a" "b") ((lua-num 1) (lua-num 2))))' +check 262 "parse x = 1" '(lua-assign ((lua-name "x")) ((lua-num 1)))' +check 263 "parse a,b = 1,2" '(lua-assign ((lua-name "a") (lua-name "b"))' +check 264 "parse if then end" '(lua-if (lua-name "x")' +check 265 "parse if-else" '(lua-if (lua-name "x") (lua-block (lua-assign ((lua-name "y")) ((lua-num 1)))) () (lua-block' +check 266 "parse if-elseif-else" '(((lua-name "z") (lua-block (lua-assign ((lua-name "y")) ((lua-num 2))))))' +check 267 "parse while" '(lua-while (lua-binop "<"' +check 268 "parse repeat" '(lua-repeat' +check 269 "parse for num" '(lua-for-num "i" (lua-num 1) (lua-num 10) nil' +check 270 "parse for num step" '(lua-for-num "i" (lua-num 1) (lua-num 10) (lua-num 2)' +check 271 "parse do block" '(lua-do (lua-block (lua-local ("x") ((lua-num 1))))' +check 272 "parse break" '(lua-break)' +check 273 "parse return" '(lua-return ((lua-num 42)))' +check 274 "parse return 1,2" '(lua-return ((lua-num 1) (lua-num 2)))' +check 275 "parse bare return" '(lua-return ())' + +check 280 "parse function decl" '(lua-function-decl (lua-name "f")' +check 281 "parse local function" '(lua-local-function "f" (lua-function ("x") false' +check 282 "parse function t.m" '(lua-function-decl (lua-field (lua-name "t") "m")' +check 283 "parse method t:m" 'self' + +check 290 "parse call stmt" '(lua-call-stmt (lua-call (lua-name "print")' +check 291 "parse method call stmt" '(lua-call-stmt (lua-method-call' +check 292 "parse chained call stmt" '(lua-call-stmt (lua-call (lua-field' + +check 300 "parse multi-statement" '4' + +# ── Phase 2: transpile + eval ──────────────────────────────── +check 400 "eval return 1" '1' +check 401 "eval return true" 'true' +check 402 "eval return false" 'false' +check 403 "eval return nil" 'nil' +check 404 "eval return string" '"hi"' + +check 410 "eval 1+2" '3' +check 411 "eval 10-3" '7' +check 412 "eval 4*5" '20' +check 413 "eval 10/4" '2.5' +check 414 "eval 10%3" '1' +check 415 "eval 2^10" '1024' +check 416 "eval (1+2)*3" '9' +check 417 "eval 1+2*3 prec" '7' +check 418 "eval -5+10" '5' + +check 420 "eval \"a\"..\"b\"" '"ab"' +check 421 "eval str..num" '"count: 42"' + +check 430 "eval 1<2" 'true' +check 431 "eval 3>2" 'true' +check 432 "eval 2==2" 'true' +check 433 "eval 1~=2" 'true' +check 434 "eval 1<=1" 'true' +check 435 "eval 3>=2" 'true' + +check 440 "eval true and 42" '42' +check 441 "eval false or 99" '99' +check 442 "eval nil or 7" '7' +check 443 "eval 1 and 2" '2' +check 444 "eval false and 999" 'false' +check 445 "eval not true" 'false' +check 446 "eval not nil" 'true' +check 447 "eval not 0" 'false' + +check 450 "truthy 0 (Lua truthy!)" 'true' +check 451 "truthy nil" 'false' +check 452 "truthy false" 'false' +check 453 "truthy empty string" 'true' + +check 460 "if true then 1 else 2" '1' +check 461 "if 1>2 then 100 else 200" '200' +check 462 "if-elseif-else branching" '10' + +check 470 "local x=5; x*2" '10' +check 471 "mutate x" '2' +check 472 "local a,b = 1,2; a+b" '3' + +check 480 "for 1..5 sum" '15' +check 481 "for 10..1 step -1 count" '10' +check 482 "while i<5 count" '5' +check 483 "repeat until i>=3" '3' +check 484 "for 1..100 sum" '5050' + +TOTAL=$((PASS + FAIL)) +if [ $FAIL -eq 0 ]; then + echo "ok $PASS/$TOTAL Lua-on-SX tests passed" +else + echo "FAIL $PASS/$TOTAL passed, $FAIL failed:" + echo "" + echo "$ERRORS" +fi + +[ $FAIL -eq 0 ] diff --git a/lib/lua/tests/eval.sx b/lib/lua/tests/eval.sx new file mode 100644 index 00000000..704821b2 --- /dev/null +++ b/lib/lua/tests/eval.sx @@ -0,0 +1,30 @@ +(define + lua-eval-tests + (list + (list "arith-add" "return 1 + 2" 3) + (list "arith-sub" "return 10 - 3" 7) + (list "arith-mul" "return 4 * 5" 20) + (list "arith-prec" "return 1 + 2 * 3" 7) + (list "arith-paren" "return (1 + 2) * 3" 9) + (list "unary-neg" "return -5 + 10" 5) + (list "lt" "return 1 < 2" true) + (list "eq" "return 2 == 2" true) + (list "neq" "return 1 ~= 2" true) + (list "and-value" "return true and 42" 42) + (list "or-value" "return false or 99" 99) + (list "or-nil" "return nil or 7" 7) + (list "and-short" "return false and 999" false) + (list "not-true" "return not true" false) + (list "not-zero" "return not 0" false) + (list "local" "local x = 5 return x * 2" 10) + (list "local-mutate" "local x=0 x=x+1 x=x+1 return x" 2) + (list "local-multi" "local a,b = 1,2 return a + b" 3) + (list "for-sum" "local sum=0 for i=1,5 do sum=sum+i end return sum" 15) + (list "for-neg-step" "local n=0 for i=10,1,-1 do n=n+1 end return n" 10) + (list "while" "local i=0 while i<5 do i=i+1 end return i" 5) + (list "repeat" "local i=0 repeat i=i+1 until i>=3 return i" 3) + (list "for-big" "local s=0 for i=1,100 do s=s+i end return s" 5050) + (list + "nested-for" + "local s=0 for i=1,3 do for j=1,3 do s=s+1 end end return s" + 9))) diff --git a/lib/lua/tests/parse.sx b/lib/lua/tests/parse.sx new file mode 100644 index 00000000..3f9e0a6b --- /dev/null +++ b/lib/lua/tests/parse.sx @@ -0,0 +1,32 @@ +(define + lua-parse-tests + (list + (list "empty" "" (list (quote lua-block))) + (list + "int literal" + "return 42" + (list + (quote lua-block) + (list (quote lua-return) (list (list (quote lua-num) 42))))) + (list + "local" + "local x = 1" + (list + (quote lua-block) + (list + (quote lua-local) + (list "x") + (list (list (quote lua-num) 1))))) + (list + "binop" + "return 1 + 2" + (list + (quote lua-block) + (list + (quote lua-return) + (list + (list + (quote lua-binop) + "+" + (list (quote lua-num) 1) + (list (quote lua-num) 2)))))))) diff --git a/lib/lua/tokenizer.sx b/lib/lua/tokenizer.sx new file mode 100644 index 00000000..6a09788d --- /dev/null +++ b/lib/lua/tokenizer.sx @@ -0,0 +1,353 @@ +(define lua-make-token (fn (type value pos) {:pos pos :value value :type type})) + +(define lua-digit? (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9")))) + +(define + lua-hex-digit? + (fn + (c) + (and + (not (= c nil)) + (or + (lua-digit? c) + (and (>= c "a") (<= c "f")) + (and (>= c "A") (<= c "F")))))) + +(define + lua-letter? + (fn + (c) + (and + (not (= c nil)) + (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))) + +(define lua-ident-start? (fn (c) (or (lua-letter? c) (= c "_")))) + +(define lua-ident-char? (fn (c) (or (lua-ident-start? c) (lua-digit? c)))) + +(define lua-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) + +(define + lua-keywords + (list + "and" + "break" + "do" + "else" + "elseif" + "end" + "false" + "for" + "function" + "goto" + "if" + "in" + "local" + "nil" + "not" + "or" + "repeat" + "return" + "then" + "true" + "until" + "while")) + +(define lua-keyword? (fn (word) (contains? lua-keywords word))) + +(define + lua-tokenize + (fn + (src) + (let + ((tokens (list)) (pos 0) (src-len (len src))) + (define + lua-peek + (fn + (offset) + (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) + (define cur (fn () (lua-peek 0))) + (define advance! (fn (n) (set! pos (+ pos n)))) + (define + push! + (fn + (type value start) + (append! tokens (lua-make-token type value start)))) + (define + match-long-open + (fn + () + (if + (= (cur) "[") + (let + ((p (+ pos 1)) (level 0)) + (begin + (define + count-eq + (fn + () + (when + (and (< p src-len) (= (nth src p) "=")) + (begin + (set! level (+ level 1)) + (set! p (+ p 1)) + (count-eq))))) + (count-eq) + (if (and (< p src-len) (= (nth src p) "[")) level -1))) + -1))) + (define + read-long-body + (fn + (level) + (let + ((start pos) (result nil)) + (begin + (define + scan + (fn + () + (cond + ((>= pos src-len) (set! result (slice src start pos))) + ((= (cur) "]") + (let + ((p (+ pos 1)) (eq-count 0)) + (begin + (define + count-eq + (fn + () + (when + (and (< p src-len) (= (nth src p) "=")) + (begin + (set! eq-count (+ eq-count 1)) + (set! p (+ p 1)) + (count-eq))))) + (count-eq) + (if + (and + (= eq-count level) + (< p src-len) + (= (nth src p) "]")) + (begin + (set! result (slice src start pos)) + (set! pos (+ p 1))) + (begin (advance! 1) (scan)))))) + (else (begin (advance! 1) (scan)))))) + (scan) + result)))) + (define + skip-line-comment! + (fn + () + (when + (and (< pos src-len) (not (= (cur) "\n"))) + (begin (advance! 1) (skip-line-comment!))))) + (define + skip-ws! + (fn + () + (cond + ((>= pos src-len) nil) + ((lua-ws? (cur)) (begin (advance! 1) (skip-ws!))) + ((and (= (cur) "-") (< (+ pos 1) src-len) (= (lua-peek 1) "-")) + (begin + (advance! 2) + (let + ((lvl (match-long-open))) + (cond + ((>= lvl 0) + (begin + (advance! (+ 2 lvl)) + (read-long-body lvl) + (skip-ws!))) + (else (begin (skip-line-comment!) (skip-ws!))))))) + (else nil)))) + (define + read-ident + (fn + (start) + (begin + (when + (and (< pos src-len) (lua-ident-char? (cur))) + (begin (advance! 1) (read-ident start))) + (slice src start pos)))) + (define + read-decimal-digits! + (fn + () + (when + (and (< pos src-len) (lua-digit? (cur))) + (begin (advance! 1) (read-decimal-digits!))))) + (define + read-hex-digits! + (fn + () + (when + (and (< pos src-len) (lua-hex-digit? (cur))) + (begin (advance! 1) (read-hex-digits!))))) + (define + read-exp-part! + (fn + () + (when + (and (< pos src-len) (or (= (cur) "e") (= (cur) "E"))) + (let + ((p1 (lua-peek 1))) + (when + (or + (and (not (= p1 nil)) (lua-digit? p1)) + (and + (or (= p1 "+") (= p1 "-")) + (< (+ pos 2) src-len) + (lua-digit? (lua-peek 2)))) + (begin + (advance! 1) + (when + (and + (< pos src-len) + (or (= (cur) "+") (= (cur) "-"))) + (advance! 1)) + (read-decimal-digits!))))))) + (define + read-number + (fn + (start) + (cond + ((and (= (cur) "0") (< (+ pos 1) src-len) (or (= (lua-peek 1) "x") (= (lua-peek 1) "X"))) + (begin + (advance! 2) + (read-hex-digits!) + (let + ((raw (slice src (+ start 2) pos))) + (parse-number (str "0x" raw))))) + (else + (begin + (read-decimal-digits!) + (when + (and + (< pos src-len) + (= (cur) ".") + (< (+ pos 1) src-len) + (lua-digit? (lua-peek 1))) + (begin (advance! 1) (read-decimal-digits!))) + (read-exp-part!) + (parse-number (slice src start pos))))))) + (define + read-string + (fn + (quote-char) + (let + ((chars (list))) + (begin + (advance! 1) + (define + loop + (fn + () + (cond + ((>= pos src-len) nil) + ((= (cur) "\\") + (begin + (advance! 1) + (when + (< pos src-len) + (let + ((ch (cur))) + (begin + (cond + ((= ch "n") (append! chars "\n")) + ((= ch "t") (append! chars "\t")) + ((= ch "r") (append! chars "\r")) + ((= ch "\\") (append! chars "\\")) + ((= ch "'") (append! chars "'")) + ((= ch "\"") (append! chars "\"")) + (else (append! chars ch))) + (advance! 1)))) + (loop))) + ((= (cur) quote-char) (advance! 1)) + (else + (begin (append! chars (cur)) (advance! 1) (loop)))))) + (loop) + (join "" chars))))) + (define + try-punct + (fn + (start) + (let + ((c (cur)) (c1 (lua-peek 1)) (c2 (lua-peek 2))) + (cond + ((and (= c ".") (= c1 ".") (= c2 ".")) + (begin (advance! 3) (push! "op" "..." start) true)) + ((and (= c ".") (= c1 ".")) + (begin (advance! 2) (push! "op" ".." start) true)) + ((and (= c "=") (= c1 "=")) + (begin (advance! 2) (push! "op" "==" start) true)) + ((and (= c "~") (= c1 "=")) + (begin (advance! 2) (push! "op" "~=" start) true)) + ((and (= c "<") (= c1 "=")) + (begin (advance! 2) (push! "op" "<=" start) true)) + ((and (= c ">") (= c1 "=")) + (begin (advance! 2) (push! "op" ">=" start) true)) + ((and (= c ":") (= c1 ":")) + (begin (advance! 2) (push! "op" "::" start) true)) + ((or (= c "+") (= c "-") (= c "*") (= c "/") (= c "%") (= c "^") (= c "#") (= c "<") (= c ">") (= c "=") (= c "(") (= c ")") (= c "{") (= c "}") (= c "[") (= c "]") (= c ";") (= c ":") (= c ",") (= c ".")) + (begin (advance! 1) (push! "op" c start) true)) + (else false))))) + (define + step + (fn + () + (begin + (skip-ws!) + (when + (< pos src-len) + (let + ((start pos) (c (cur))) + (cond + ((lua-ident-start? c) + (let + ((word (read-ident start))) + (begin + (if + (lua-keyword? word) + (push! "keyword" word start) + (push! "ident" word start)) + (step)))) + ((lua-digit? c) + (let + ((v (read-number start))) + (begin (push! "number" v start) (step)))) + ((and (= c ".") (< (+ pos 1) src-len) (lua-digit? (lua-peek 1))) + (begin + (advance! 1) + (read-decimal-digits!) + (read-exp-part!) + (push! + "number" + (parse-number (slice src start pos)) + start) + (step))) + ((or (= c "\"") (= c "'")) + (let + ((s (read-string c))) + (begin (push! "string" s start) (step)))) + ((= c "[") + (let + ((lvl (match-long-open))) + (cond + ((>= lvl 0) + (begin + (advance! (+ 2 lvl)) + (when (= (cur) "\n") (advance! 1)) + (let + ((s (read-long-body lvl))) + (begin (push! "string" s start) (step))))) + (else + (begin (advance! 1) (push! "op" "[" start) (step)))))) + ((try-punct start) (step)) + (else + (error + (str "lua-tokenize: unexpected char " c " at " pos))))))))) + (step) + (push! "eof" nil pos) + tokens))) diff --git a/lib/lua/transpile.sx b/lib/lua/transpile.sx new file mode 100644 index 00000000..b348bcf0 --- /dev/null +++ b/lib/lua/transpile.sx @@ -0,0 +1,436 @@ +(define + lua-tx + (fn + (node) + (cond + ((= node nil) nil) + ((not (= (type-of node) "list")) node) + (else (lua-tx-dispatch (first node) node))))) + +(define + lua-tx-dispatch + (fn + (tag node) + (cond + ((= tag (quote lua-num)) (nth node 1)) + ((= tag (quote lua-str)) (nth node 1)) + ((= tag (quote lua-nil)) nil) + ((= tag (quote lua-true)) true) + ((= tag (quote lua-false)) false) + ((= tag (quote lua-name)) (make-symbol (nth node 1))) + ((= tag (quote lua-vararg)) + (error "lua-transpile: ... not yet supported")) + ((= tag (quote lua-binop)) (lua-tx-binop node)) + ((= tag (quote lua-unop)) (lua-tx-unop node)) + ((= tag (quote lua-call)) (lua-tx-call node)) + ((= tag (quote lua-method-call)) (lua-tx-method-call node)) + ((= tag (quote lua-field)) (lua-tx-field node)) + ((= tag (quote lua-index)) (lua-tx-index node)) + ((= tag (quote lua-table)) (lua-tx-table node)) + ((= tag (quote lua-function)) (lua-tx-function node)) + ((= tag (quote lua-block)) (lua-tx-block node)) + ((= tag (quote lua-local)) (lua-tx-local node)) + ((= tag (quote lua-assign)) (lua-tx-assign node)) + ((= tag (quote lua-if)) (lua-tx-if node)) + ((= tag (quote lua-while)) (lua-tx-while node)) + ((= tag (quote lua-repeat)) (lua-tx-repeat node)) + ((= tag (quote lua-for-num)) (lua-tx-for-num node)) + ((= tag (quote lua-do)) (lua-tx-do node)) + ((= tag (quote lua-break)) (quote lua-break-marker)) + ((= tag (quote lua-return)) (lua-tx-return node)) + ((= tag (quote lua-call-stmt)) (lua-tx (nth node 1))) + ((= tag (quote lua-local-function)) (lua-tx-local-function node)) + ((= tag (quote lua-function-decl)) (lua-tx-function-decl node)) + (else (error (str "lua-transpile: unknown node " tag)))))) + +(define + lua-tx-binop + (fn + (node) + (let + ((op (nth node 1)) + (a (lua-tx (nth node 2))) + (b (lua-tx (nth node 3)))) + (cond + ((= op "+") (list (make-symbol "lua-add") a b)) + ((= op "-") (list (make-symbol "lua-sub") a b)) + ((= op "*") (list (make-symbol "lua-mul") a b)) + ((= op "/") (list (make-symbol "lua-div") a b)) + ((= op "%") (list (make-symbol "lua-mod") a b)) + ((= op "^") (list (make-symbol "lua-pow") a b)) + ((= op "..") (list (make-symbol "lua-concat") a b)) + ((= op "==") (list (make-symbol "lua-eq") a b)) + ((= op "~=") (list (make-symbol "lua-neq") a b)) + ((= op "<") (list (make-symbol "lua-lt") a b)) + ((= op "<=") (list (make-symbol "lua-le") a b)) + ((= op ">") (list (make-symbol "lua-gt") a b)) + ((= op ">=") (list (make-symbol "lua-ge") a b)) + ((= op "and") + (list + (make-symbol "let") + (list (list (make-symbol "_la") a)) + (list + (make-symbol "if") + (list (make-symbol "lua-truthy?") (make-symbol "_la")) + b + (make-symbol "_la")))) + ((= op "or") + (list + (make-symbol "let") + (list (list (make-symbol "_la") a)) + (list + (make-symbol "if") + (list (make-symbol "lua-truthy?") (make-symbol "_la")) + (make-symbol "_la") + b))) + (else (error (str "lua-transpile: unknown binop " op))))))) + +(define + lua-tx-unop + (fn + (node) + (let + ((op (nth node 1)) (e (lua-tx (nth node 2)))) + (cond + ((= op "-") (list (make-symbol "lua-neg") e)) + ((= op "not") + (list (make-symbol "not") (list (make-symbol "lua-truthy?") e))) + ((= op "#") (list (make-symbol "lua-len") e)) + (else (error (str "lua-transpile: unknown unop " op))))))) + +(define + lua-tx-call + (fn + (node) + (let + ((fn-ast (nth node 1)) (args (nth node 2))) + (cons (lua-tx fn-ast) (map lua-tx args))))) + +(define + lua-tx-method-call + (fn + (node) + (let + ((obj (lua-tx (nth node 1))) + (name (nth node 2)) + (args (nth node 3))) + (cons + (list (make-symbol "lua-get") obj name) + (cons obj (map lua-tx args)))))) + +(define + lua-tx-field + (fn + (node) + (list (make-symbol "lua-get") (lua-tx (nth node 1)) (nth node 2)))) + +(define + lua-tx-index + (fn + (node) + (list + (make-symbol "lua-get") + (lua-tx (nth node 1)) + (lua-tx (nth node 2))))) + +(define + lua-tx-table + (fn + (node) + (let + ((fields (rest node))) + (cons (make-symbol "lua-make-table") (map lua-tx-table-field fields))))) + +(define + lua-tx-table-field + (fn + (f) + (cond + ((= (first f) (quote lua-pos)) + (list (make-symbol "list") "pos" (lua-tx (nth f 1)))) + ((= (first f) (quote lua-kv)) + (list + (make-symbol "list") + "kv" + (lua-tx (nth f 1)) + (lua-tx (nth f 2)))) + (else (error "lua-transpile: unknown table field"))))) + +(define + lua-tx-function + (fn + (node) + (let + ((params (nth node 1)) + (is-vararg (nth node 2)) + (body (nth node 3))) + (let + ((sym-params (map make-symbol params))) + (list (make-symbol "fn") sym-params (lua-tx body)))))) + +(define + lua-tx-block + (fn + (node) + (let + ((stmts (rest node))) + (cond + ((= (len stmts) 0) nil) + ((= (len stmts) 1) (lua-tx (first stmts))) + (else (cons (make-symbol "begin") (map lua-tx stmts))))))) + +(define + lua-tx-local + (fn + (node) + (let + ((names (nth node 1)) (exps (nth node 2))) + (cond + ((= (len names) 1) + (list + (make-symbol "define") + (make-symbol (first names)) + (if (> (len exps) 0) (lua-tx (first exps)) nil))) + (else + (cons (make-symbol "begin") (lua-tx-local-pairs names exps 0))))))) + +(define + lua-tx-local-pairs + (fn + (names exps i) + (if + (>= i (len names)) + (list) + (cons + (list + (make-symbol "define") + (make-symbol (nth names i)) + (if (< i (len exps)) (lua-tx (nth exps i)) nil)) + (lua-tx-local-pairs names exps (+ i 1)))))) + +(define + lua-tx-assign + (fn + (node) + (let + ((lhss (nth node 1)) (rhss (nth node 2))) + (cond + ((= (len lhss) 1) + (lua-tx-single-assign (first lhss) (lua-tx (first rhss)))) + (else + (cons (make-symbol "begin") (lua-tx-assign-pairs lhss rhss 0))))))) + +(define + lua-tx-assign-pairs + (fn + (lhss rhss i) + (if + (>= i (len lhss)) + (list) + (cons + (lua-tx-single-assign + (nth lhss i) + (if (< i (len rhss)) (lua-tx (nth rhss i)) nil)) + (lua-tx-assign-pairs lhss rhss (+ i 1)))))) + +(define + lua-tx-single-assign + (fn + (lhs rhs) + (cond + ((= (first lhs) (quote lua-name)) + (list (make-symbol "set!") (make-symbol (nth lhs 1)) rhs)) + ((= (first lhs) (quote lua-field)) + (list + (make-symbol "lua-set!") + (lua-tx (nth lhs 1)) + (nth lhs 2) + rhs)) + ((= (first lhs) (quote lua-index)) + (list + (make-symbol "lua-set!") + (lua-tx (nth lhs 1)) + (lua-tx (nth lhs 2)) + rhs)) + (else (error "lua-transpile: bad assignment target"))))) + +(define + lua-tx-if + (fn + (node) + (let + ((cnd (lua-tx (nth node 1))) + (then-body (lua-tx (nth node 2))) + (elseifs (nth node 3)) + (else-body (nth node 4))) + (if + (and (= (len elseifs) 0) (= else-body nil)) + (list + (make-symbol "when") + (list (make-symbol "lua-truthy?") cnd) + then-body) + (lua-tx-if-cond cnd then-body elseifs else-body))))) + +(define + lua-tx-if-cond + (fn + (cnd then-body elseifs else-body) + (let + ((clauses (cons (list (list (make-symbol "lua-truthy?") cnd) then-body) (map lua-tx-elseif elseifs)))) + (cons + (make-symbol "cond") + (if + (= else-body nil) + clauses + (append + clauses + (list (list (make-symbol "else") (lua-tx else-body))))))))) + +(define + lua-tx-elseif + (fn + (pair) + (list + (list (make-symbol "lua-truthy?") (lua-tx (first pair))) + (lua-tx (nth pair 1))))) + +(define + lua-tx-while + (fn + (node) + (let + ((cnd (lua-tx (nth node 1))) (body (lua-tx (nth node 2)))) + (list + (make-symbol "let") + (list) + (list + (make-symbol "define") + (make-symbol "_while_loop") + (list + (make-symbol "fn") + (list) + (list + (make-symbol "when") + (list (make-symbol "lua-truthy?") cnd) + (list + (make-symbol "begin") + body + (list (make-symbol "_while_loop")))))) + (list (make-symbol "_while_loop")))))) + +(define + lua-tx-repeat + (fn + (node) + (let + ((body (lua-tx (nth node 1))) (cnd (lua-tx (nth node 2)))) + (list + (make-symbol "let") + (list) + (list + (make-symbol "define") + (make-symbol "_repeat_loop") + (list + (make-symbol "fn") + (list) + (list + (make-symbol "begin") + body + (list + (make-symbol "when") + (list + (make-symbol "not") + (list (make-symbol "lua-truthy?") cnd)) + (list (make-symbol "_repeat_loop")))))) + (list (make-symbol "_repeat_loop")))))) + +(define + lua-tx-for-num + (fn + (node) + (let + ((name (nth node 1)) + (start (lua-tx (nth node 2))) + (stop (lua-tx (nth node 3))) + (step-ast (nth node 4)) + (body (lua-tx (nth node 5)))) + (let + ((step (if (= step-ast nil) 1 (lua-tx step-ast)))) + (list + (make-symbol "let") + (list + (list (make-symbol "_for_stop") stop) + (list (make-symbol "_for_step") step)) + (list (make-symbol "define") (make-symbol name) start) + (list + (make-symbol "define") + (make-symbol "_for_loop") + (list + (make-symbol "fn") + (list) + (list + (make-symbol "when") + (list + (make-symbol "lua-for-continue?") + (make-symbol name) + (make-symbol "_for_stop") + (make-symbol "_for_step")) + (list + (make-symbol "begin") + body + (list + (make-symbol "set!") + (make-symbol name) + (list + (make-symbol "+") + (make-symbol name) + (make-symbol "_for_step"))) + (list (make-symbol "_for_loop")))))) + (list (make-symbol "_for_loop"))))))) + +(define lua-tx-do (fn (node) (lua-tx (nth node 1)))) + +(define + lua-tx-return + (fn + (node) + (let + ((exps (nth node 1))) + (cond + ((= (len exps) 0) nil) + ((= (len exps) 1) (lua-tx (first exps))) + (else (cons (make-symbol "list") (map lua-tx exps))))))) + +(define + lua-tx-local-function + (fn + (node) + (let + ((name (nth node 1)) (func (nth node 2))) + (list (make-symbol "define") (make-symbol name) (lua-tx func))))) + +(define + lua-tx-function-decl + (fn + (node) + (let + ((target (nth node 1)) (func (nth node 2))) + (cond + ((= (first target) (quote lua-name)) + (list + (make-symbol "define") + (make-symbol (nth target 1)) + (lua-tx func))) + ((= (first target) (quote lua-field)) + (list + (make-symbol "lua-set!") + (lua-tx (nth target 1)) + (nth target 2) + (lua-tx func))) + (else (error "lua-transpile: bad function decl target")))))) + +(define lua-transpile (fn (src) (lua-tx (lua-parse src)))) + +(define + lua-eval-ast + (fn (src) (let ((sx (lua-transpile src))) (eval-expr sx)))) diff --git a/lib/prolog/parser.sx b/lib/prolog/parser.sx new file mode 100644 index 00000000..d301a184 --- /dev/null +++ b/lib/prolog/parser.sx @@ -0,0 +1,265 @@ +;; lib/prolog/parser.sx — tokens → Prolog AST +;; +;; Phase 1 grammar (NO operator table yet): +;; Program := Clause* EOF +;; Clause := Term "." | Term ":-" Term "." +;; Term := Atom | Var | Number | String | Compound | List +;; Compound := atom "(" ArgList ")" +;; ArgList := Term ("," Term)* +;; List := "[" "]" | "[" Term ("," Term)* ("|" Term)? "]" +;; +;; Term AST shapes (all tagged lists for uniform dispatch): +;; ("atom" name) — atom +;; ("var" name) — variable template (parser-time only) +;; ("num" value) — integer or float +;; ("str" value) — string literal +;; ("compound" functor args) — compound term, args is list of term-ASTs +;; ("cut") — the cut atom ! +;; +;; A clause is (list "clause" head body). A fact is head with body = ("atom" "true"). +;; +;; The empty list is (atom "[]"). Cons is compound "." with two args: +;; [1, 2, 3] → .(1, .(2, .(3, []))) +;; [H|T] → .(H, T) + +;; ── Parser state helpers ──────────────────────────────────────────── +(define + pp-peek + (fn + (st) + (let + ((i (get st :idx)) (tokens (get st :tokens))) + (if (< i (len tokens)) (nth tokens i) {:pos 0 :value nil :type "eof"})))) + +(define pp-advance! (fn (st) (dict-set! st :idx (+ (get st :idx) 1)))) + +(define + pp-at? + (fn + (st type value) + (let + ((t (pp-peek st))) + (and + (= (get t :type) type) + (or (= value nil) (= (get t :value) value)))))) + +(define + pp-expect! + (fn + (st type value) + (let + ((t (pp-peek st))) + (if + (pp-at? st type value) + (do (pp-advance! st) t) + (error + (str + "Parse error at pos " + (get t :pos) + ": expected " + type + " '" + (if (= value nil) "" value) + "' got " + (get t :type) + " '" + (if (= (get t :value) nil) "" (get t :value)) + "'")))))) + +;; ── AST constructors ──────────────────────────────────────────────── +(define pl-mk-atom (fn (name) (list "atom" name))) +(define pl-mk-var (fn (name) (list "var" name))) +(define pl-mk-num (fn (n) (list "num" n))) +(define pl-mk-str (fn (s) (list "str" s))) +(define pl-mk-compound (fn (f args) (list "compound" f args))) +(define pl-mk-cut (fn () (list "cut"))) + +;; Term tag extractors +(define pl-term-tag (fn (t) (if (list? t) (first t) nil))) +(define pl-term-val (fn (t) (nth t 1))) +(define pl-compound-functor (fn (t) (nth t 1))) +(define pl-compound-args (fn (t) (nth t 2))) + +;; Empty-list atom and cons helpers +(define pl-nil-term (fn () (pl-mk-atom "[]"))) + +(define pl-mk-cons (fn (h t) (pl-mk-compound "." (list h t)))) + +;; Build cons list from a list of terms + optional tail +(define + pl-mk-list-term + (fn + (items tail) + (if + (= (len items) 0) + tail + (pl-mk-cons (first items) (pl-mk-list-term (rest items) tail))))) + +;; ── Term parser ───────────────────────────────────────────────────── +(define + pp-parse-term + (fn + (st) + (let + ((t (pp-peek st))) + (let + ((ty (get t :type)) (vv (get t :value))) + (cond + ((= ty "number") (do (pp-advance! st) (pl-mk-num vv))) + ((= ty "string") (do (pp-advance! st) (pl-mk-str vv))) + ((= ty "var") (do (pp-advance! st) (pl-mk-var vv))) + ((and (= ty "op") (= vv "!")) + (do (pp-advance! st) (pl-mk-cut))) + ((and (= ty "punct") (= vv "[")) (pp-parse-list st)) + ((= ty "atom") + (do + (pp-advance! st) + (if + (pp-at? st "punct" "(") + (do + (pp-advance! st) + (let + ((args (pp-parse-arg-list st))) + (do (pp-expect! st "punct" ")") (pl-mk-compound vv args)))) + (pl-mk-atom vv)))) + (else + (error + (str + "Parse error at pos " + (get t :pos) + ": unexpected " + ty + " '" + (if (= vv nil) "" vv) + "'")))))))) + +;; Parse one or more comma-separated terms (arguments). +(define + pp-parse-arg-list + (fn + (st) + (let + ((first-arg (pp-parse-term st)) (args (list))) + (do + (append! args first-arg) + (define + loop + (fn + () + (when + (pp-at? st "punct" ",") + (do + (pp-advance! st) + (append! args (pp-parse-term st)) + (loop))))) + (loop) + args)))) + +;; Parse a [ ... ] list literal. Consumes the "[". +(define + pp-parse-list + (fn + (st) + (do + (pp-expect! st "punct" "[") + (if + (pp-at? st "punct" "]") + (do (pp-advance! st) (pl-nil-term)) + (let + ((items (list))) + (do + (append! items (pp-parse-term st)) + (define + comma-loop + (fn + () + (when + (pp-at? st "punct" ",") + (do + (pp-advance! st) + (append! items (pp-parse-term st)) + (comma-loop))))) + (comma-loop) + (let + ((tail (if (pp-at? st "punct" "|") (do (pp-advance! st) (pp-parse-term st)) (pl-nil-term)))) + (do (pp-expect! st "punct" "]") (pl-mk-list-term items tail))))))))) + +;; ── Body parsing ──────────────────────────────────────────────────── +;; A clause body is a comma-separated list of goals. We flatten into a +;; right-associative `,` compound: (A, B, C) → ','(A, ','(B, C)) +;; If only one goal, it's that goal directly. +(define + pp-parse-body + (fn + (st) + (let + ((first-goal (pp-parse-term st)) (rest-goals (list))) + (do + (define + gloop + (fn + () + (when + (pp-at? st "punct" ",") + (do + (pp-advance! st) + (append! rest-goals (pp-parse-term st)) + (gloop))))) + (gloop) + (if + (= (len rest-goals) 0) + first-goal + (pp-build-conj first-goal rest-goals)))))) + +(define + pp-build-conj + (fn + (first-goal rest-goals) + (if + (= (len rest-goals) 0) + first-goal + (pl-mk-compound + "," + (list + first-goal + (pp-build-conj (first rest-goals) (rest rest-goals))))))) + +;; ── Clause parsing ────────────────────────────────────────────────── +(define + pp-parse-clause + (fn + (st) + (let + ((head (pp-parse-term st))) + (let + ((body (if (pp-at? st "op" ":-") (do (pp-advance! st) (pp-parse-body st)) (pl-mk-atom "true")))) + (do (pp-expect! st "punct" ".") (list "clause" head body)))))) + +;; Parse an entire program — returns list of clauses. +(define + pl-parse-program + (fn + (tokens) + (let + ((st {:idx 0 :tokens tokens}) (clauses (list))) + (do + (define + ploop + (fn + () + (when + (not (pp-at? st "eof" nil)) + (do (append! clauses (pp-parse-clause st)) (ploop))))) + (ploop) + clauses)))) + +;; Parse a single query term (no trailing "."). Returns the term. +(define + pl-parse-query + (fn (tokens) (let ((st {:idx 0 :tokens tokens})) (pp-parse-body st)))) + +;; Convenience: source → clauses +(define pl-parse (fn (src) (pl-parse-program (pl-tokenize src)))) + +;; Convenience: source → query term +(define pl-parse-goal (fn (src) (pl-parse-query (pl-tokenize src)))) diff --git a/lib/prolog/runtime.sx b/lib/prolog/runtime.sx new file mode 100644 index 00000000..d20a71cb --- /dev/null +++ b/lib/prolog/runtime.sx @@ -0,0 +1,232 @@ +;; lib/prolog/runtime.sx — unification, trail, and (later) solver +;; +;; Phase 2 focus: runtime terms + unification + trail. +;; +;; Term representation at runtime: +;; atom ("atom" name) -- immutable tagged list, same as parse AST +;; num ("num" n) -- likewise +;; str ("str" s) -- likewise +;; compound ("compound" fun args) -- args is a regular list of terms +;; var {:tag "var" -- MUTABLE dict; :binding = nil (unbound) or term +;; :name "X" +;; :id +;; :binding } +;; +;; Parse-time ("var" name) tokens must be instantiated into runtime vars +;; before unification. Fresh renaming happens per clause resolution so +;; that two separate calls to the same clause don't share variables. +;; +;; Trail: +;; {:entries ( ...) :len N} -- stack of vars that got bound +;; Mark = integer length of the entries list at checkpoint time. +;; trail-undo-to! pops entries pushed since the mark, nil'ing :binding. +;; +;; Occurs-check: off by default; configurable via trail :occurs-check flag. + +;; ── Var id counter ───────────────────────────────────────────────── +(define pl-var-counter {:n 0}) + +(define + pl-fresh-id + (fn + () + (let + ((n (dict-get pl-var-counter :n))) + (dict-set! pl-var-counter :n (+ n 1)) + n))) + +;; ── Term constructors / predicates ───────────────────────────────── +(define pl-mk-rt-var (fn (name) {:tag "var" :id (pl-fresh-id) :name name :binding nil})) + +(define pl-var? (fn (t) (and (dict? t) (= (dict-get t :tag) "var")))) + +(define + pl-atom? + (fn (t) (and (list? t) (not (empty? t)) (= (first t) "atom")))) + +(define + pl-num? + (fn (t) (and (list? t) (not (empty? t)) (= (first t) "num")))) + +(define + pl-str? + (fn (t) (and (list? t) (not (empty? t)) (= (first t) "str")))) + +(define + pl-compound? + (fn (t) (and (list? t) (not (empty? t)) (= (first t) "compound")))) + +(define pl-var-name (fn (v) (dict-get v :name))) +(define pl-var-id (fn (v) (dict-get v :id))) +(define pl-var-binding (fn (v) (dict-get v :binding))) +(define pl-var-bound? (fn (v) (not (nil? (dict-get v :binding))))) + +(define pl-atom-name (fn (t) (nth t 1))) +(define pl-num-val (fn (t) (nth t 1))) +(define pl-str-val (fn (t) (nth t 1))) +(define pl-fun (fn (t) (nth t 1))) +(define pl-args (fn (t) (nth t 2))) + +;; ── Instantiate parse AST into runtime terms ────────────────────── +;; Walk a parser AST term, replacing ("var" name) occurrences with fresh +;; runtime vars. A name->var dict is threaded so that repeated uses of +;; the same variable within a clause share the same runtime var. +;; "_" is anonymous: each occurrence gets a NEW fresh var (never shared). + +(define + pl-instantiate + (fn + (ast var-env) + (cond + ((pl-var? ast) ast) + ((not (list? ast)) ast) + ((empty? ast) ast) + ((= (first ast) "var") + (let + ((name (nth ast 1))) + (if + (= name "_") + (pl-mk-rt-var "_") + (if + (dict-has? var-env name) + (dict-get var-env name) + (let ((v (pl-mk-rt-var name))) (dict-set! var-env name v) v))))) + ((= (first ast) "compound") + (let + ((fun (nth ast 1)) (args (nth ast 2))) + (list + "compound" + fun + (map (fn (a) (pl-instantiate a var-env)) args)))) + (true ast)))) + +(define pl-instantiate-fresh (fn (ast) (pl-instantiate ast {}))) + +;; ── Walk: follow binding chain ───────────────────────────────────── +(define + pl-walk + (fn + (t) + (if + (pl-var? t) + (if (pl-var-bound? t) (pl-walk (pl-var-binding t)) t) + t))) + +;; Deep-walk: recursively resolve variables inside compound terms. +(define + pl-walk-deep + (fn + (t) + (let + ((w (pl-walk t))) + (if + (pl-compound? w) + (list "compound" (pl-fun w) (map pl-walk-deep (pl-args w))) + w)))) + +;; ── Trail ───────────────────────────────────────────────────────── +(define pl-mk-trail (fn () {:entries () :len 0 :occurs-check false})) + +(define pl-trail-mark (fn (trail) (dict-get trail :len))) + +(define + pl-trail-push! + (fn + (trail v) + (dict-set! trail :entries (cons v (dict-get trail :entries))) + (dict-set! trail :len (+ 1 (dict-get trail :len))))) + +(define + pl-trail-undo-to! + (fn + (trail mark) + (let + loop + () + (when + (> (dict-get trail :len) mark) + (let + ((entries (dict-get trail :entries))) + (let + ((top (first entries)) (rest (rest entries))) + (dict-set! top :binding nil) + (dict-set! trail :entries rest) + (dict-set! trail :len (- (dict-get trail :len) 1)) + (loop))))))) + +;; Bind variable v to term t, recording on the trail. +(define + pl-bind! + (fn (v t trail) (dict-set! v :binding t) (pl-trail-push! trail v))) + +;; ── Occurs check ────────────────────────────────────────────────── +(define + pl-occurs? + (fn + (v t) + (let + ((w (pl-walk t))) + (cond + ((pl-var? w) (= (pl-var-id v) (pl-var-id w))) + ((pl-compound? w) (some (fn (a) (pl-occurs? v a)) (pl-args w))) + (true false))))) + +;; ── Unify ───────────────────────────────────────────────────────── +;; Unify two terms, mutating trail. Returns true on success. +;; On failure, the caller must undo to a pre-unify mark. +(define + pl-unify! + (fn + (t1 t2 trail) + (let + ((a (pl-walk t1)) (b (pl-walk t2))) + (cond + ((and (pl-var? a) (pl-var? b) (= (pl-var-id a) (pl-var-id b))) + true) + ((pl-var? a) + (if + (and (dict-get trail :occurs-check) (pl-occurs? a b)) + false + (do (pl-bind! a b trail) true))) + ((pl-var? b) + (if + (and (dict-get trail :occurs-check) (pl-occurs? b a)) + false + (do (pl-bind! b a trail) true))) + ((and (pl-atom? a) (pl-atom? b)) + (= (pl-atom-name a) (pl-atom-name b))) + ((and (pl-num? a) (pl-num? b)) (= (pl-num-val a) (pl-num-val b))) + ((and (pl-str? a) (pl-str? b)) (= (pl-str-val a) (pl-str-val b))) + ((and (pl-compound? a) (pl-compound? b)) + (if + (and + (= (pl-fun a) (pl-fun b)) + (= (len (pl-args a)) (len (pl-args b)))) + (pl-unify-lists! (pl-args a) (pl-args b) trail) + false)) + (true false))))) + +(define + pl-unify-lists! + (fn + (xs ys trail) + (cond + ((and (empty? xs) (empty? ys)) true) + ((or (empty? xs) (empty? ys)) false) + (true + (if + (pl-unify! (first xs) (first ys) trail) + (pl-unify-lists! (rest xs) (rest ys) trail) + false))))) + +;; Convenience: try-unify with auto-undo on failure. +(define + pl-try-unify! + (fn + (t1 t2 trail) + (let + ((mark (pl-trail-mark trail))) + (if + (pl-unify! t1 t2 trail) + true + (do (pl-trail-undo-to! trail mark) false))))) diff --git a/lib/prolog/tests/parse.sx b/lib/prolog/tests/parse.sx new file mode 100644 index 00000000..7e4d12ee --- /dev/null +++ b/lib/prolog/tests/parse.sx @@ -0,0 +1,215 @@ +;; lib/prolog/tests/parse.sx — parser unit tests +;; +;; Run: bash lib/prolog/tests/run-parse.sh +;; Or via sx-server: (load "lib/prolog/tokenizer.sx") (load "lib/prolog/parser.sx") +;; (load "lib/prolog/tests/parse.sx") (pl-parse-tests-run!) + +(define pl-test-count 0) +(define pl-test-pass 0) +(define pl-test-fail 0) +(define pl-test-failures (list)) + +(define + pl-test! + (fn + (name got expected) + (do + (set! pl-test-count (+ pl-test-count 1)) + (if + (= got expected) + (set! pl-test-pass (+ pl-test-pass 1)) + (do + (set! pl-test-fail (+ pl-test-fail 1)) + (append! + pl-test-failures + (str name "\n expected: " expected "\n got: " got))))))) + +;; Atoms & variables +(pl-test! + "atom fact" + (pl-parse "foo.") + (list (list "clause" (list "atom" "foo") (list "atom" "true")))) + +(pl-test! "number literal" (pl-parse-goal "42") (list "num" 42)) + +(pl-test! + "negative number — not supported yet (parsed as op atom + num)" + (pl-parse-goal "-5") + (list "atom" "-")) + +(pl-test! "variable" (pl-parse-goal "X") (list "var" "X")) + +(pl-test! + "underscore variable" + (pl-parse-goal "_Ignored") + (list "var" "_Ignored")) + +(pl-test! "anonymous variable" (pl-parse-goal "_") (list "var" "_")) + +(pl-test! + "compound 1-arg" + (pl-parse-goal "foo(a)") + (list "compound" "foo" (list (list "atom" "a")))) + +(pl-test! + "compound 3-args mixed" + (pl-parse-goal "p(X, 1, hello)") + (list + "compound" + "p" + (list (list "var" "X") (list "num" 1) (list "atom" "hello")))) + +(pl-test! + "nested compound" + (pl-parse-goal "f(g(X), h(Y, Z))") + (list + "compound" + "f" + (list + (list "compound" "g" (list (list "var" "X"))) + (list "compound" "h" (list (list "var" "Y") (list "var" "Z")))))) + +;; Lists +(pl-test! "empty list" (pl-parse-goal "[]") (list "atom" "[]")) + +(pl-test! + "single-element list" + (pl-parse-goal "[a]") + (list "compound" "." (list (list "atom" "a") (list "atom" "[]")))) + +(pl-test! + "three-element list" + (pl-parse-goal "[1, 2, 3]") + (list + "compound" + "." + (list + (list "num" 1) + (list + "compound" + "." + (list + (list "num" 2) + (list "compound" "." (list (list "num" 3) (list "atom" "[]")))))))) + +(pl-test! + "head-tail list" + (pl-parse-goal "[H|T]") + (list "compound" "." (list (list "var" "H") (list "var" "T")))) + +(pl-test! + "two-head-tail list" + (pl-parse-goal "[A, B|T]") + (list + "compound" + "." + (list + (list "var" "A") + (list "compound" "." (list (list "var" "B") (list "var" "T")))))) + +;; Clauses +(pl-test! + "fact" + (pl-parse "parent(tom, bob).") + (list + (list + "clause" + (list + "compound" + "parent" + (list (list "atom" "tom") (list "atom" "bob"))) + (list "atom" "true")))) + +(pl-test! + "rule with single-goal body" + (pl-parse "q(X) :- p(X).") + (list + (list + "clause" + (list "compound" "q" (list (list "var" "X"))) + (list "compound" "p" (list (list "var" "X")))))) + +(pl-test! + "rule with conjunctive body" + (pl-parse "r(X, Y) :- p(X), q(Y).") + (list + (list + "clause" + (list "compound" "r" (list (list "var" "X") (list "var" "Y"))) + (list + "compound" + "," + (list + (list "compound" "p" (list (list "var" "X"))) + (list "compound" "q" (list (list "var" "Y")))))))) + +;; Cut in body +(pl-test! + "cut in body" + (pl-parse "foo(X) :- p(X), !, q(X).") + (list + (list + "clause" + (list "compound" "foo" (list (list "var" "X"))) + (list + "compound" + "," + (list + (list "compound" "p" (list (list "var" "X"))) + (list + "compound" + "," + (list + (list "cut") + (list "compound" "q" (list (list "var" "X")))))))))) + +;; Symbolic-atom compound terms (phase 1 form) +(pl-test! + "= as compound" + (pl-parse-goal "=(X, 5)") + (list "compound" "=" (list (list "var" "X") (list "num" 5)))) + +(pl-test! + "is with +" + (pl-parse-goal "is(Y, +(X, 1))") + (list + "compound" + "is" + (list + (list "var" "Y") + (list "compound" "+" (list (list "var" "X") (list "num" 1)))))) + +;; Strings +(pl-test! + "double-quoted string" + (pl-parse-goal "\"hello\"") + (list "str" "hello")) + +;; Single-quoted atom +(pl-test! + "quoted atom" + (pl-parse-goal "'Hello World'") + (list "atom" "Hello World")) + +;; Multi-clause program +(pl-test! + "append program" + (len + (pl-parse "append([], L, L).\nappend([H|T], L, [H|R]) :- append(T, L, R).\n")) + 2) + +;; Comments +(pl-test! + "line comment ignored" + (pl-parse "foo.\n% this is a comment\nbar.") + (list + (list "clause" (list "atom" "foo") (list "atom" "true")) + (list "clause" (list "atom" "bar") (list "atom" "true")))) + +(pl-test! + "block comment ignored" + (pl-parse "/* hello */\nfoo.") + (list (list "clause" (list "atom" "foo") (list "atom" "true")))) + +;; ── Runner ─────────────────────────────────────────────────────── +(define pl-parse-tests-run! (fn () {:failed pl-test-fail :passed pl-test-pass :total pl-test-count :failures pl-test-failures})) diff --git a/lib/prolog/tests/unify.sx b/lib/prolog/tests/unify.sx new file mode 100644 index 00000000..f7b74380 --- /dev/null +++ b/lib/prolog/tests/unify.sx @@ -0,0 +1,484 @@ +;; lib/prolog/tests/unify.sx — unification + trail unit tests +;; +;; Run via MCP: (pl-unify-tests-run!) +;; +;; Covers: atoms, vars, numbers, strings, compounds, nested compounds, +;; cons-cell lists, trail undo, occurs-check, deep walks. + +(define pl-u-count 0) +(define pl-u-pass 0) +(define pl-u-fail 0) +(define pl-u-failures (list)) + +(define + pl-u-test! + (fn + (name thunk expected) + (set! pl-u-count (+ pl-u-count 1)) + (let + ((got (thunk))) + (if + (= got expected) + (set! pl-u-pass (+ pl-u-pass 1)) + (do + (set! pl-u-fail (+ pl-u-fail 1)) + (set! + pl-u-failures + (cons (list name :expected expected :got got) pl-u-failures))))))) + +;; Shortcuts +(define pl-a (fn (n) (list "atom" n))) +(define pl-n (fn (v) (list "num" v))) +(define pl-s (fn (v) (list "str" v))) +(define pl-c (fn (f args) (list "compound" f args))) + +;; ── Primitive predicates ────────────────────────────────────────── +(pl-u-test! "var? on fresh var" (fn () (pl-var? (pl-mk-rt-var "X"))) true) + +(pl-u-test! "var? on atom" (fn () (pl-var? (pl-a "foo"))) false) + +(pl-u-test! "atom? on atom" (fn () (pl-atom? (pl-a "foo"))) true) + +(pl-u-test! "atom? on var" (fn () (pl-atom? (pl-mk-rt-var "X"))) false) + +(pl-u-test! + "compound? on compound" + (fn () (pl-compound? (pl-c "p" (list (pl-a "a"))))) + true) + +(pl-u-test! "num? on num" (fn () (pl-num? (pl-n 42))) true) + +;; ── Fresh var ids ───────────────────────────────────────────────── +(pl-u-test! + "fresh vars get distinct ids" + (fn + () + (let + ((a (pl-mk-rt-var "X")) (b (pl-mk-rt-var "X"))) + (not (= (pl-var-id a) (pl-var-id b))))) + true) + +;; ── Walk ─────────────────────────────────────────────────────────── +(pl-u-test! + "walk returns unbound var unchanged" + (fn + () + (let + ((v (pl-mk-rt-var "X"))) + (= (pl-var-id (pl-walk v)) (pl-var-id v)))) + true) + +(pl-u-test! + "walk follows single binding" + (fn + () + (let + ((t (pl-mk-trail)) (v (pl-mk-rt-var "X"))) + (pl-bind! v (pl-a "hello") t) + (pl-walk v))) + (list "atom" "hello")) + +(pl-u-test! + "walk follows chain var→var→atom" + (fn + () + (let + ((t (pl-mk-trail)) (a (pl-mk-rt-var "A")) (b (pl-mk-rt-var "B"))) + (pl-bind! a b t) + (pl-bind! b (pl-a "end") t) + (pl-walk a))) + (list "atom" "end")) + +;; ── Unify: atoms ────────────────────────────────────────────────── +(pl-u-test! + "unify same atom" + (fn () (pl-unify! (pl-a "foo") (pl-a "foo") (pl-mk-trail))) + true) + +(pl-u-test! + "unify different atoms" + (fn () (pl-unify! (pl-a "foo") (pl-a "bar") (pl-mk-trail))) + false) + +;; ── Unify: numbers ──────────────────────────────────────────────── +(pl-u-test! + "unify equal nums" + (fn () (pl-unify! (pl-n 5) (pl-n 5) (pl-mk-trail))) + true) + +(pl-u-test! + "unify different nums" + (fn () (pl-unify! (pl-n 5) (pl-n 6) (pl-mk-trail))) + false) + +(pl-u-test! + "atom vs num fails" + (fn () (pl-unify! (pl-a "5") (pl-n 5) (pl-mk-trail))) + false) + +;; ── Unify: strings ──────────────────────────────────────────────── +(pl-u-test! + "unify equal strings" + (fn () (pl-unify! (pl-s "hi") (pl-s "hi") (pl-mk-trail))) + true) + +(pl-u-test! + "unify different strings" + (fn () (pl-unify! (pl-s "hi") (pl-s "bye") (pl-mk-trail))) + false) + +;; ── Unify: variables ────────────────────────────────────────────── +(pl-u-test! + "unify var with atom binds" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-unify! x (pl-a "foo") t) + (pl-walk x))) + (list "atom" "foo")) + +(pl-u-test! + "unify atom with var binds" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-unify! (pl-a "foo") x t) + (pl-walk x))) + (list "atom" "foo")) + +(pl-u-test! + "unify var = var binds one to the other" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y"))) + (pl-unify! x y t) + (pl-unify! y (pl-a "bound") t) + (pl-walk x))) + (list "atom" "bound")) + +(pl-u-test! + "unify same var succeeds without binding" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-unify! x x t) + (list (pl-var-bound? x) (= (dict-get t :len) 0)))) + (list false true)) + +(pl-u-test! + "bound-var vs atom uses binding" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-bind! x (pl-a "a") t) + (pl-unify! x (pl-a "a") t))) + true) + +(pl-u-test! + "bound-var vs different atom fails" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-bind! x (pl-a "a") t) + (pl-unify! x (pl-a "b") t))) + false) + +;; ── Unify: compounds ────────────────────────────────────────────── +(pl-u-test! + "unify p(a) with p(a)" + (fn + () + (pl-unify! + (pl-c "p" (list (pl-a "a"))) + (pl-c "p" (list (pl-a "a"))) + (pl-mk-trail))) + true) + +(pl-u-test! + "unify p(a) with p(b)" + (fn + () + (pl-unify! + (pl-c "p" (list (pl-a "a"))) + (pl-c "p" (list (pl-a "b"))) + (pl-mk-trail))) + false) + +(pl-u-test! + "unify p(a) with q(a) — functor mismatch" + (fn + () + (pl-unify! + (pl-c "p" (list (pl-a "a"))) + (pl-c "q" (list (pl-a "a"))) + (pl-mk-trail))) + false) + +(pl-u-test! + "unify p(a) with p(a,b) — arity mismatch" + (fn + () + (pl-unify! + (pl-c "p" (list (pl-a "a"))) + (pl-c "p" (list (pl-a "a") (pl-a "b"))) + (pl-mk-trail))) + false) + +(pl-u-test! + "unify p(X) with p(foo) binds X" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-unify! (pl-c "p" (list x)) (pl-c "p" (list (pl-a "foo"))) t) + (pl-walk x))) + (list "atom" "foo")) + +(pl-u-test! + "unify p(X,Y) with p(1,2) binds both" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y"))) + (pl-unify! + (pl-c "p" (list x y)) + (pl-c "p" (list (pl-n 1) (pl-n 2))) + t) + (list (pl-walk x) (pl-walk y)))) + (list (list "num" 1) (list "num" 2))) + +(pl-u-test! + "unify p(X,X) with p(a,a)" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-unify! + (pl-c "p" (list x x)) + (pl-c "p" (list (pl-a "a") (pl-a "a"))) + t))) + true) + +(pl-u-test! + "unify p(X,X) with p(a,b) fails" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-unify! + (pl-c "p" (list x x)) + (pl-c "p" (list (pl-a "a") (pl-a "b"))) + t))) + false) + +;; ── Nested compounds ────────────────────────────────────────────── +(pl-u-test! + "unify f(g(X)) with f(g(foo))" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-unify! + (pl-c "f" (list (pl-c "g" (list x)))) + (pl-c "f" (list (pl-c "g" (list (pl-a "foo"))))) + t) + (pl-walk x))) + (list "atom" "foo")) + +;; ── Cons-cell lists ────────────────────────────────────────────── +(define pl-nil (pl-a "[]")) +(define pl-cons (fn (h t) (pl-c "." (list h t)))) + +(pl-u-test! + "unify [1,2,3] with [X|T] binds X and T" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (tl (pl-mk-rt-var "T"))) + (pl-unify! + (pl-cons (pl-n 1) (pl-cons (pl-n 2) (pl-cons (pl-n 3) pl-nil))) + (pl-cons x tl) + t) + (list (pl-walk x) (pl-walk-deep tl)))) + (list + (list "num" 1) + (list + "compound" + "." + (list + (list "num" 2) + (list "compound" "." (list (list "num" 3) (list "atom" "[]"))))))) + +;; ── Trail: mark + undo ─────────────────────────────────────────── +(pl-u-test! + "trail-undo restores unbound vars" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y"))) + (let + ((mark (pl-trail-mark t))) + (pl-unify! x (pl-a "a") t) + (pl-unify! y (pl-a "b") t) + (pl-trail-undo-to! t mark) + (list (pl-var-bound? x) (pl-var-bound? y) (dict-get t :len))))) + (list false false 0)) + +(pl-u-test! + "partial undo preserves earlier bindings" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y"))) + (pl-unify! x (pl-a "a") t) + (let + ((mark (pl-trail-mark t))) + (pl-unify! y (pl-a "b") t) + (pl-trail-undo-to! t mark) + (list (pl-var-bound? x) (pl-var-bound? y))))) + (list true false)) + +(pl-u-test! + "try-unify undoes on failure" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y"))) + (pl-try-unify! + (pl-c "p" (list x y (pl-a "a"))) + (pl-c "p" (list (pl-n 1) (pl-n 2) (pl-a "b"))) + t) + (list (pl-var-bound? x) (pl-var-bound? y)))) + (list false false)) + +(pl-u-test! + "try-unify success keeps bindings" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y"))) + (pl-try-unify! + (pl-c "p" (list x y)) + (pl-c "p" (list (pl-n 1) (pl-n 2))) + t) + (list (pl-walk x) (pl-walk y)))) + (list (list "num" 1) (list "num" 2))) + +;; ── Occurs check ────────────────────────────────────────────────── +(pl-u-test! + "no occurs check: X = f(X) succeeds" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (pl-unify! x (pl-c "f" (list x)) t))) + true) + +(pl-u-test! + "occurs check: X = f(X) fails" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (dict-set! t :occurs-check true) + (pl-unify! x (pl-c "f" (list x)) t))) + false) + +(pl-u-test! + "occurs check: X = Y, Y = f(X) fails" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y"))) + (dict-set! t :occurs-check true) + (pl-unify! x y t) + (pl-unify! y (pl-c "f" (list x)) t))) + false) + +(pl-u-test! + "occurs check: deep occurrence in compound" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X"))) + (dict-set! t :occurs-check true) + (pl-unify! x (pl-c "f" (list (pl-c "g" (list x)))) t))) + false) + +(pl-u-test! + "occurs check: X = Y (both unbound) succeeds" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y"))) + (dict-set! t :occurs-check true) + (pl-unify! x y t))) + true) + +;; ── Parse AST → runtime term ────────────────────────────────────── +(pl-u-test! + "instantiate replaces (var X) with runtime var" + (fn + () + (let ((ast (list "var" "X"))) (pl-var? (pl-instantiate-fresh ast)))) + true) + +(pl-u-test! + "instantiate shares vars within clause" + (fn + () + (let + ((env {})) + (let + ((v1 (pl-instantiate (list "var" "X") env)) + (v2 (pl-instantiate (list "var" "X") env))) + (= (pl-var-id v1) (pl-var-id v2))))) + true) + +(pl-u-test! + "instantiate makes distinct vars for _" + (fn + () + (let + ((env {})) + (let + ((v1 (pl-instantiate (list "var" "_") env)) + (v2 (pl-instantiate (list "var" "_") env))) + (not (= (pl-var-id v1) (pl-var-id v2)))))) + true) + +(pl-u-test! + "instantiate compound recurses" + (fn + () + (let + ((env {})) + (let + ((inst (pl-instantiate (list "compound" "p" (list (list "var" "X") (list "atom" "a"))) env)) + (x (dict-get env "X"))) + (pl-unify! + inst + (pl-c "p" (list (pl-a "foo") (pl-a "a"))) + (pl-mk-trail)) + (pl-walk x)))) + (list "atom" "foo")) + +(pl-u-test! + "deep-walk resolves nested vars" + (fn + () + (let + ((t (pl-mk-trail)) (x (pl-mk-rt-var "X")) (y (pl-mk-rt-var "Y"))) + (pl-unify! x (pl-c "f" (list y (pl-a "b"))) t) + (pl-unify! y (pl-a "a") t) + (pl-walk-deep x))) + (list "compound" "f" (list (list "atom" "a") (list "atom" "b")))) + +;; ── Runner ──────────────────────────────────────────────────────── +(define pl-unify-tests-run! (fn () {:failed pl-u-fail :passed pl-u-pass :total pl-u-count :failures pl-u-failures})) diff --git a/lib/prolog/tokenizer.sx b/lib/prolog/tokenizer.sx new file mode 100644 index 00000000..1d36de4d --- /dev/null +++ b/lib/prolog/tokenizer.sx @@ -0,0 +1,232 @@ +;; lib/prolog/tokenizer.sx — Prolog source → token stream +;; +;; Tokens: {:type T :value V :pos P} +;; Types: +;; "atom" — lowercase-start, quoted, or symbolic atom (=, \=, +, etc.) +;; "var" — uppercase-start or _-start variable (value is the name) +;; "number" — numeric literal (decoded to number) +;; "string" — "..." string literal +;; "punct" — ( ) , . [ ] | +;; "op" — :- ! (phase 1 only has these two "operators") +;; "eof" +;; +;; NOTE: phase 1 parser does NOT handle operator precedence (no X is Y+1). +;; All compound terms are written as f(arg1, arg2, ...) — including +;; =(X, Y), is(X, +(1,2)), and so on, using symbolic atoms as functors. + +(define pl-make-token (fn (type value pos) {:pos pos :value value :type type})) + +;; ── Character predicates ────────────────────────────────────────── +(define pl-digit? (fn (c) (and (>= c "0") (<= c "9")))) + +(define pl-lower? (fn (c) (and (>= c "a") (<= c "z")))) + +(define pl-upper? (fn (c) (and (>= c "A") (<= c "Z")))) + +(define + pl-ident-char? + (fn (c) (or (pl-lower? c) (pl-upper? c) (pl-digit? c) (= c "_")))) + +(define pl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) + +;; Characters that form "symbolic atoms" (operator-shaped atoms like +;; =, \=, +, -, *, /, <, >, etc.). A run of these becomes a single atom +;; token. In phase 1 this lets users write =(X, Y) or is(X, +(1,2)) as +;; regular compound terms without needing an operator parser. +(define + pl-sym? + (fn + (c) + (or + (= c "=") + (= c "\\") + (= c "+") + (= c "-") + (= c "*") + (= c "/") + (= c "<") + (= c ">") + (= c "@") + (= c "#") + (= c "$") + (= c "&") + (= c "?") + (= c "^") + (= c "~") + (= c ";")))) + +;; ── Main tokenizer ──────────────────────────────────────────────── +(define + pl-tokenize + (fn + (src) + (let + ((tokens (list)) (pos 0) (src-len (len src))) + (define + pl-peek + (fn + (offset) + (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) + (define cur (fn () (pl-peek 0))) + (define advance! (fn (n) (set! pos (+ pos n)))) + (define + at? + (fn + (s) + (let + ((sl (len s))) + (and (<= (+ pos sl) src-len) (= (slice src pos (+ pos sl)) s))))) + (define + pl-emit! + (fn + (type value start) + (append! tokens (pl-make-token type value start)))) + (define + skip-line-comment! + (fn + () + (when + (and (< pos src-len) (not (= (cur) "\n"))) + (do (advance! 1) (skip-line-comment!))))) + (define + skip-block-comment! + (fn + () + (cond + ((>= pos src-len) nil) + ((and (= (cur) "*") (< (+ pos 1) src-len) (= (pl-peek 1) "/")) + (advance! 2)) + (else (do (advance! 1) (skip-block-comment!)))))) + (define + skip-ws! + (fn + () + (cond + ((>= pos src-len) nil) + ((pl-ws? (cur)) (do (advance! 1) (skip-ws!))) + ((= (cur) "%") + (do (advance! 1) (skip-line-comment!) (skip-ws!))) + ((and (= (cur) "/") (< (+ pos 1) src-len) (= (pl-peek 1) "*")) + (do (advance! 2) (skip-block-comment!) (skip-ws!))) + (else nil)))) + (define + read-ident + (fn + (start) + (do + (when + (and (< pos src-len) (pl-ident-char? (cur))) + (do (advance! 1) (read-ident start))) + (slice src start pos)))) + (define + read-sym + (fn + (start) + (do + (when + (and (< pos src-len) (pl-sym? (cur))) + (do (advance! 1) (read-sym start))) + (slice src start pos)))) + (define + read-decimal-digits! + (fn + () + (when + (and (< pos src-len) (pl-digit? (cur))) + (do (advance! 1) (read-decimal-digits!))))) + (define + read-number + (fn + (start) + (do + (read-decimal-digits!) + (when + (and + (< pos src-len) + (= (cur) ".") + (< (+ pos 1) src-len) + (pl-digit? (pl-peek 1))) + (do (advance! 1) (read-decimal-digits!))) + (parse-number (slice src start pos))))) + (define + read-quoted + (fn + (quote-char) + (let + ((chars (list))) + (advance! 1) + (define + loop + (fn + () + (cond + ((>= pos src-len) nil) + ((= (cur) "\\") + (do + (advance! 1) + (when + (< pos src-len) + (let + ((ch (cur))) + (do + (cond + ((= ch "n") (append! chars "\n")) + ((= ch "t") (append! chars "\t")) + ((= ch "r") (append! chars "\r")) + ((= ch "\\") (append! chars "\\")) + ((= ch "'") (append! chars "'")) + ((= ch "\"") (append! chars "\"")) + (else (append! chars ch))) + (advance! 1)))) + (loop))) + ((= (cur) quote-char) (advance! 1)) + (else (do (append! chars (cur)) (advance! 1) (loop)))))) + (loop) + (join "" chars)))) + (define + scan! + (fn + () + (do + (skip-ws!) + (when + (< pos src-len) + (let + ((ch (cur)) (start pos)) + (cond + ((at? ":-") + (do (pl-emit! "op" ":-" start) (advance! 2) (scan!))) + ((pl-digit? ch) + (do + (pl-emit! "number" (read-number start) start) + (scan!))) + ((= ch "'") + (do (pl-emit! "atom" (read-quoted "'") start) (scan!))) + ((= ch "\"") + (do (pl-emit! "string" (read-quoted "\"") start) (scan!))) + ((pl-lower? ch) + (do (pl-emit! "atom" (read-ident start) start) (scan!))) + ((or (pl-upper? ch) (= ch "_")) + (do (pl-emit! "var" (read-ident start) start) (scan!))) + ((= ch "(") + (do (pl-emit! "punct" "(" start) (advance! 1) (scan!))) + ((= ch ")") + (do (pl-emit! "punct" ")" start) (advance! 1) (scan!))) + ((= ch ",") + (do (pl-emit! "punct" "," start) (advance! 1) (scan!))) + ((= ch ".") + (do (pl-emit! "punct" "." start) (advance! 1) (scan!))) + ((= ch "[") + (do (pl-emit! "punct" "[" start) (advance! 1) (scan!))) + ((= ch "]") + (do (pl-emit! "punct" "]" start) (advance! 1) (scan!))) + ((= ch "|") + (do (pl-emit! "punct" "|" start) (advance! 1) (scan!))) + ((= ch "!") + (do (pl-emit! "op" "!" start) (advance! 1) (scan!))) + ((pl-sym? ch) + (do (pl-emit! "atom" (read-sym start) start) (scan!))) + (else (do (advance! 1) (scan!))))))))) + (scan!) + (pl-emit! "eof" nil pos) + tokens))) diff --git a/plans/erlang-on-sx.md b/plans/erlang-on-sx.md index f3faacf2..0084a46e 100644 --- a/plans/erlang-on-sx.md +++ b/plans/erlang-on-sx.md @@ -50,10 +50,11 @@ Core mapping: ## Roadmap ### Phase 1 — tokenizer + parser -- [ ] Tokenizer: atoms (bare + single-quoted), variables (Uppercase/`_`-prefixed), numbers (int, float, `16#HEX`), strings `"..."`, chars `$c`, punct `( ) { } [ ] , ; . : :: ->` -- [ ] Parser: module declarations, `-module`/`-export`/`-import` attributes, function clauses with head patterns + guards + body -- [ ] Expressions: literals, vars, calls, tuples `{...}`, lists `[...|...]`, binaries `<<...>>`, `if`, `case`, `receive`, `fun`, `try/catch`, operators -- [ ] Unit tests in `lib/erlang/tests/parse.sx` +- [x] Tokenizer: atoms (bare + single-quoted), variables (Uppercase/`_`-prefixed), numbers (int, float, `16#HEX`), strings `"..."`, chars `$c`, punct `( ) { } [ ] , ; . : :: ->` — **62/62 tests** +- [x] Parser: module declarations, `-module`/`-export`/`-import` attributes, function clauses with head patterns + guards + body — **52/52 tests** +- [x] Expressions: literals, vars, calls, tuples `{...}`, lists `[...|...]`, `if`, `case`, `receive`, `fun`, `try/catch`, operators, precedence +- [ ] Binaries `<<...>>` — not yet parsed (deferred to Phase 6) +- [x] Unit tests in `lib/erlang/tests/parse.sx` ### Phase 2 — sequential eval + pattern matching + BIFs - [ ] `erlang-eval-ast`: evaluate sequential expressions @@ -98,7 +99,8 @@ Core mapping: _Newest first._ -- _(not started)_ +- **parser green** — `lib/erlang/parser.sx` + `parser-core.sx` + `parser-expr.sx` + `parser-module.sx`. 52/52 in `tests/parse.sx`. Covers literals, tuples, lists (incl. `[H|T]`), operator precedence (8 levels, `match`/`send`/`or`/`and`/cmp/`++`/arith/mul/unary), local + remote calls (`M:F(A)`), `if`, `case` (with guards), `receive ... after ... end`, `begin..end` blocks, anonymous `fun`, `try..of..catch..after..end` with `Class:Pattern` catch clauses. Module-level: `-module(M).`, `-export([...]).`, multi-clause functions with guards. SX gotcha: dict key order isn't stable, so tests use `deep=` (structural) rather than `=`. +- **tokenizer green** — `lib/erlang/tokenizer.sx` + `lib/erlang/tests/tokenize.sx`. Covers atoms (bare, quoted, `node@host`), variables, integers (incl. `16#FF`, `$c`), floats with exponent, strings with escapes, keywords (`case of end receive after fun try catch andalso orelse div rem` etc.), punct (`( ) { } [ ] , ; . : :: -> <- <= => << >> | ||`), ops (`+ - * / = == /= =:= =/= < > =< >= ++ -- ! ?`), `%` line comments. 62/62 green. ## Blockers diff --git a/plans/forth-on-sx.md b/plans/forth-on-sx.md index 9bdeface..1a59da2a 100644 --- a/plans/forth-on-sx.md +++ b/plans/forth-on-sx.md @@ -49,24 +49,24 @@ Representation: ## Roadmap ### Phase 1 — reader + interpret mode -- [ ] `lib/forth/reader.sx`: whitespace-split, number parsing (base-aware) -- [ ] `lib/forth/runtime.sx`: stack as SX list, push/pop/peek helpers -- [ ] Core stack words: `DUP`, `DROP`, `SWAP`, `OVER`, `ROT`, `NIP`, `TUCK`, `PICK`, `ROLL`, `?DUP`, `2DUP`, `2DROP`, `2SWAP`, `2OVER` -- [ ] Arithmetic: `+`, `-`, `*`, `/`, `MOD`, `/MOD`, `NEGATE`, `ABS`, `MIN`, `MAX`, `1+`, `1-`, `2*`, `2/` -- [ ] Comparison: `=`, `<`, `>`, `<=`, `>=`, `0=`, `0<`, `0>` -- [ ] Logical: `AND`, `OR`, `XOR`, `INVERT` -- [ ] I/O: `.` (print), `.S` (show stack), `EMIT`, `CR`, `SPACE`, `SPACES` -- [ ] Interpreter loop: read word, look up, execute, repeat -- [ ] Unit tests in `lib/forth/tests/interp.sx` +- [x] `lib/forth/reader.sx`: whitespace-split, number parsing (base-aware) +- [x] `lib/forth/runtime.sx`: stack as SX list, push/pop/peek helpers +- [x] Core stack words: `DUP`, `DROP`, `SWAP`, `OVER`, `ROT`, `-ROT`, `NIP`, `TUCK`, `PICK`, `ROLL`, `?DUP`, `DEPTH`, `2DUP`, `2DROP`, `2SWAP`, `2OVER` +- [x] Arithmetic: `+`, `-`, `*`, `/`, `MOD`, `/MOD`, `NEGATE`, `ABS`, `MIN`, `MAX`, `1+`, `1-`, `2+`, `2-`, `2*`, `2/` +- [x] Comparison: `=`, `<>`, `<`, `>`, `<=`, `>=`, `0=`, `0<>`, `0<`, `0>` +- [x] Logical: `AND`, `OR`, `XOR`, `INVERT` (32-bit two's-complement sim) +- [x] I/O: `.` (print), `.S` (show stack), `EMIT`, `CR`, `SPACE`, `SPACES`, `BL` +- [x] Interpreter loop: read word, look up, execute, repeat +- [x] Unit tests in `lib/forth/tests/test-phase1.sx` — 108/108 pass ### Phase 2 — colon definitions + compile mode -- [ ] `:` opens compile mode and starts a definition -- [ ] `;` closes it and installs into the dictionary -- [ ] Compile mode: non-IMMEDIATE words get appended as SX references; numbers get compiled as literals; IMMEDIATE words (like `IF`) run now -- [ ] `VARIABLE`, `CONSTANT`, `VALUE`, `TO` -- [ ] `@` (fetch), `!` (store), `+!` -- [ ] Compile a colon def into an SX lambda that the CEK runs directly -- [ ] Tests: define words, call them, nest definitions +- [x] `:` opens compile mode and starts a definition +- [x] `;` closes it and installs into the dictionary +- [x] Compile mode: non-IMMEDIATE words are compiled as late-binding call thunks; numbers are compiled as pushers; IMMEDIATE words run immediately +- [x] `VARIABLE`, `CONSTANT`, `VALUE`, `TO`, `RECURSE`, `IMMEDIATE` +- [x] `@` (fetch), `!` (store), `+!` +- [x] Colon-def body is `(fn (s) (for-each op body))` — runs on CEK, inherits TCO +- [x] Tests in `lib/forth/tests/test-phase2.sx` — 26/26 pass ### Phase 3 — control flow + first Hayes tests green - [ ] `IF`, `ELSE`, `THEN` — compile to SX `if` @@ -99,7 +99,25 @@ Representation: _Newest first._ -- _(not started)_ +- **Phase 2 complete — colon defs, compile mode, VARIABLE/CONSTANT/VALUE/TO, @/!/+! (+26).** + `lib/forth/compiler.sx` plus `tests/test-phase2.sx`. + Colon-def body is a list of ops (one per source token) wrapped in a single + lambda. References are late-binding thunks so forward/recursive references + work via `RECURSE`. Redefinitions take effect immediately. + VARIABLE creates a pusher for a symbolic address stored in `state.vars`. + CONSTANT compiles to `(fn (s) (forth-push s v))`. VALUE/TO share the vars dict. + Compiler rewrites `forth-interpret` to drive from a token list stored on + state so parsing words (`:`, `VARIABLE`, `TO` etc.) can consume the next + token with `forth-next-token!`. 134/134 (Phase 1 + 2) green. + +- **Phase 1 complete — reader + interpret mode + core words (+108).** + `lib/forth/{runtime,reader,interpreter}.sx` plus `tests/test-phase1.sx`. + Stack as SX list (TOS = first). Dict is `{lowercased-name -> {:kind :body :immediate?}}`. + Data + return stacks both mutable. Output buffered in state (no host IO yet). + BASE-aware number parsing with `$`, `%`, `#` prefixes and `'c'` char literals. + Bitwise AND/OR/XOR/INVERT simulated over 32-bit two's-complement. + Integer `/` is truncated-toward-zero (ANS symmetric), MOD matches. + Case-insensitive lookup. 108/108 tests green. ## Blockers diff --git a/plans/haskell-on-sx.md b/plans/haskell-on-sx.md index 8396d527..f76920fd 100644 --- a/plans/haskell-on-sx.md +++ b/plans/haskell-on-sx.md @@ -54,11 +54,11 @@ Key mappings: ## Roadmap ### Phase 1 — tokenizer + parser + layout rule -- [ ] Tokenizer: reserved words, qualified names, operators, numbers (int, float, Rational later), chars/strings, comments (`--` and `{-` nested) +- [x] Tokenizer: reserved words, qualified names, operators, numbers (int, float, Rational later), chars/strings, comments (`--` and `{-` nested) - [ ] Layout algorithm: turn indentation into virtual `{`, `;`, `}` tokens per Haskell 98 §10.3 - [ ] Parser: modules, imports (stub), top-level decls, type sigs, function clauses with patterns + guards + where-clauses, expressions with operator precedence, lambdas, `let`, `if`, `case`, `do`, list comp, sections - [ ] AST design modelled on GHC's HsSyn at a surface level -- [ ] Unit tests in `lib/haskell/tests/parse.sx` +- [x] Unit tests in `lib/haskell/tests/parse.sx` (43 tokenizer tests, all green) ### Phase 2 — desugar + eager-ish eval + ADTs (untyped) - [ ] Desugar: guards → nested `if`s; `where` → `let`; list comp → `concatMap`-based; do-notation stays for now (desugared in phase 3) @@ -107,7 +107,27 @@ Key mappings: _Newest first._ -- _(not started)_ +- **2026-04-24** — Phase 1: Haskell 98 tokenizer (`lib/haskell/tokenizer.sx`, 490 lines) + covering idents (lower/upper/qvarid/qconid), 23 reserved words, 11 reserved ops, + varsym/consym operator chains, integer/hex/octal/float literals incl. exponent + notation, char + string literals with escape sequences, nested `{- ... -}` block + comments with depth counter, `-- ... EOL` line comments (respecting the + "followed by symbol = not a comment" Haskell 98 rule), backticks, punctuation, + and explicit `newline` tokens for the upcoming layout pass. 43 structural tests + in `lib/haskell/tests/parse.sx`, a lightweight `hk-deep=?` equality helper + and a custom `lib/haskell/test.sh` runner (pipes through the OCaml epoch + protocol, falls back to the main-repo build when run from a worktree). 43/43 + green. + + Also peeked at `/root/rose-ash/sx-haskell/` per briefing: that directory is a + Haskell program implementing an **SX interpreter** (Types.hs, Eval.hs, + Primitives.hs, etc. — ~2800 lines of .hs) — the *opposite* direction from this + project. Nothing to fold in. + + Gotchas hit: `emit!` and `peek` are SX evaluator special forms, so every local + helper uses the `hk-` prefix. `cond`/`when`/`let` clauses evaluate ONLY the + last expression; multi-expression bodies MUST be wrapped in `(do ...)`. These + two together account for all the tokenizer's early crashes. ## Blockers diff --git a/plans/lua-on-sx.md b/plans/lua-on-sx.md index 04a95755..9ae8fe61 100644 --- a/plans/lua-on-sx.md +++ b/plans/lua-on-sx.md @@ -37,18 +37,18 @@ Runtime shims in `lib/lua/runtime.sx`: `lua-truthy?`, string coercion for `..`/a Each item: implement → tests → tick box → update progress log. ### Phase 1 — tokenizer + parser -- [ ] Tokenizer: numbers (int, float, hex), strings (short + long `[[…]]`), idents, keywords, operators, comments (`--`, `--[[…]]`) -- [ ] Parser: blocks, `local`, `if/elseif/else/end`, `while`, numeric `for`, `function`, `return`, expressions, table constructors, indexing (`.`, `[]`), calls (`f(…)`, `f:m(…)`) -- [ ] Skip for phase 1: generic `for … in …`, goto/labels, nested varargs `...` -- [ ] Unit tests in `lib/lua/tests/parse.sx`: source → expected AST +- [x] Tokenizer: numbers (int, float, hex), strings (short + long `[[…]]`), idents, keywords, operators, comments (`--`, `--[[…]]`) +- [x] Parser: blocks, `local`, `if/elseif/else/end`, `while`, numeric `for`, `function`, `return`, expressions, table constructors, indexing (`.`, `[]`), calls (`f(…)`, `f:m(…)`) +- [x] Skip for phase 1: generic `for … in …`, goto/labels, nested varargs `...` +- [x] Unit tests in `lib/lua/tests/parse.sx`: source → expected AST ### Phase 2 — transpile: control flow + arithmetic -- [ ] `lua-eval-ast` entry -- [ ] Arithmetic (Lua 5.1 semantics — `/` is float) -- [ ] Comparison + logical (short-circuit, Lua truthy) -- [ ] `..` concat with string/number coercion -- [ ] `if`, `while`, numeric `for`, `local`, assignment, blocks -- [ ] 30+ eval tests in `lib/lua/tests/eval.sx` +- [x] `lua-eval-ast` entry +- [x] Arithmetic (Lua 5.1 semantics — `/` is float) +- [x] Comparison + logical (short-circuit, Lua truthy) +- [x] `..` concat with string/number coercion +- [x] `if`, `while`, numeric `for`, `local`, assignment, blocks +- [x] 30+ eval tests in `lib/lua/tests/eval.sx` ### Phase 3 — tables + functions + first PUC-Rio slice - [ ] `function` (anon, local, top-level), closures @@ -82,7 +82,9 @@ Each item: implement → tests → tick box → update progress log. _Newest first. Agent appends on every commit._ -- _(awaiting phase 1)_ +- 2026-04-24: lua: phase 2 transpile — arithmetic, comparison, short-circuit logical, `..` concat, if/while/repeat/for-num/local/assign. 157 total tests green. +- 2026-04-24: lua: parser (exprs with precedence, all phase-1 statements, funcbody, table ctors, method/chained calls) — 112 total tokenizer+parser tests +- 2026-04-24: lua: tokenizer (numbers/strings/long-brackets/keywords/ops/comments) + 56 tests ## Blockers