ocaml: phase 1 expression parser (+37 tests, 95 total) — consumes lib/guest/pratt.sx
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 55s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 55s
Atoms (literals/var/con/unit/list), application (left-assoc), prefix - / not, 29-op precedence table via pratt-op-lookup (incl. keyword-spelled mod/land/lor/lxor/lsl/lsr/asr), tuples, parens, if/then/else, fun, let, let rec with function shorthand. AST follows Haskell-on-SX (:int / :op / :fun / etc).
This commit is contained in:
418
lib/ocaml/parser.sx
Normal file
418
lib/ocaml/parser.sx
Normal file
@@ -0,0 +1,418 @@
|
||||
;; lib/ocaml/parser.sx — OCaml expression parser.
|
||||
;;
|
||||
;; Input: token list from (ocaml-tokenize src).
|
||||
;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head;
|
||||
;; keywords serialize to their string name so `(list :var "x")` is the
|
||||
;; same value as `(list "var" "x")` at runtime.
|
||||
;;
|
||||
;; Scope (this iteration — expressions only):
|
||||
;; atoms int/float/string/char, true/false, unit (), var, con, list literal
|
||||
;; application left-associative, f x y z
|
||||
;; prefix -E unary minus, not E
|
||||
;; infix standard ops via lib/guest/pratt.sx table
|
||||
;; tuple a, b, c (lower than infix, higher than let/if)
|
||||
;; parens (e), begin e end (both yield the inner expression, no wrapper node)
|
||||
;; if if c then t else e (else optional → unit)
|
||||
;; fun fun x y -> body
|
||||
;; let let x = e in body (no rec)
|
||||
;; let f x y = e in body (function shorthand)
|
||||
;; let rec f x = e in body
|
||||
;;
|
||||
;; AST shapes:
|
||||
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
|
||||
;; (:var NAME) (:con NAME)
|
||||
;; (:app FN ARG) — binary, chain for multi-arg
|
||||
;; (:op OP LHS RHS) — binary infix; OP is the source string
|
||||
;; (:neg E) (:not E)
|
||||
;; (:tuple ITEM ...) — items follow the tag directly, e.g. (:tuple (:int 1) (:int 2))
|
||||
;; (:list ITEM ...) — items follow the tag directly; the empty list is (:list)
|
||||
;; (:if C T E)
|
||||
;; (:fun PARAMS BODY) — PARAMS list of strings (idents)
|
||||
;; (:let NAME PARAMS EXPR BODY)
|
||||
;; (:let-rec NAME PARAMS EXPR BODY)
|
||||
|
||||
;; Type tag of token T. A nil token (nothing left to read) reports "eof".
(define
  ocaml-tok-type
  (fn (t) (if (not (= t nil)) (get t :type) "eof")))
|
||||
|
||||
;; Value carried by token T, or nil when T itself is nil.
(define
  ocaml-tok-value
  (fn (t) (if (not (= t nil)) (get t :value) nil)))
|
||||
|
||||
;; OCaml binary operators, one (NAME PREC ASSOC) entry each.
;; A larger PREC binds tighter; ASSOC is :left or :right.
;; Entries are grouped by precedence level, loosest first.
(define
  ocaml-op-table
  (list
    ;; level 2 — boolean or
    (list "||" 2 :right)
    (list "or" 2 :right)
    ;; level 3 — boolean and
    (list "&&" 3 :right)
    (list "&" 3 :right)
    ;; level 4 — comparison and pipe
    (list "=" 4 :left)
    (list "<" 4 :left)
    (list ">" 4 :left)
    (list "<=" 4 :left)
    (list ">=" 4 :left)
    (list "<>" 4 :left)
    (list "==" 4 :left)
    (list "!=" 4 :left)
    (list "|>" 4 :left)
    ;; level 5 — list append / string concat
    (list "@" 5 :right)
    (list "^" 5 :right)
    ;; level 6 — cons
    (list "::" 6 :right)
    ;; level 7 — additive
    (list "+" 7 :left)
    (list "-" 7 :left)
    ;; level 8 — multiplicative, including keyword-spelled operators
    (list "*" 8 :left)
    (list "/" 8 :left)
    (list "%" 8 :left)
    (list "mod" 8 :left)
    (list "land" 8 :left)
    (list "lor" 8 :left)
    (list "lxor" 8 :left)
    ;; level 9 — power and shifts
    (list "**" 9 :right)
    (list "lsl" 9 :right)
    (list "lsr" 9 :right)
    (list "asr" 9 :right)))
|
||||
|
||||
;; Precedence recorded in ocaml-op-table for operator OP,
;; or 0 when the table has no entry for it.
(define
  ocaml-binop-prec
  (fn
    (op)
    (let
      ((found (pratt-op-lookup ocaml-op-table op)))
      (if (not (= found nil)) (pratt-op-prec found) 0))))
|
||||
|
||||
;; True only when OP has a table entry whose associativity is :right.
;; Operators missing from the table answer false.
(define
  ocaml-binop-right?
  (fn
    (op)
    (let
      ((found (pratt-op-lookup ocaml-op-table op)))
      (if (= found nil) false (= (pratt-op-assoc found) :right)))))
|
||||
|
||||
;; Some OCaml binops are spelled with keyword tokens (mod / land / lor /
;; lxor / lsl / lsr / asr / or), so both "op" and "keyword" tokens are
;; accepted. A token is a binary operator when its value has a non-zero
;; precedence in the operator table.
(define
  ocaml-tok-is-binop?
  (fn
    (tok)
    (let
      ((kind (ocaml-tok-type tok)) (text (ocaml-tok-value tok)))
      (if
        (or (= kind "op") (= kind "keyword"))
        (not (= (ocaml-binop-prec text) 0))
        false))))
|
||||
|
||||
(define
|
||||
ocaml-parse
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0))
|
||||
(begin
|
||||
(set! tok-len (len tokens))
|
||||
;; Current token, or nil once the cursor has moved past the last token
;; (ocaml-tok-type reads a nil token as "eof"). The explicit check against
;; tok-len means the result no longer depends on how nth treats an
;; out-of-range index.
(define peek-tok (fn () (if (< idx tok-len) (nth tokens idx) nil)))
|
||||
;; Move the cursor one token forward. No bounds check is performed here.
(define advance-tok! (fn () (set! idx (+ idx 1))))
|
||||
;; True when the current token has type TYPE and, if VALUE is non-nil,
;; carries exactly that value. A nil VALUE matches any value.
;; Does not move the cursor.
(define
  check-tok?
  (fn
    (type value)
    (let
      ((cur (peek-tok)))
      (if
        (= (ocaml-tok-type cur) type)
        (or (= value nil) (= (ocaml-tok-value cur) value))
        false))))
|
||||
;; Require the current token to match TYPE / VALUE (same rule as
;; check-tok?). On a match, step past it and return the token; otherwise
;; raise an error naming both the expected and the actual token.
(define
  consume!
  (fn
    (type value)
    (let
      ((cur (peek-tok)))
      (if
        (not (check-tok? type value))
        (error
          (str
            "ocaml-parse: expected "
            type
            " "
            value
            " got "
            (ocaml-tok-type cur)
            " "
            (ocaml-tok-value cur)))
        (begin (advance-tok!) cur)))))
|
||||
;; True when the current token is the keyword KW (cursor is not moved).
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
|
||||
;; True when the current token is the operator/punctuation OP (cursor is not moved).
(define at-op? (fn (op) (check-tok? "op" op)))
|
||||
;; Forward declarations. The parsers below call one another, so each name
;; is first bound to a placeholder here and later rebound to its real
;; implementation with set!.
(define parse-expr (fn () nil))
|
||||
(define parse-tuple (fn () nil))
|
||||
(define parse-binop-rhs (fn (lhs min-prec) lhs))
|
||||
(define parse-prefix (fn () nil))
|
||||
(define parse-app (fn () nil))
|
||||
(define parse-atom (fn () nil))
|
||||
;; Atom parser. Handles literals, identifiers, constructors, unit "()",
;; parenthesised expressions, list literals and begin ... end groups.
;; Raises an error for any token that cannot start an atom.
(set!
  parse-atom
  (fn
    ()
    (let
      ((tt (ocaml-tok-type (peek-tok)))
       (tv (ocaml-tok-value (peek-tok))))
      (cond
        ;; A number equal to its own rounding is tagged :int, otherwise :float.
        ((= tt "number")
          (begin
            (advance-tok!)
            (if (not (= (round tv) tv)) (list :float tv) (list :int tv))))
        ((= tt "string") (begin (advance-tok!) (list :string tv)))
        ((= tt "char") (begin (advance-tok!) (list :char tv)))
        ((= tt "ident") (begin (advance-tok!) (list :var tv)))
        ((= tt "ctor") (begin (advance-tok!) (list :con tv)))
        ;; true / false arrive as keyword tokens.
        ((and (= tt "keyword") (or (= tv "true") (= tv "false")))
          (begin (advance-tok!) (list :bool (= tv "true"))))
        ;; "(" opens either unit "()" or a parenthesised expression.
        ((and (= tt "op") (= tv "("))
          (begin
            (advance-tok!)
            (if
              (at-op? ")")
              (begin (advance-tok!) (list :unit))
              (let
                ((inner (parse-expr)))
                (begin (consume! "op" ")") inner)))))
        ;; "[" opens a list literal: items separated by ";", with an
        ;; optional trailing ";" before the closing "]".
        ((and (= tt "op") (= tv "["))
          (begin
            (advance-tok!)
            (if
              (at-op? "]")
              (begin (advance-tok!) (list :list))
              (let
                ((elems (list)))
                (begin
                  (append! elems (parse-expr))
                  (define
                    more-elems
                    (fn
                      ()
                      (when
                        (at-op? ";")
                        (begin
                          (advance-tok!)
                          (when
                            (not (at-op? "]"))
                            (begin
                              (append! elems (parse-expr))
                              (more-elems)))))))
                  (more-elems)
                  (consume! "op" "]")
                  (cons :list elems))))))
        ;; begin e end groups like parentheses and yields e itself.
        ((and (= tt "keyword") (= tv "begin"))
          (begin
            (advance-tok!)
            (let
              ((inner (parse-expr)))
              (begin (consume! "keyword" "end") inner))))
        (else
          (error
            (str "ocaml-parse: unexpected token " tt " " tv " at idx " idx)))))))
|
||||
;; True when the current token can begin an atom, i.e. parse-app may take
;; it as the next argument of an application.
(define
  at-app-start?
  (fn
    ()
    (let
      ((tt (ocaml-tok-type (peek-tok)))
       (tv (ocaml-tok-value (peek-tok))))
      (or
        (= tt "number")
        (= tt "string")
        (= tt "char")
        (= tt "ident")
        (= tt "ctor")
        (and
          (= tt "keyword")
          (or (= tv "true") (= tv "false") (= tv "begin")))
        (and
          (= tt "op")
          (or (= tv "(") (= tv "[")))))))
|
||||
;; Application parser: one atom followed by zero or more argument atoms.
;; Arguments fold to the left, so "f x y" becomes (:app (:app f x) y).
(set!
  parse-app
  (fn
    ()
    (let
      ((head (parse-atom)))
      (begin
        ;; acc is the application built so far; keep wrapping it while
        ;; another atom follows.
        (define
          gather
          (fn
            (acc)
            (if
              (at-app-start?)
              (gather (list :app acc (parse-atom)))
              acc)))
        (gather head)))))
|
||||
;; Prefix parser: unary minus and "not" wrap a further prefix expression
;; in :neg / :not; anything else is handed to parse-app.
(set!
  parse-prefix
  (fn
    ()
    (if
      (at-op? "-")
      (begin (advance-tok!) (list :neg (parse-prefix)))
      (if
        (at-kw? "not")
        (begin (advance-tok!) (list :not (parse-prefix)))
        (parse-app)))))
|
||||
;; Precedence climbing. Given an already-parsed LHS, keep absorbing
;; "OP operand" pairs while the next operator's precedence is at least
;; MIN-PREC, and return the resulting tree.
(set!
  parse-binop-rhs
  (fn
    (lhs min-prec)
    (let
      ((tok (peek-tok)))
      (if
        (not (ocaml-tok-is-binop? tok))
        lhs
        (let
          ((op (ocaml-tok-value tok))
           (prec (ocaml-binop-prec (ocaml-tok-value tok))))
          (if
            (< prec min-prec)
            lhs
            (begin
              (advance-tok!)
              (let
                ((operand (parse-prefix)))
                ;; A right-associative operator lets its right side absorb
                ;; operators of the same precedence; a left-associative one
                ;; only lets it absorb strictly tighter operators.
                (let
                  ((rhs
                     (parse-binop-rhs
                       operand
                       (if (ocaml-binop-right? op) prec (+ prec 1)))))
                  (parse-binop-rhs (list :op op lhs rhs) min-prec))))))))))
|
||||
;; Binary-expression entry point: a prefix expression followed by any
;; infix operators, starting from minimum precedence 1.
(define
  parse-binary
  (fn () (parse-binop-rhs (parse-prefix) 1)))
|
||||
;; Tuple parser: one binary expression, optionally followed by
;; ", expr" repetitions. A lone expression is returned unchanged; two or
;; more become (:tuple ITEM ...).
(set!
  parse-tuple
  (fn
    ()
    (let
      ((head (parse-binary)))
      (if
        (not (at-op? ","))
        head
        (let
          ((items (list head)))
          (begin
            (define
              more-items
              (fn
                ()
                (when
                  (at-op? ",")
                  (begin
                    (advance-tok!)
                    (append! items (parse-binary))
                    (more-items)))))
            (more-items)
            (cons :tuple items)))))))
|
||||
;; Parse the remainder of "fun p1 p2 ... -> body" (the "fun" keyword has
;; already been consumed by the caller). Yields (:fun PARAMS BODY) where
;; PARAMS is a list of identifier strings. Raises an error when no
;; parameter is present.
(define
  parse-fun
  (fn
    ()
    (let
      ((params (list)))
      (begin
        ;; Collect consecutive identifier tokens as parameter names.
        (define
          take-params
          (fn
            ()
            (when
              (check-tok? "ident" nil)
              (let
                ((pname (ocaml-tok-value (peek-tok))))
                (begin
                  (advance-tok!)
                  (append! params pname)
                  (take-params))))))
        (take-params)
        (when
          (= (len params) 0)
          (error "ocaml-parse: fun expects at least one parameter"))
        (consume! "op" "->")
        (list :fun params (parse-expr))))))
|
||||
;; Parse the remainder of "let [rec] name p1 ... = rhs in body" (the "let"
;; keyword has already been consumed by the caller). Yields
;; (:let NAME PARAMS RHS BODY), or (:let-rec ...) when "rec" was present.
;; PARAMS is a possibly empty list of identifier strings.
(define
  parse-let
  (fn
    ()
    (let
      ((recursive? (at-kw? "rec")))
      (begin
        (when recursive? (advance-tok!))
        (let
          ((name (ocaml-tok-value (consume! "ident" nil)))
           (params (list)))
          (begin
            ;; Identifiers between the bound name and "=" are the
            ;; function-shorthand parameters.
            (define
              take-params
              (fn
                ()
                (when
                  (check-tok? "ident" nil)
                  (let
                    ((pname (ocaml-tok-value (peek-tok))))
                    (begin
                      (advance-tok!)
                      (append! params pname)
                      (take-params))))))
            (take-params)
            (consume! "op" "=")
            (let
              ((rhs (parse-expr)))
              (begin
                (consume! "keyword" "in")
                (let
                  ((body (parse-expr)))
                  (list (if recursive? :let-rec :let) name params rhs body))))))))))
|
||||
;; Parse the remainder of "if c then t [else e]" (the "if" keyword has
;; already been consumed by the caller). Yields (:if C T E); a missing
;; else branch is filled in with (:unit).
(define
  parse-if
  (fn
    ()
    (let
      ((test (parse-expr)))
      (begin
        (consume! "keyword" "then")
        (let
          ((consequent (parse-expr)))
          (if
            (at-kw? "else")
            (begin
              (advance-tok!)
              (list :if test consequent (parse-expr)))
            (list :if test consequent (list :unit))))))))
|
||||
;; Top-level expression dispatcher. The keyword forms fun / let / if are
;; recognised by their leading keyword, which is consumed here before the
;; matching parser runs; everything else goes to parse-tuple.
(set!
  parse-expr
  (fn
    ()
    (if
      (at-kw? "fun")
      (begin (advance-tok!) (parse-fun))
      (if
        (at-kw? "let")
        (begin (advance-tok!) (parse-let))
        (if
          (at-kw? "if")
          (begin (advance-tok!) (parse-if))
          (parse-tuple))))))
|
||||
;; Driver: parse one expression, then insist the whole input was used.
;; Any token other than eof after the expression is reported as an error.
;; The four trailing parentheses close the enclosing begin / let / fn /
;; define of ocaml-parse.
(let
  ((result (parse-expr)))
  (let
    ((tail (peek-tok)))
    (if
      (= (ocaml-tok-type tail) "eof")
      result
      (error
        (str
          "ocaml-parse: trailing tokens at idx "
          idx
          " — got "
          (ocaml-tok-type tail)
          " "
          (ocaml-tok-value tail))))))))))
|
||||
@@ -29,7 +29,9 @@ cat > "$TMPFILE" << 'EPOCHS'
|
||||
(epoch 1)
|
||||
(load "lib/guest/lex.sx")
|
||||
(load "lib/guest/prefix.sx")
|
||||
(load "lib/guest/pratt.sx")
|
||||
(load "lib/ocaml/tokenizer.sx")
|
||||
(load "lib/ocaml/parser.sx")
|
||||
(load "lib/ocaml/tests/tokenize.sx")
|
||||
|
||||
;; ── empty / eof ────────────────────────────────────────────────
|
||||
@@ -169,6 +171,95 @@ cat > "$TMPFILE" << 'EPOCHS'
|
||||
(epoch 195)
|
||||
(eval "(ocaml-test-tok-value \"x := !y\" 1)")
|
||||
|
||||
;; ── Phase 1.parse: parser ──────────────────────────────────────
|
||||
;; Atoms
|
||||
(epoch 200)
|
||||
(eval "(ocaml-parse \"42\")")
|
||||
(epoch 201)
|
||||
(eval "(ocaml-parse \"3.14\")")
|
||||
(epoch 202)
|
||||
(eval "(ocaml-parse \"\\\"hi\\\"\")")
|
||||
(epoch 203)
|
||||
(eval "(ocaml-parse \"'a'\")")
|
||||
(epoch 204)
|
||||
(eval "(ocaml-parse \"true\")")
|
||||
(epoch 205)
|
||||
(eval "(ocaml-parse \"false\")")
|
||||
(epoch 206)
|
||||
(eval "(ocaml-parse \"x\")")
|
||||
(epoch 207)
|
||||
(eval "(ocaml-parse \"Some\")")
|
||||
(epoch 208)
|
||||
(eval "(ocaml-parse \"()\")")
|
||||
|
||||
;; Application (left-assoc)
|
||||
(epoch 210)
|
||||
(eval "(ocaml-parse \"f x\")")
|
||||
(epoch 211)
|
||||
(eval "(ocaml-parse \"f x y\")")
|
||||
(epoch 212)
|
||||
(eval "(ocaml-parse \"f (g x)\")")
|
||||
(epoch 213)
|
||||
(eval "(ocaml-parse \"Some 42\")")
|
||||
|
||||
;; Binops with precedence
|
||||
(epoch 220)
|
||||
(eval "(ocaml-parse \"1 + 2\")")
|
||||
(epoch 221)
|
||||
(eval "(ocaml-parse \"a + b * c\")")
|
||||
(epoch 222)
|
||||
(eval "(ocaml-parse \"a * b + c\")")
|
||||
(epoch 223)
|
||||
(eval "(ocaml-parse \"a && b || c\")")
|
||||
(epoch 224)
|
||||
(eval "(ocaml-parse \"a = b\")")
|
||||
(epoch 225)
|
||||
(eval "(ocaml-parse \"a ^ b ^ c\")")
|
||||
(epoch 226)
|
||||
(eval "(ocaml-parse \"a :: b :: []\")")
|
||||
(epoch 227)
|
||||
(eval "(ocaml-parse \"(a + b) * c\")")
|
||||
(epoch 228)
|
||||
(eval "(ocaml-parse \"a |> f |> g\")")
|
||||
(epoch 229)
|
||||
(eval "(ocaml-parse \"x mod 2\")")
|
||||
|
||||
;; Prefix
|
||||
(epoch 230)
|
||||
(eval "(ocaml-parse \"-x\")")
|
||||
(epoch 231)
|
||||
(eval "(ocaml-parse \"-1 + 2\")")
|
||||
|
||||
;; Tuples & lists
|
||||
(epoch 240)
|
||||
(eval "(ocaml-parse \"(1, 2, 3)\")")
|
||||
(epoch 241)
|
||||
(eval "(ocaml-parse \"[1; 2; 3]\")")
|
||||
(epoch 242)
|
||||
(eval "(ocaml-parse \"[]\")")
|
||||
|
||||
;; if / fun / let / let rec
|
||||
(epoch 250)
|
||||
(eval "(ocaml-parse \"if x then 1 else 2\")")
|
||||
(epoch 251)
|
||||
(eval "(ocaml-parse \"if c then x\")")
|
||||
(epoch 252)
|
||||
(eval "(ocaml-parse \"fun x -> x + 1\")")
|
||||
(epoch 253)
|
||||
(eval "(ocaml-parse \"fun x y -> x + y\")")
|
||||
(epoch 254)
|
||||
(eval "(ocaml-parse \"let x = 1 in x\")")
|
||||
(epoch 255)
|
||||
(eval "(ocaml-parse \"let f x = x + 1 in f 2\")")
|
||||
(epoch 256)
|
||||
(eval "(ocaml-parse \"let rec f x = f x in f 1\")")
|
||||
(epoch 257)
|
||||
(eval "(ocaml-parse \"let f x y = x + y in f 1 2\")")
|
||||
|
||||
;; begin/end
|
||||
(epoch 260)
|
||||
(eval "(ocaml-parse \"begin 1 + 2 end\")")
|
||||
|
||||
EPOCHS
|
||||
|
||||
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
|
||||
@@ -278,9 +369,54 @@ check 193 "Some is ctor" '"ctor"'
|
||||
check 194 "first |> value" '"|>"'
|
||||
check 195 "ref assign :=" '":="'
|
||||
|
||||
# ── Parser tests ────────────────────────────────────────────────
|
||||
check 200 "parse int" '("int" 42)'
|
||||
check 201 "parse float" '("float" 3.14)'
|
||||
check 202 "parse string" '("string" "hi")'
|
||||
check 203 "parse char" '("char" "a")'
|
||||
check 204 "parse true" '("bool" true)'
|
||||
check 205 "parse false" '("bool" false)'
|
||||
check 206 "parse var" '("var" "x")'
|
||||
check 207 "parse ctor" '("con" "Some")'
|
||||
check 208 "parse unit" '("unit")'
|
||||
|
||||
check 210 "parse f x" '("app" ("var" "f") ("var" "x"))'
|
||||
check 211 "parse f x y left-assoc" '("app" ("app" ("var" "f") ("var" "x")) ("var" "y"))'
|
||||
check 212 "parse f (g x)" '("app" ("var" "f") ("app" ("var" "g") ("var" "x")))'
|
||||
check 213 "parse Some 42" '("app" ("con" "Some") ("int" 42))'
|
||||
|
||||
check 220 "parse 1+2" '("op" "+" ("int" 1) ("int" 2))'
|
||||
check 221 "parse a + b * c prec" '("op" "+" ("var" "a") ("op" "*"'
|
||||
check 222 "parse a*b + c prec" '("op" "+" ("op" "*"'
|
||||
check 223 "parse && / || prec" '("op" "||" ("op" "&&"'
|
||||
check 224 "parse a = b" '("op" "=" ("var" "a") ("var" "b"))'
|
||||
check 225 "parse ^ right-assoc" '("op" "^" ("var" "a") ("op" "^"'
|
||||
check 226 "parse :: right-assoc" '("op" "::" ("var" "a") ("op" "::"'
|
||||
check 227 "parse parens override" '("op" "*" ("op" "+"'
|
||||
check 228 "parse |> chain" '("op" "|>" ("op" "|>"'
|
||||
check 229 "parse mod kw-binop" '("op" "mod" ("var" "x") ("int" 2))'
|
||||
|
||||
check 230 "parse -x" '("neg" ("var" "x"))'
|
||||
check 231 "parse -1+2" '("op" "+" ("neg" ("int" 1)) ("int" 2))'
|
||||
|
||||
check 240 "parse tuple" '("tuple" ("int" 1) ("int" 2) ("int" 3))'
|
||||
check 241 "parse list literal" '("list" ("int" 1) ("int" 2) ("int" 3))'
|
||||
check 242 "parse []" '("list")'
|
||||
|
||||
check 250 "parse if/then/else" '("if" ("var" "x") ("int" 1) ("int" 2))'
|
||||
check 251 "parse if w/o else" '("if" ("var" "c") ("var" "x") ("unit"))'
|
||||
check 252 "parse fun x -> ..." '("fun" ("x") ("op" "+" ("var" "x") ("int" 1)))'
|
||||
check 253 "parse fun x y ->" '("fun" ("x" "y")'
|
||||
check 254 "parse let x = 1 in x" '("let" "x" () ("int" 1) ("var" "x"))'
|
||||
check 255 "parse let f x =" '("let" "f" ("x") ("op" "+"'
|
||||
check 256 "parse let rec f x =" '("let-rec" "f" ("x")'
|
||||
check 257 "parse let f x y =" '("let" "f" ("x" "y")'
|
||||
|
||||
check 260 "parse begin/end" '("op" "+" ("int" 1) ("int" 2))'
|
||||
|
||||
TOTAL=$((PASS + FAIL))
|
||||
if [ $FAIL -eq 0 ]; then
|
||||
echo "ok $PASS/$TOTAL OCaml-on-SX tokenizer tests passed"
|
||||
echo "ok $PASS/$TOTAL OCaml-on-SX tests passed"
|
||||
else
|
||||
echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
|
||||
echo ""
|
||||
|
||||
@@ -1,9 +1,8 @@
|
||||
;; lib/ocaml/tests/tokenize.sx — smoke tests for the OCaml tokenizer.
|
||||
;; lib/ocaml/tests/tokenize.sx — smoke-test helpers.
|
||||
;;
|
||||
;; Tests are exercised via lib/ocaml/test.sh, which drives sx_server.exe
|
||||
;; over the epoch protocol. This file provides a small evaluator that
|
||||
;; returns short diagnostic values for each fixture so the runner can
|
||||
;; grep them out of one batched run.
|
||||
;; over the epoch protocol. This file provides small accessors so the
|
||||
;; bash runner can grep short diagnostic values out of one batched run.
|
||||
|
||||
(define
|
||||
ocaml-test-tok-type
|
||||
@@ -14,3 +13,9 @@
|
||||
(fn (src i) (get (nth (ocaml-tokenize src) i) :value)))
|
||||
|
||||
;; Number of tokens ocaml-tokenize returns for SRC.
(define ocaml-test-tok-count (fn (src) (len (ocaml-tokenize src))))
|
||||
|
||||
;; Parse SRC and return the AST unchanged (thin wrapper over ocaml-parse).
(define ocaml-test-parse-str (fn (src) (ocaml-parse src)))
|
||||
|
||||
;; Parse SRC and return only element 0 of the resulting AST node,
;; which is the node's tag.
(define
  ocaml-test-parse-head
  (fn
    (src)
    (let ((ast (ocaml-parse src))) (nth ast 0))))
|
||||
|
||||
@@ -125,12 +125,13 @@ SX CEK evaluator (both JS and OCaml hosts)
|
||||
int/float literals (incl. hex, exponent, underscores), nested block
|
||||
comments `(* ... *)`. _(labels `~label:` / `?label:` and heredoc `{|...|}`
|
||||
deferred — surface tokens already work via `~`/`?` punct + `{`/`|` punct.)_
|
||||
- [ ] **Parser:** top-level `let`/`let rec`/`type`/`module`/`exception`/`open`/`include`
|
||||
declarations; expressions: literals, identifiers, constructor application,
|
||||
lambda, application (left-assoc), binary ops with precedence table,
|
||||
`if`/`then`/`else`, `match`/`with`, `try`/`with`, `let`/`in`, `begin`/`end`,
|
||||
`fun`/`function`, tuples, list literals, record literals/updates, field access,
|
||||
sequences `;`, unit `()`.
|
||||
- [~] **Parser:** expressions: literals, identifiers, constructor application,
|
||||
lambda, application (left-assoc), binary ops with precedence (29 ops via
|
||||
`lib/guest/pratt.sx`), `if`/`then`/`else`, `let`/`in`, `let rec`,
|
||||
`fun`/`->`, tuples, list literals, `begin`/`end`, unit `()`. _(Pending:
|
||||
top-level `let`/`type`/`module`/`exception`/`open`/`include` decls,
|
||||
`match`/`with`, `try`/`with`, `function`, record literals/updates,
|
||||
field access, sequences `;`.)_
|
||||
- [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`,
|
||||
list literal, record, `as`, or-pattern `P1 | P2`, `when` guard.
|
||||
- [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed.
|
||||
@@ -309,6 +310,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means:
|
||||
|
||||
_Newest first._
|
||||
|
||||
- 2026-05-07 Phase 1 — `lib/ocaml/parser.sx` expression parser consuming
|
||||
`lib/guest/pratt.sx` for binop precedence (29 operators across 8 levels,
|
||||
incl. keyword-spelled binops `mod`/`land`/`lor`/`lxor`/`lsl`/`lsr`/`asr`).
|
||||
Atoms (literals + var/con/unit/list), application (left-assoc), prefix
|
||||
`-`/`not`, tuples, parens, `if`/`then`/`else`, `fun x y -> body`,
|
||||
`let`/`let rec` with function shorthand. AST shapes match Haskell-on-SX
|
||||
conventions (`(:int N)` `(:op OP L R)` `(:fun PARAMS BODY)` etc.). Total
|
||||
95/95 tests now passing via `lib/ocaml/test.sh`.
|
||||
- 2026-05-07 Phase 1 — `lib/ocaml/tokenizer.sx` consuming `lib/guest/lex.sx`
|
||||
via `prefix-rename`. Covers idents, ctors, 51 keywords, numbers (int / float
|
||||
/ hex / exponent / underscored), strings (with escapes), chars (with escapes),
|
||||
|
||||
Reference in New Issue
Block a user