ocaml: phase 1 expression parser (+37 tests, 95 total) — consumes lib/guest/pratt.sx
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 55s

Atoms (literals/var/con/unit/list), application (left-assoc), prefix - / not,
29-op precedence table via pratt-op-lookup (incl. keyword-spelled mod/land/
lor/lxor/lsl/lsr/asr), tuples, parens, if/then/else, fun, let, let rec
with function shorthand. AST follows Haskell-on-SX (:int / :op / :fun / etc).
This commit is contained in:
2026-05-07 23:26:48 +00:00
parent 85b7fed4fc
commit 9a090c6e42
4 changed files with 579 additions and 11 deletions

418
lib/ocaml/parser.sx Normal file
View File

@@ -0,0 +1,418 @@
;; lib/ocaml/parser.sx — OCaml expression parser.
;;
;; Input: token list from (ocaml-tokenize src).
;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head;
;; keywords serialize to their string name so `(list :var "x")` is the
;; same value as `(list "var" "x")` at runtime.
;;
;; Scope (this iteration — expressions only):
;; atoms int/float/string/char, true/false, unit (), var, con, list literal
;; application left-associative, f x y z
;; prefix -E unary minus, not E
;; infix standard ops via lib/guest/pratt.sx table
;; tuple a, b, c (lower than infix, higher than let/if)
;; parens (e)
;; if if c then t else e (else optional → unit)
;; fun fun x y -> body
;; let let x = e in body (no rec)
;; let f x y = e in body (function shorthand)
;; let rec f x = e in body
;;
;; AST shapes:
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
;; (:var NAME) (:con NAME)
;; (:app FN ARG) — binary, chain for multi-arg
;; (:op OP LHS RHS) — binary infix; OP is the source string
;; (:neg E) (:not E)
;; (:tuple ITEMS)
;; (:list ITEMS)
;; (:if C T E)
;; (:fun PARAMS BODY) — PARAMS list of strings (idents)
;; (:let NAME PARAMS EXPR BODY)
;; (:let-rec NAME PARAMS EXPR BODY)
(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value))))
;; Standard OCaml binary operator table.
;; Higher precedence = tighter binding.
;; ASSOC is :left or :right.
(define
ocaml-op-table
(list
(list "||" 2 :right)
(list "or" 2 :right)
(list "&&" 3 :right)
(list "&" 3 :right)
(list "=" 4 :left)
(list "<" 4 :left)
(list ">" 4 :left)
(list "<=" 4 :left)
(list ">=" 4 :left)
(list "<>" 4 :left)
(list "==" 4 :left)
(list "!=" 4 :left)
(list "|>" 4 :left)
(list "@" 5 :right)
(list "^" 5 :right)
(list "::" 6 :right)
(list "+" 7 :left)
(list "-" 7 :left)
(list "*" 8 :left)
(list "/" 8 :left)
(list "%" 8 :left)
(list "mod" 8 :left)
(list "land" 8 :left)
(list "lor" 8 :left)
(list "lxor" 8 :left)
(list "**" 9 :right)
(list "lsl" 9 :right)
(list "lsr" 9 :right)
(list "asr" 9 :right)))
(define
ocaml-binop-prec
(fn
(op)
(let
((entry (pratt-op-lookup ocaml-op-table op)))
(if (= entry nil) 0 (pratt-op-prec entry)))))
(define
ocaml-binop-right?
(fn
(op)
(let
((entry (pratt-op-lookup ocaml-op-table op)))
(and (not (= entry nil)) (= (pratt-op-assoc entry) :right)))))
;; Some OCaml binops are spelled with keyword tokens (mod / land / lor /
;; lxor / lsl / lsr / asr / or). Recognise both shapes.
(define
ocaml-tok-is-binop?
(fn
(tok)
(let
((tt (ocaml-tok-type tok)) (tv (ocaml-tok-value tok)))
(cond
((= tt "op") (not (= (ocaml-binop-prec tv) 0)))
((= tt "keyword") (not (= (ocaml-binop-prec tv) 0)))
(else false)))))
(define
ocaml-parse
(fn
(src)
(let
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0))
(begin
(set! tok-len (len tokens))
(define peek-tok (fn () (nth tokens idx)))
(define advance-tok! (fn () (set! idx (+ idx 1))))
(define
check-tok?
(fn
(type value)
(let
((t (peek-tok)))
(and
(= (ocaml-tok-type t) type)
(or (= value nil) (= (ocaml-tok-value t) value))))))
(define
consume!
(fn
(type value)
(if
(check-tok? type value)
(let ((t (peek-tok))) (begin (advance-tok!) t))
(error
(str
"ocaml-parse: expected "
type
" "
value
" got "
(ocaml-tok-type (peek-tok))
" "
(ocaml-tok-value (peek-tok)))))))
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
(define at-op? (fn (op) (check-tok? "op" op)))
(define parse-expr (fn () nil))
(define parse-tuple (fn () nil))
(define parse-binop-rhs (fn (lhs min-prec) lhs))
(define parse-prefix (fn () nil))
(define parse-app (fn () nil))
(define parse-atom (fn () nil))
(set!
parse-atom
(fn
()
(let
((t (peek-tok))
(tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number")
(begin
(advance-tok!)
(if (= (round tv) tv) (list :int tv) (list :float tv))))
((= tt "string") (begin (advance-tok!) (list :string tv)))
((= tt "char") (begin (advance-tok!) (list :char tv)))
((and (= tt "keyword") (= tv "true"))
(begin (advance-tok!) (list :bool true)))
((and (= tt "keyword") (= tv "false"))
(begin (advance-tok!) (list :bool false)))
((= tt "ident") (begin (advance-tok!) (list :var tv)))
((= tt "ctor") (begin (advance-tok!) (list :con tv)))
((and (= tt "op") (= tv "("))
(begin
(advance-tok!)
(cond
((at-op? ")") (begin (advance-tok!) (list :unit)))
(else
(let
((e (parse-expr)))
(begin (consume! "op" ")") e))))))
((and (= tt "op") (= tv "["))
(begin
(advance-tok!)
(cond
((at-op? "]") (begin (advance-tok!) (list :list)))
(else
(let
((items (list)))
(begin
(append! items (parse-expr))
(define
loop
(fn
()
(when
(at-op? ";")
(begin
(advance-tok!)
(when
(not (at-op? "]"))
(begin
(append! items (parse-expr))
(loop)))))))
(loop)
(consume! "op" "]")
(cons :list items)))))))
((at-kw? "begin")
(begin
(advance-tok!)
(let
((e (parse-expr)))
(begin (consume! "keyword" "end") e))))
(else
(error
(str
"ocaml-parse: unexpected token "
tt
" "
tv
" at idx "
idx)))))))
(define
at-app-start?
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number") true)
((= tt "string") true)
((= tt "char") true)
((= tt "ident") true)
((= tt "ctor") true)
((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin")))
true)
((and (= tt "op") (or (= tv "(") (= tv "["))) true)
(else false)))))
(set!
parse-app
(fn
()
(let
((head (parse-atom)))
(begin
(define
loop
(fn
()
(when
(at-app-start?)
(let
((arg (parse-atom)))
(begin (set! head (list :app head arg)) (loop))))))
(loop)
head))))
(set!
parse-prefix
(fn
()
(cond
((at-op? "-")
(begin (advance-tok!) (list :neg (parse-prefix))))
((at-kw? "not")
(begin (advance-tok!) (list :not (parse-prefix))))
(else (parse-app)))))
(set!
parse-binop-rhs
(fn
(lhs min-prec)
(let
((tok (peek-tok)))
(cond
((not (ocaml-tok-is-binop? tok)) lhs)
(else
(let
((op (ocaml-tok-value tok))
(prec (ocaml-binop-prec (ocaml-tok-value tok))))
(cond
((< prec min-prec) lhs)
(else
(begin
(advance-tok!)
(let
((rhs (parse-prefix))
(next-min
(if
(ocaml-binop-right? op)
prec
(+ prec 1))))
(begin
(set! rhs (parse-binop-rhs rhs next-min))
(parse-binop-rhs (list :op op lhs rhs) min-prec))))))))))))
(define
parse-binary
(fn
()
(let ((lhs (parse-prefix))) (parse-binop-rhs lhs 1))))
(set!
parse-tuple
(fn
()
(let
((first (parse-binary)))
(cond
((at-op? ",")
(let
((items (list first)))
(begin
(define
loop
(fn
()
(when
(at-op? ",")
(begin
(advance-tok!)
(append! items (parse-binary))
(loop)))))
(loop)
(cons :tuple items))))
(else first)))))
(define
parse-fun
(fn
()
(let
((params (list)))
(begin
(define
collect-params
(fn
()
(when
(check-tok? "ident" nil)
(begin
(append! params (ocaml-tok-value (peek-tok)))
(advance-tok!)
(collect-params)))))
(collect-params)
(when
(= (len params) 0)
(error "ocaml-parse: fun expects at least one parameter"))
(consume! "op" "->")
(let ((body (parse-expr))) (list :fun params body))))))
(define
parse-let
(fn
()
(let
((reccy false))
(begin
(when
(at-kw? "rec")
(begin (advance-tok!) (set! reccy true)))
(let
((name (ocaml-tok-value (consume! "ident" nil)))
(params (list)))
(begin
(define
collect-params
(fn
()
(when
(check-tok? "ident" nil)
(begin
(append! params (ocaml-tok-value (peek-tok)))
(advance-tok!)
(collect-params)))))
(collect-params)
(consume! "op" "=")
(let
((rhs (parse-expr)))
(begin
(consume! "keyword" "in")
(let
((body (parse-expr)))
(if
reccy
(list :let-rec name params rhs body)
(list :let name params rhs body)))))))))))
(define
parse-if
(fn
()
(let
((cond-expr (parse-expr)))
(begin
(consume! "keyword" "then")
(let
((then-expr (parse-expr)))
(cond
((at-kw? "else")
(begin
(advance-tok!)
(let
((else-expr (parse-expr)))
(list :if cond-expr then-expr else-expr))))
(else (list :if cond-expr then-expr (list :unit)))))))))
(set!
parse-expr
(fn
()
(cond
((at-kw? "fun") (begin (advance-tok!) (parse-fun)))
((at-kw? "let") (begin (advance-tok!) (parse-let)))
((at-kw? "if") (begin (advance-tok!) (parse-if)))
(else (parse-tuple)))))
(let
((result (parse-expr)))
(begin
(when
(not (= (ocaml-tok-type (peek-tok)) "eof"))
(error
(str
"ocaml-parse: trailing tokens at idx "
idx
" — got "
(ocaml-tok-type (peek-tok))
" "
(ocaml-tok-value (peek-tok)))))
result))))))

View File

@@ -29,7 +29,9 @@ cat > "$TMPFILE" << 'EPOCHS'
(epoch 1)
(load "lib/guest/lex.sx")
(load "lib/guest/prefix.sx")
(load "lib/guest/pratt.sx")
(load "lib/ocaml/tokenizer.sx")
(load "lib/ocaml/parser.sx")
(load "lib/ocaml/tests/tokenize.sx")
;; ── empty / eof ────────────────────────────────────────────────
@@ -169,6 +171,95 @@ cat > "$TMPFILE" << 'EPOCHS'
(epoch 195)
(eval "(ocaml-test-tok-value \"x := !y\" 1)")
;; ── Phase 1.parse: parser ──────────────────────────────────────
;; Atoms
(epoch 200)
(eval "(ocaml-parse \"42\")")
(epoch 201)
(eval "(ocaml-parse \"3.14\")")
(epoch 202)
(eval "(ocaml-parse \"\\\"hi\\\"\")")
(epoch 203)
(eval "(ocaml-parse \"'a'\")")
(epoch 204)
(eval "(ocaml-parse \"true\")")
(epoch 205)
(eval "(ocaml-parse \"false\")")
(epoch 206)
(eval "(ocaml-parse \"x\")")
(epoch 207)
(eval "(ocaml-parse \"Some\")")
(epoch 208)
(eval "(ocaml-parse \"()\")")
;; Application (left-assoc)
(epoch 210)
(eval "(ocaml-parse \"f x\")")
(epoch 211)
(eval "(ocaml-parse \"f x y\")")
(epoch 212)
(eval "(ocaml-parse \"f (g x)\")")
(epoch 213)
(eval "(ocaml-parse \"Some 42\")")
;; Binops with precedence
(epoch 220)
(eval "(ocaml-parse \"1 + 2\")")
(epoch 221)
(eval "(ocaml-parse \"a + b * c\")")
(epoch 222)
(eval "(ocaml-parse \"a * b + c\")")
(epoch 223)
(eval "(ocaml-parse \"a && b || c\")")
(epoch 224)
(eval "(ocaml-parse \"a = b\")")
(epoch 225)
(eval "(ocaml-parse \"a ^ b ^ c\")")
(epoch 226)
(eval "(ocaml-parse \"a :: b :: []\")")
(epoch 227)
(eval "(ocaml-parse \"(a + b) * c\")")
(epoch 228)
(eval "(ocaml-parse \"a |> f |> g\")")
(epoch 229)
(eval "(ocaml-parse \"x mod 2\")")
;; Prefix
(epoch 230)
(eval "(ocaml-parse \"-x\")")
(epoch 231)
(eval "(ocaml-parse \"-1 + 2\")")
;; Tuples & lists
(epoch 240)
(eval "(ocaml-parse \"(1, 2, 3)\")")
(epoch 241)
(eval "(ocaml-parse \"[1; 2; 3]\")")
(epoch 242)
(eval "(ocaml-parse \"[]\")")
;; if / fun / let / let rec
(epoch 250)
(eval "(ocaml-parse \"if x then 1 else 2\")")
(epoch 251)
(eval "(ocaml-parse \"if c then x\")")
(epoch 252)
(eval "(ocaml-parse \"fun x -> x + 1\")")
(epoch 253)
(eval "(ocaml-parse \"fun x y -> x + y\")")
(epoch 254)
(eval "(ocaml-parse \"let x = 1 in x\")")
(epoch 255)
(eval "(ocaml-parse \"let f x = x + 1 in f 2\")")
(epoch 256)
(eval "(ocaml-parse \"let rec f x = f x in f 1\")")
(epoch 257)
(eval "(ocaml-parse \"let f x y = x + y in f 1 2\")")
;; begin/end
(epoch 260)
(eval "(ocaml-parse \"begin 1 + 2 end\")")
EPOCHS
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
@@ -278,9 +369,54 @@ check 193 "Some is ctor" '"ctor"'
check 194 "first |> value" '"|>"'
check 195 "ref assign :=" '":="'
# ── Parser tests ────────────────────────────────────────────────
check 200 "parse int" '("int" 42)'
check 201 "parse float" '("float" 3.14)'
check 202 "parse string" '("string" "hi")'
check 203 "parse char" '("char" "a")'
check 204 "parse true" '("bool" true)'
check 205 "parse false" '("bool" false)'
check 206 "parse var" '("var" "x")'
check 207 "parse ctor" '("con" "Some")'
check 208 "parse unit" '("unit")'
check 210 "parse f x" '("app" ("var" "f") ("var" "x"))'
check 211 "parse f x y left-assoc" '("app" ("app" ("var" "f") ("var" "x")) ("var" "y"))'
check 212 "parse f (g x)" '("app" ("var" "f") ("app" ("var" "g") ("var" "x")))'
check 213 "parse Some 42" '("app" ("con" "Some") ("int" 42))'
check 220 "parse 1+2" '("op" "+" ("int" 1) ("int" 2))'
check 221 "parse a + b * c prec" '("op" "+" ("var" "a") ("op" "*"'
check 222 "parse a*b + c prec" '("op" "+" ("op" "*"'
check 223 "parse && / || prec" '("op" "||" ("op" "&&"'
check 224 "parse a = b" '("op" "=" ("var" "a") ("var" "b"))'
check 225 "parse ^ right-assoc" '("op" "^" ("var" "a") ("op" "^"'
check 226 "parse :: right-assoc" '("op" "::" ("var" "a") ("op" "::"'
check 227 "parse parens override" '("op" "*" ("op" "+"'
check 228 "parse |> chain" '("op" "|>" ("op" "|>"'
check 229 "parse mod kw-binop" '("op" "mod" ("var" "x") ("int" 2))'
check 230 "parse -x" '("neg" ("var" "x"))'
check 231 "parse -1+2" '("op" "+" ("neg" ("int" 1)) ("int" 2))'
check 240 "parse tuple" '("tuple" ("int" 1) ("int" 2) ("int" 3))'
check 241 "parse list literal" '("list" ("int" 1) ("int" 2) ("int" 3))'
check 242 "parse []" '("list")'
check 250 "parse if/then/else" '("if" ("var" "x") ("int" 1) ("int" 2))'
check 251 "parse if w/o else" '("if" ("var" "c") ("var" "x") ("unit"))'
check 252 "parse fun x -> ..." '("fun" ("x") ("op" "+" ("var" "x") ("int" 1)))'
check 253 "parse fun x y ->" '("fun" ("x" "y")'
check 254 "parse let x = 1 in x" '("let" "x" () ("int" 1) ("var" "x"))'
check 255 "parse let f x =" '("let" "f" ("x") ("op" "+"'
check 256 "parse let rec f x =" '("let-rec" "f" ("x")'
check 257 "parse let f x y =" '("let" "f" ("x" "y")'
check 260 "parse begin/end" '("op" "+" ("int" 1) ("int" 2))'
TOTAL=$((PASS + FAIL))
if [ $FAIL -eq 0 ]; then
echo "ok $PASS/$TOTAL OCaml-on-SX tokenizer tests passed"
echo "ok $PASS/$TOTAL OCaml-on-SX tests passed"
else
echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
echo ""

View File

@@ -1,9 +1,8 @@
;; lib/ocaml/tests/tokenize.sx — smoke tests for the OCaml tokenizer.
;; lib/ocaml/tests/tokenize.sx — smoke-test helpers.
;;
;; Tests are exercised via lib/ocaml/test.sh, which drives sx_server.exe
;; over the epoch protocol. This file provides a small evaluator that
;; returns short diagnostic values for each fixture so the runner can
;; grep them out of one batched run.
;; over the epoch protocol. This file provides small accessors so the
;; bash runner can grep short diagnostic values out of one batched run.
(define
ocaml-test-tok-type
@@ -14,3 +13,9 @@
(fn (src i) (get (nth (ocaml-tokenize src) i) :value)))
(define ocaml-test-tok-count (fn (src) (len (ocaml-tokenize src))))
(define ocaml-test-parse-str (fn (src) (ocaml-parse src)))
(define
ocaml-test-parse-head
(fn (src) (nth (ocaml-parse src) 0)))

View File

@@ -125,12 +125,13 @@ SX CEK evaluator (both JS and OCaml hosts)
int/float literals (incl. hex, exponent, underscores), nested block
comments `(* ... *)`. _(labels `~label:` / `?label:` and heredoc `{|...|}`
deferred — surface tokens already work via `~`/`?` punct + `{`/`|` punct.)_
- [ ] **Parser:** top-level `let`/`let rec`/`type`/`module`/`exception`/`open`/`include`
declarations; expressions: literals, identifiers, constructor application,
lambda, application (left-assoc), binary ops with precedence table,
`if`/`then`/`else`, `match`/`with`, `try`/`with`, `let`/`in`, `begin`/`end`,
`fun`/`function`, tuples, list literals, record literals/updates, field access,
sequences `;`, unit `()`.
- [~] **Parser:** expressions: literals, identifiers, constructor application,
lambda, application (left-assoc), binary ops with precedence (29 ops via
`lib/guest/pratt.sx`), `if`/`then`/`else`, `let`/`in`, `let rec`,
`fun`/`->`, tuples, list literals, `begin`/`end`, unit `()`. _(Pending:
top-level `let`/`type`/`module`/`exception`/`open`/`include` decls,
`match`/`with`, `try`/`with`, `function`, record literals/updates,
field access, sequences `;`.)_
- [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`,
list literal, record, `as`, or-pattern `P1 | P2`, `when` guard.
- [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed.
@@ -309,6 +310,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means:
_Newest first._
- 2026-05-07 Phase 1 — `lib/ocaml/parser.sx` expression parser consuming
`lib/guest/pratt.sx` for binop precedence (29 operators across 8 levels,
incl. keyword-spelled binops `mod`/`land`/`lor`/`lxor`/`lsl`/`lsr`/`asr`).
Atoms (literals + var/con/unit/list), application (left-assoc), prefix
`-`/`not`, tuples, parens, `if`/`then`/`else`, `fun x y -> body`,
`let`/`let rec` with function shorthand. AST shapes match Haskell-on-SX
conventions (`(:int N)` `(:op OP L R)` `(:fun PARAMS BODY)` etc.). Total
95/95 tests now passing via `lib/ocaml/test.sh`.
- 2026-05-07 Phase 1 — `lib/ocaml/tokenizer.sx` consuming `lib/guest/lex.sx`
via `prefix-rename`. Covers idents, ctors, 51 keywords, numbers (int / float
/ hex / exponent / underscored), strings (with escapes), chars (with escapes),