ocaml: phase 1 match/with + pattern parser (+9 tests, 113 total)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 52s

Patterns: wildcard, literal, var, ctor (nullary + arg, flattens tuple
args so Pair(a,b) -> (:pcon "Pair" PA PB)), tuple, list literal, cons
:: (right-assoc), unit. Match: leading | optional, (:match SCRUT
CLAUSES) with each clause (:case PAT BODY). Body parsed via parse-expr
because | is below level-1 binop precedence.
This commit is contained in:
2026-05-08 07:29:02 +00:00
parent 9648dac88d
commit 9102e57d89
3 changed files with 241 additions and 20 deletions

View File

@@ -5,39 +5,44 @@
;; keywords serialize to their string name so `(list :var "x")` is the
;; same value as `(list "var" "x")` at runtime.
;;
;; Scope (this iteration — expressions only):
;; atoms int/float/string/char, true/false, unit (), var, con, list literal
;; Expression scope:
;; atoms int/float/string/char/bool, unit (), var, con, list literal
;; application left-associative, f x y z
;; prefix -E unary minus, not E
;; infix standard ops via lib/guest/pratt.sx table
;; infix 29 ops via lib/guest/pratt.sx
;; tuple a, b, c (lower than infix, higher than let/if)
;; parens (e)
;; if if c then t else e (else optional → unit)
;; fun fun x y -> body
;; let let x = e in body (no rec)
;; let f x y = e in body (function shorthand)
;; let let x = e in body (no rec, function shorthand)
;; let rec f x = e in body
;; match match e with [|] p -> body | p -> body | ...
;;
;; Pattern scope:
;; _ (wildcard), int/string/char/bool literals, ident (var binding),
;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…),
;; list literal [pat;pat;…], cons p1 :: p2.
;;
;; AST shapes:
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
;; (:var NAME) (:con NAME)
;; (:app FN ARG) — binary, chain for multi-arg
;; (:op OP LHS RHS) — binary infix; OP is the source string
;; (:app FN ARG)
;; (:op OP LHS RHS)
;; (:neg E) (:not E)
;; (:tuple ITEMS)
;; (:list ITEMS)
;; (:tuple ITEMS) (:list ITEMS)
;; (:if C T E)
;; (:fun PARAMS BODY) — PARAMS list of strings (idents)
;; (:let NAME PARAMS EXPR BODY)
;; (:let-rec NAME PARAMS EXPR BODY)
;; (:fun PARAMS BODY)
;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY)
;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...)
;;
;; (:pwild) (:pvar N) (:plit LIT)
;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary
;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL)
(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value))))
;; Standard OCaml binary operator table.
;; Higher precedence = tighter binding.
;; ASSOC is :left or :right.
(define
ocaml-op-table
(list
@@ -87,8 +92,6 @@
((entry (pratt-op-lookup ocaml-op-table op)))
(and (not (= entry nil)) (= (pratt-op-assoc entry) :right)))))
;; Some OCaml binops are spelled with keyword tokens (mod / land / lor /
;; lxor / lsl / lsr / asr / or). Recognise both shapes.
(define
ocaml-tok-is-binop?
(fn
@@ -138,6 +141,144 @@
(ocaml-tok-value (peek-tok)))))))
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
(define at-op? (fn (op) (check-tok? "op" op)))
(define parse-pattern (fn () nil))
(define parse-pattern-cons (fn () nil))
(define parse-pattern-app (fn () nil))
(define parse-pattern-atom (fn () nil))
(define
at-pattern-atom?
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number") true)
((= tt "string") true)
((= tt "char") true)
((= tt "ident") true)
((= tt "ctor") true)
((and (= tt "keyword") (or (= tv "true") (= tv "false")))
true)
((and (= tt "op") (or (= tv "(") (= tv "["))) true)
(else false)))))
(set!
parse-pattern-atom
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number")
(begin
(advance-tok!)
(if
(= (round tv) tv)
(list :plit (list :int tv))
(list :plit (list :float tv)))))
((= tt "string")
(begin (advance-tok!) (list :plit (list :string tv))))
((= tt "char")
(begin (advance-tok!) (list :plit (list :char tv))))
((and (= tt "keyword") (= tv "true"))
(begin (advance-tok!) (list :plit (list :bool true))))
((and (= tt "keyword") (= tv "false"))
(begin (advance-tok!) (list :plit (list :bool false))))
((and (= tt "ident") (= tv "_"))
(begin (advance-tok!) (list :pwild)))
((= tt "ident") (begin (advance-tok!) (list :pvar tv)))
((= tt "ctor") (begin (advance-tok!) (list :pcon tv)))
((and (= tt "op") (= tv "("))
(begin
(advance-tok!)
(cond
((at-op? ")")
(begin (advance-tok!) (list :plit (list :unit))))
(else
(let
((first (parse-pattern)))
(cond
((at-op? ",")
(let
((items (list first)))
(begin
(define
loop
(fn
()
(when
(at-op? ",")
(begin
(advance-tok!)
(append! items (parse-pattern))
(loop)))))
(loop)
(consume! "op" ")")
(cons :ptuple items))))
(else (begin (consume! "op" ")") first))))))))
((and (= tt "op") (= tv "["))
(begin
(advance-tok!)
(cond
((at-op? "]") (begin (advance-tok!) (list :plist)))
(else
(let
((items (list)))
(begin
(append! items (parse-pattern))
(define
loop
(fn
()
(when
(at-op? ";")
(begin
(advance-tok!)
(when
(not (at-op? "]"))
(begin
(append! items (parse-pattern))
(loop)))))))
(loop)
(consume! "op" "]")
(cons :plist items)))))))
(else
(error
(str
"ocaml-parse: unexpected pattern token "
tt
" "
tv
" at idx "
idx)))))))
(set!
parse-pattern-app
(fn
()
(let
((head (parse-pattern-atom)))
(cond
((and (= (nth head 0) :pcon) (at-pattern-atom?))
(let
((arg (parse-pattern-atom)))
(let
((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg)))))
(concat (list :pcon (nth head 1)) args))))
(else head)))))
(set!
parse-pattern-cons
(fn
()
(let
((lhs (parse-pattern-app)))
(cond
((at-op? "::")
(begin
(advance-tok!)
(list :pcons lhs (parse-pattern-cons))))
(else lhs)))))
(set! parse-pattern (fn () (parse-pattern-cons)))
(define parse-expr (fn () nil))
(define parse-tuple (fn () nil))
(define parse-binop-rhs (fn (lhs min-prec) lhs))
@@ -393,6 +534,40 @@
((else-expr (parse-expr)))
(list :if cond-expr then-expr else-expr))))
(else (list :if cond-expr then-expr (list :unit)))))))))
(define
parse-match
(fn
()
(let
((scrut (parse-expr)))
(begin
(consume! "keyword" "with")
(when (at-op? "|") (advance-tok!))
(let
((cases (list)))
(begin
(define
one
(fn
()
(let
((p (parse-pattern)))
(begin
(consume! "op" "->")
(let
((body (parse-match-body)))
(append! cases (list :case p body)))))))
(one)
(define
loop
(fn
()
(when
(at-op? "|")
(begin (advance-tok!) (one) (loop)))))
(loop)
(cons :match (cons scrut (list cases)))))))))
(define parse-match-body (fn () (parse-expr)))
(set!
parse-expr
(fn
@@ -401,6 +576,7 @@
((at-kw? "fun") (begin (advance-tok!) (parse-fun)))
((at-kw? "let") (begin (advance-tok!) (parse-let)))
((at-kw? "if") (begin (advance-tok!) (parse-if)))
((at-kw? "match") (begin (advance-tok!) (parse-match)))
(else (parse-tuple)))))
(let
((result (parse-expr)))
@@ -422,7 +598,10 @@
(fn
(src)
(let
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list)))
((tokens (ocaml-tokenize src))
(idx 0)
(tok-len 0)
(decls (list)))
(begin
(set! tok-len (len tokens))
(define peek-tok (fn () (nth tokens idx)))

View File

@@ -280,6 +280,26 @@ cat > "$TMPFILE" << 'EPOCHS'
(epoch 278)
(eval "(ocaml-parse-program \"\")")
;; ── Match / patterns ───────────────────────────────────────────
(epoch 300)
(eval "(ocaml-parse \"match x with | None -> 0 | Some y -> y\")")
(epoch 301)
(eval "(ocaml-parse \"match x with None -> 0 | Some y -> y\")")
(epoch 302)
(eval "(ocaml-parse \"match l with | [] -> 0 | h :: t -> 1\")")
(epoch 303)
(eval "(ocaml-parse \"match p with | (a, b) -> a + b\")")
(epoch 304)
(eval "(ocaml-parse \"match n with | 0 -> 1 | _ -> n\")")
(epoch 305)
(eval "(ocaml-parse \"match x with | true -> 1 | false -> 0\")")
(epoch 306)
(eval "(ocaml-parse \"match x with | Pair (a, b) -> a + b\")")
(epoch 307)
(eval "(ocaml-parse \"match x with | \\\"hi\\\" -> 1 | _ -> 0\")")
(epoch 308)
(eval "(ocaml-parse \"match x with | () -> 0\")")
EPOCHS
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
@@ -445,6 +465,17 @@ check 276 "program: mixed decls + expr" '("def" "y" () ("int" 2)) ("expr"'
check 277 "program: 4 forms incl head" '4'
check 278 "program: empty" '("program")'
# ── Match / patterns ────────────────────────────────────────────
check 300 "match Some/None" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some" ("pvar" "y")) ("var" "y")))'
check 301 "match no leading bar" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some"'
check 302 "match list cons" '("case" ("plist") ("int" 0)) ("case" ("pcons" ("pvar" "h") ("pvar" "t")) ("int" 1))'
check 303 "match tuple pat" '("ptuple" ("pvar" "a") ("pvar" "b"))'
check 304 "match int + wildcard" '("case" ("plit" ("int" 0)) ("int" 1)) ("case" ("pwild")'
check 305 "match bool literals" '("plit" ("bool" true))'
check 306 "match ctor with tuple arg" '("pcon" "Pair" ("pvar" "a") ("pvar" "b"))'
check 307 "match string literal" '("plit" ("string" "hi"))'
check 308 "match unit pattern" '("plit" ("unit"))'
TOTAL=$((PASS + FAIL))
if [ $FAIL -eq 0 ]; then
echo "ok $PASS/$TOTAL OCaml-on-SX tests passed"

View File

@@ -133,8 +133,11 @@ SX CEK evaluator (both JS and OCaml hosts)
via `ocaml-parse-program`. _(Pending: `type`/`module`/`exception`/`open`/
`include` decls, `match`/`with`, `try`/`with`, `function`, record literals/
updates, field access, sequences `;`, `and` mutually-recursive bindings.)_
- [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`,
list literal, record, `as`, or-pattern `P1 | P2`, `when` guard.
- [~] **Patterns:** constructor (nullary + with args, incl. flattened tuple
args `Pair (a, b)``(:pcon "Pair" PA PB)`), literal (int/string/char/
bool/unit), variable, wildcard `_`, tuple, list cons `::`, list literal.
_(Pending: record patterns, `as` binding, or-pattern `P1 | P2`, `when`
guard.)_
- [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed.
- [ ] Tests in `lib/ocaml/tests/parse.sx` — 50+ round-trip parse tests.
@@ -311,6 +314,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means:
_Newest first._
- 2026-05-07 Phase 1 — `match`/`with` + pattern parser. Patterns: wildcard,
literal, var, ctor (nullary + with arg, with tuple-arg flattening so
`Pair (a, b)``(:pcon "Pair" PA PB)`), tuple, list literal, cons `::`
(right-assoc), parens, unit. Match clauses: leading `|` optional, body
parsed via `parse-expr`. AST: `(:match SCRUT CLAUSES)` where each clause
is `(:case PAT BODY)`. 113/113 tests passing (+9). Note: parse-expr is
used for case bodies, so a trailing `| pat -> body` after a complex body
will be reached because `|` is not in the binop table for level 1.
- 2026-05-07 Phase 1 — top-level program parser `ocaml-parse-program`. Parses
a sequence of `let [rec] name params* = expr` decls and bare expressions
separated by `;;`. Output `(:program DECLS)` with each decl one of `(:def …)`,