ocaml: phase 1 match/with + pattern parser (+9 tests, 113 total)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 52s

Patterns: wildcard, literal, var, ctor (nullary + arg, flattens tuple
args so Pair(a,b) -> (:pcon "Pair" PA PB)), tuple, list literal, cons
:: (right-assoc), unit. Match: leading | optional, (:match SCRUT
CLAUSES) with each clause (:case PAT BODY). Body parsed via parse-expr
because | is below level-1 binop precedence.
This commit is contained in:
2026-05-08 07:29:02 +00:00
parent 9648dac88d
commit 9102e57d89
3 changed files with 241 additions and 20 deletions

View File

@@ -5,39 +5,44 @@
;; keywords serialize to their string name so `(list :var "x")` is the ;; keywords serialize to their string name so `(list :var "x")` is the
;; same value as `(list "var" "x")` at runtime. ;; same value as `(list "var" "x")` at runtime.
;; ;;
;; Scope (this iteration — expressions only): ;; Expression scope:
;; atoms int/float/string/char, true/false, unit (), var, con, list literal ;; atoms int/float/string/char/bool, unit (), var, con, list literal
;; application left-associative, f x y z ;; application left-associative, f x y z
;; prefix -E unary minus, not E ;; prefix -E unary minus, not E
;; infix standard ops via lib/guest/pratt.sx table ;; infix 29 ops via lib/guest/pratt.sx
;; tuple a, b, c (lower than infix, higher than let/if) ;; tuple a, b, c (lower than infix, higher than let/if)
;; parens (e) ;; parens (e)
;; if if c then t else e (else optional → unit) ;; if if c then t else e (else optional → unit)
;; fun fun x y -> body ;; fun fun x y -> body
;; let let x = e in body (no rec) ;; let let x = e in body (no rec, function shorthand)
;; let f x y = e in body (function shorthand)
;; let rec f x = e in body ;; let rec f x = e in body
;; match match e with [|] p -> body | p -> body | ...
;;
;; Pattern scope:
;; _ (wildcard), int/string/char/bool literals, ident (var binding),
;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…),
;; list literal [pat;pat;…], cons p1 :: p2.
;; ;;
;; AST shapes: ;; AST shapes:
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit) ;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
;; (:var NAME) (:con NAME) ;; (:var NAME) (:con NAME)
;; (:app FN ARG) — binary, chain for multi-arg ;; (:app FN ARG)
;; (:op OP LHS RHS) — binary infix; OP is the source string ;; (:op OP LHS RHS)
;; (:neg E) (:not E) ;; (:neg E) (:not E)
;; (:tuple ITEMS) ;; (:tuple ITEMS) (:list ITEMS)
;; (:list ITEMS)
;; (:if C T E) ;; (:if C T E)
;; (:fun PARAMS BODY) — PARAMS list of strings (idents) ;; (:fun PARAMS BODY)
;; (:let NAME PARAMS EXPR BODY) ;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY)
;; (:let-rec NAME PARAMS EXPR BODY) ;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...)
;;
;; (:pwild) (:pvar N) (:plit LIT)
;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary
;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL)
(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) (define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value)))) (define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value))))
;; Standard OCaml binary operator table.
;; Higher precedence = tighter binding.
;; ASSOC is :left or :right.
(define (define
ocaml-op-table ocaml-op-table
(list (list
@@ -87,8 +92,6 @@
((entry (pratt-op-lookup ocaml-op-table op))) ((entry (pratt-op-lookup ocaml-op-table op)))
(and (not (= entry nil)) (= (pratt-op-assoc entry) :right))))) (and (not (= entry nil)) (= (pratt-op-assoc entry) :right)))))
;; Some OCaml binops are spelled with keyword tokens (mod / land / lor /
;; lxor / lsl / lsr / asr / or). Recognise both shapes.
(define (define
ocaml-tok-is-binop? ocaml-tok-is-binop?
(fn (fn
@@ -138,6 +141,144 @@
(ocaml-tok-value (peek-tok))))))) (ocaml-tok-value (peek-tok)))))))
(define at-kw? (fn (kw) (check-tok? "keyword" kw))) (define at-kw? (fn (kw) (check-tok? "keyword" kw)))
(define at-op? (fn (op) (check-tok? "op" op))) (define at-op? (fn (op) (check-tok? "op" op)))
(define parse-pattern (fn () nil))
(define parse-pattern-cons (fn () nil))
(define parse-pattern-app (fn () nil))
(define parse-pattern-atom (fn () nil))
(define
at-pattern-atom?
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number") true)
((= tt "string") true)
((= tt "char") true)
((= tt "ident") true)
((= tt "ctor") true)
((and (= tt "keyword") (or (= tv "true") (= tv "false")))
true)
((and (= tt "op") (or (= tv "(") (= tv "["))) true)
(else false)))))
(set!
parse-pattern-atom
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number")
(begin
(advance-tok!)
(if
(= (round tv) tv)
(list :plit (list :int tv))
(list :plit (list :float tv)))))
((= tt "string")
(begin (advance-tok!) (list :plit (list :string tv))))
((= tt "char")
(begin (advance-tok!) (list :plit (list :char tv))))
((and (= tt "keyword") (= tv "true"))
(begin (advance-tok!) (list :plit (list :bool true))))
((and (= tt "keyword") (= tv "false"))
(begin (advance-tok!) (list :plit (list :bool false))))
((and (= tt "ident") (= tv "_"))
(begin (advance-tok!) (list :pwild)))
((= tt "ident") (begin (advance-tok!) (list :pvar tv)))
((= tt "ctor") (begin (advance-tok!) (list :pcon tv)))
((and (= tt "op") (= tv "("))
(begin
(advance-tok!)
(cond
((at-op? ")")
(begin (advance-tok!) (list :plit (list :unit))))
(else
(let
((first (parse-pattern)))
(cond
((at-op? ",")
(let
((items (list first)))
(begin
(define
loop
(fn
()
(when
(at-op? ",")
(begin
(advance-tok!)
(append! items (parse-pattern))
(loop)))))
(loop)
(consume! "op" ")")
(cons :ptuple items))))
(else (begin (consume! "op" ")") first))))))))
((and (= tt "op") (= tv "["))
(begin
(advance-tok!)
(cond
((at-op? "]") (begin (advance-tok!) (list :plist)))
(else
(let
((items (list)))
(begin
(append! items (parse-pattern))
(define
loop
(fn
()
(when
(at-op? ";")
(begin
(advance-tok!)
(when
(not (at-op? "]"))
(begin
(append! items (parse-pattern))
(loop)))))))
(loop)
(consume! "op" "]")
(cons :plist items)))))))
(else
(error
(str
"ocaml-parse: unexpected pattern token "
tt
" "
tv
" at idx "
idx)))))))
(set!
parse-pattern-app
(fn
()
(let
((head (parse-pattern-atom)))
(cond
((and (= (nth head 0) :pcon) (at-pattern-atom?))
(let
((arg (parse-pattern-atom)))
(let
((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg)))))
(concat (list :pcon (nth head 1)) args))))
(else head)))))
(set!
parse-pattern-cons
(fn
()
(let
((lhs (parse-pattern-app)))
(cond
((at-op? "::")
(begin
(advance-tok!)
(list :pcons lhs (parse-pattern-cons))))
(else lhs)))))
(set! parse-pattern (fn () (parse-pattern-cons)))
(define parse-expr (fn () nil)) (define parse-expr (fn () nil))
(define parse-tuple (fn () nil)) (define parse-tuple (fn () nil))
(define parse-binop-rhs (fn (lhs min-prec) lhs)) (define parse-binop-rhs (fn (lhs min-prec) lhs))
@@ -393,6 +534,40 @@
((else-expr (parse-expr))) ((else-expr (parse-expr)))
(list :if cond-expr then-expr else-expr)))) (list :if cond-expr then-expr else-expr))))
(else (list :if cond-expr then-expr (list :unit))))))))) (else (list :if cond-expr then-expr (list :unit)))))))))
(define
parse-match
(fn
()
(let
((scrut (parse-expr)))
(begin
(consume! "keyword" "with")
(when (at-op? "|") (advance-tok!))
(let
((cases (list)))
(begin
(define
one
(fn
()
(let
((p (parse-pattern)))
(begin
(consume! "op" "->")
(let
((body (parse-match-body)))
(append! cases (list :case p body)))))))
(one)
(define
loop
(fn
()
(when
(at-op? "|")
(begin (advance-tok!) (one) (loop)))))
(loop)
(cons :match (cons scrut (list cases)))))))))
(define parse-match-body (fn () (parse-expr)))
(set! (set!
parse-expr parse-expr
(fn (fn
@@ -401,6 +576,7 @@
((at-kw? "fun") (begin (advance-tok!) (parse-fun))) ((at-kw? "fun") (begin (advance-tok!) (parse-fun)))
((at-kw? "let") (begin (advance-tok!) (parse-let))) ((at-kw? "let") (begin (advance-tok!) (parse-let)))
((at-kw? "if") (begin (advance-tok!) (parse-if))) ((at-kw? "if") (begin (advance-tok!) (parse-if)))
((at-kw? "match") (begin (advance-tok!) (parse-match)))
(else (parse-tuple))))) (else (parse-tuple)))))
(let (let
((result (parse-expr))) ((result (parse-expr)))
@@ -422,7 +598,10 @@
(fn (fn
(src) (src)
(let (let
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list))) ((tokens (ocaml-tokenize src))
(idx 0)
(tok-len 0)
(decls (list)))
(begin (begin
(set! tok-len (len tokens)) (set! tok-len (len tokens))
(define peek-tok (fn () (nth tokens idx))) (define peek-tok (fn () (nth tokens idx)))

View File

@@ -280,6 +280,26 @@ cat > "$TMPFILE" << 'EPOCHS'
(epoch 278) (epoch 278)
(eval "(ocaml-parse-program \"\")") (eval "(ocaml-parse-program \"\")")
;; ── Match / patterns ───────────────────────────────────────────
(epoch 300)
(eval "(ocaml-parse \"match x with | None -> 0 | Some y -> y\")")
(epoch 301)
(eval "(ocaml-parse \"match x with None -> 0 | Some y -> y\")")
(epoch 302)
(eval "(ocaml-parse \"match l with | [] -> 0 | h :: t -> 1\")")
(epoch 303)
(eval "(ocaml-parse \"match p with | (a, b) -> a + b\")")
(epoch 304)
(eval "(ocaml-parse \"match n with | 0 -> 1 | _ -> n\")")
(epoch 305)
(eval "(ocaml-parse \"match x with | true -> 1 | false -> 0\")")
(epoch 306)
(eval "(ocaml-parse \"match x with | Pair (a, b) -> a + b\")")
(epoch 307)
(eval "(ocaml-parse \"match x with | \\\"hi\\\" -> 1 | _ -> 0\")")
(epoch 308)
(eval "(ocaml-parse \"match x with | () -> 0\")")
EPOCHS EPOCHS
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
@@ -445,6 +465,17 @@ check 276 "program: mixed decls + expr" '("def" "y" () ("int" 2)) ("expr"'
check 277 "program: 4 forms incl head" '4' check 277 "program: 4 forms incl head" '4'
check 278 "program: empty" '("program")' check 278 "program: empty" '("program")'
# ── Match / patterns ────────────────────────────────────────────
check 300 "match Some/None" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some" ("pvar" "y")) ("var" "y")))'
check 301 "match no leading bar" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some"'
check 302 "match list cons" '("case" ("plist") ("int" 0)) ("case" ("pcons" ("pvar" "h") ("pvar" "t")) ("int" 1))'
check 303 "match tuple pat" '("ptuple" ("pvar" "a") ("pvar" "b"))'
check 304 "match int + wildcard" '("case" ("plit" ("int" 0)) ("int" 1)) ("case" ("pwild")'
check 305 "match bool literals" '("plit" ("bool" true))'
check 306 "match ctor with tuple arg" '("pcon" "Pair" ("pvar" "a") ("pvar" "b"))'
check 307 "match string literal" '("plit" ("string" "hi"))'
check 308 "match unit pattern" '("plit" ("unit"))'
TOTAL=$((PASS + FAIL)) TOTAL=$((PASS + FAIL))
if [ $FAIL -eq 0 ]; then if [ $FAIL -eq 0 ]; then
echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" echo "ok $PASS/$TOTAL OCaml-on-SX tests passed"

View File

@@ -133,8 +133,11 @@ SX CEK evaluator (both JS and OCaml hosts)
via `ocaml-parse-program`. _(Pending: `type`/`module`/`exception`/`open`/ via `ocaml-parse-program`. _(Pending: `type`/`module`/`exception`/`open`/
`include` decls, `match`/`with`, `try`/`with`, `function`, record literals/ `include` decls, `match`/`with`, `try`/`with`, `function`, record literals/
updates, field access, sequences `;`, `and` mutually-recursive bindings.)_ updates, field access, sequences `;`, `and` mutually-recursive bindings.)_
- [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`, - [~] **Patterns:** constructor (nullary + with args, incl. flattened tuple
list literal, record, `as`, or-pattern `P1 | P2`, `when` guard. args `Pair (a, b)``(:pcon "Pair" PA PB)`), literal (int/string/char/
bool/unit), variable, wildcard `_`, tuple, list cons `::`, list literal.
_(Pending: record patterns, `as` binding, or-pattern `P1 | P2`, `when`
guard.)_
- [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed. - [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed.
- [ ] Tests in `lib/ocaml/tests/parse.sx` — 50+ round-trip parse tests. - [ ] Tests in `lib/ocaml/tests/parse.sx` — 50+ round-trip parse tests.
@@ -311,6 +314,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means:
_Newest first._ _Newest first._
- 2026-05-07 Phase 1 — `match`/`with` + pattern parser. Patterns: wildcard,
literal, var, ctor (nullary + with arg, with tuple-arg flattening so
`Pair (a, b)``(:pcon "Pair" PA PB)`), tuple, list literal, cons `::`
(right-assoc), parens, unit. Match clauses: leading `|` optional, body
parsed via `parse-expr`. AST: `(:match SCRUT CLAUSES)` where each clause
is `(:case PAT BODY)`. 113/113 tests passing (+9). Note: parse-expr is
used for case bodies, so a trailing `| pat -> body` after a complex body
will be reached because `|` is not in the binop table for level 1.
- 2026-05-07 Phase 1 — top-level program parser `ocaml-parse-program`. Parses - 2026-05-07 Phase 1 — top-level program parser `ocaml-parse-program`. Parses
a sequence of `let [rec] name params* = expr` decls and bare expressions a sequence of `let [rec] name params* = expr` decls and bare expressions
separated by `;;`. Output `(:program DECLS)` with each decl one of `(:def …)`, separated by `;;`. Output `(:program DECLS)` with each decl one of `(:def …)`,