ocaml: phase 1 match/with + pattern parser (+9 tests, 113 total)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 52s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 52s
Patterns: wildcard, literal, var, ctor (nullary + arg, flattens tuple args so Pair(a,b) -> (:pcon "Pair" PA PB)), tuple, list literal, cons :: (right-assoc), unit. Match: leading | optional, (:match SCRUT CLAUSES) with each clause (:case PAT BODY). Body parsed via parse-expr because | is below level-1 binop precedence.
This commit is contained in:
@@ -5,39 +5,44 @@
|
||||
;; keywords serialize to their string name so `(list :var "x")` is the
|
||||
;; same value as `(list "var" "x")` at runtime.
|
||||
;;
|
||||
;; Scope (this iteration — expressions only):
|
||||
;; atoms int/float/string/char, true/false, unit (), var, con, list literal
|
||||
;; Expression scope:
|
||||
;; atoms int/float/string/char/bool, unit (), var, con, list literal
|
||||
;; application left-associative, f x y z
|
||||
;; prefix -E unary minus, not E
|
||||
;; infix standard ops via lib/guest/pratt.sx table
|
||||
;; infix 29 ops via lib/guest/pratt.sx
|
||||
;; tuple a, b, c (lower than infix, higher than let/if)
|
||||
;; parens (e)
|
||||
;; if if c then t else e (else optional → unit)
|
||||
;; fun fun x y -> body
|
||||
;; let let x = e in body (no rec)
|
||||
;; let f x y = e in body (function shorthand)
|
||||
;; let let x = e in body (no rec, function shorthand)
|
||||
;; let rec f x = e in body
|
||||
;; match match e with [|] p -> body | p -> body | ...
|
||||
;;
|
||||
;; Pattern scope:
|
||||
;; _ (wildcard), int/string/char/bool literals, ident (var binding),
|
||||
;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…),
|
||||
;; list literal [pat;pat;…], cons p1 :: p2.
|
||||
;;
|
||||
;; AST shapes:
|
||||
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
|
||||
;; (:var NAME) (:con NAME)
|
||||
;; (:app FN ARG) — binary, chain for multi-arg
|
||||
;; (:op OP LHS RHS) — binary infix; OP is the source string
|
||||
;; (:app FN ARG)
|
||||
;; (:op OP LHS RHS)
|
||||
;; (:neg E) (:not E)
|
||||
;; (:tuple ITEMS)
|
||||
;; (:list ITEMS)
|
||||
;; (:tuple ITEMS) (:list ITEMS)
|
||||
;; (:if C T E)
|
||||
;; (:fun PARAMS BODY) — PARAMS list of strings (idents)
|
||||
;; (:let NAME PARAMS EXPR BODY)
|
||||
;; (:let-rec NAME PARAMS EXPR BODY)
|
||||
;; (:fun PARAMS BODY)
|
||||
;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY)
|
||||
;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...)
|
||||
;;
|
||||
;; (:pwild) (:pvar N) (:plit LIT)
|
||||
;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary
|
||||
;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL)
|
||||
|
||||
(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
|
||||
|
||||
(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value))))
|
||||
|
||||
;; Standard OCaml binary operator table.
|
||||
;; Higher precedence = tighter binding.
|
||||
;; ASSOC is :left or :right.
|
||||
(define
|
||||
ocaml-op-table
|
||||
(list
|
||||
@@ -87,8 +92,6 @@
|
||||
((entry (pratt-op-lookup ocaml-op-table op)))
|
||||
(and (not (= entry nil)) (= (pratt-op-assoc entry) :right)))))
|
||||
|
||||
;; Some OCaml binops are spelled with keyword tokens (mod / land / lor /
|
||||
;; lxor / lsl / lsr / asr / or). Recognise both shapes.
|
||||
(define
|
||||
ocaml-tok-is-binop?
|
||||
(fn
|
||||
@@ -138,6 +141,144 @@
|
||||
(ocaml-tok-value (peek-tok)))))))
|
||||
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
|
||||
(define at-op? (fn (op) (check-tok? "op" op)))
|
||||
(define parse-pattern (fn () nil))
|
||||
(define parse-pattern-cons (fn () nil))
|
||||
(define parse-pattern-app (fn () nil))
|
||||
(define parse-pattern-atom (fn () nil))
|
||||
(define
|
||||
at-pattern-atom?
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((tt (ocaml-tok-type (peek-tok)))
|
||||
(tv (ocaml-tok-value (peek-tok))))
|
||||
(cond
|
||||
((= tt "number") true)
|
||||
((= tt "string") true)
|
||||
((= tt "char") true)
|
||||
((= tt "ident") true)
|
||||
((= tt "ctor") true)
|
||||
((and (= tt "keyword") (or (= tv "true") (= tv "false")))
|
||||
true)
|
||||
((and (= tt "op") (or (= tv "(") (= tv "["))) true)
|
||||
(else false)))))
|
||||
(set!
|
||||
parse-pattern-atom
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((tt (ocaml-tok-type (peek-tok)))
|
||||
(tv (ocaml-tok-value (peek-tok))))
|
||||
(cond
|
||||
((= tt "number")
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(if
|
||||
(= (round tv) tv)
|
||||
(list :plit (list :int tv))
|
||||
(list :plit (list :float tv)))))
|
||||
((= tt "string")
|
||||
(begin (advance-tok!) (list :plit (list :string tv))))
|
||||
((= tt "char")
|
||||
(begin (advance-tok!) (list :plit (list :char tv))))
|
||||
((and (= tt "keyword") (= tv "true"))
|
||||
(begin (advance-tok!) (list :plit (list :bool true))))
|
||||
((and (= tt "keyword") (= tv "false"))
|
||||
(begin (advance-tok!) (list :plit (list :bool false))))
|
||||
((and (= tt "ident") (= tv "_"))
|
||||
(begin (advance-tok!) (list :pwild)))
|
||||
((= tt "ident") (begin (advance-tok!) (list :pvar tv)))
|
||||
((= tt "ctor") (begin (advance-tok!) (list :pcon tv)))
|
||||
((and (= tt "op") (= tv "("))
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(cond
|
||||
((at-op? ")")
|
||||
(begin (advance-tok!) (list :plit (list :unit))))
|
||||
(else
|
||||
(let
|
||||
((first (parse-pattern)))
|
||||
(cond
|
||||
((at-op? ",")
|
||||
(let
|
||||
((items (list first)))
|
||||
(begin
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(at-op? ",")
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(append! items (parse-pattern))
|
||||
(loop)))))
|
||||
(loop)
|
||||
(consume! "op" ")")
|
||||
(cons :ptuple items))))
|
||||
(else (begin (consume! "op" ")") first))))))))
|
||||
((and (= tt "op") (= tv "["))
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(cond
|
||||
((at-op? "]") (begin (advance-tok!) (list :plist)))
|
||||
(else
|
||||
(let
|
||||
((items (list)))
|
||||
(begin
|
||||
(append! items (parse-pattern))
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(at-op? ";")
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(when
|
||||
(not (at-op? "]"))
|
||||
(begin
|
||||
(append! items (parse-pattern))
|
||||
(loop)))))))
|
||||
(loop)
|
||||
(consume! "op" "]")
|
||||
(cons :plist items)))))))
|
||||
(else
|
||||
(error
|
||||
(str
|
||||
"ocaml-parse: unexpected pattern token "
|
||||
tt
|
||||
" "
|
||||
tv
|
||||
" at idx "
|
||||
idx)))))))
|
||||
(set!
|
||||
parse-pattern-app
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((head (parse-pattern-atom)))
|
||||
(cond
|
||||
((and (= (nth head 0) :pcon) (at-pattern-atom?))
|
||||
(let
|
||||
((arg (parse-pattern-atom)))
|
||||
(let
|
||||
((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg)))))
|
||||
(concat (list :pcon (nth head 1)) args))))
|
||||
(else head)))))
|
||||
(set!
|
||||
parse-pattern-cons
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((lhs (parse-pattern-app)))
|
||||
(cond
|
||||
((at-op? "::")
|
||||
(begin
|
||||
(advance-tok!)
|
||||
(list :pcons lhs (parse-pattern-cons))))
|
||||
(else lhs)))))
|
||||
(set! parse-pattern (fn () (parse-pattern-cons)))
|
||||
(define parse-expr (fn () nil))
|
||||
(define parse-tuple (fn () nil))
|
||||
(define parse-binop-rhs (fn (lhs min-prec) lhs))
|
||||
@@ -393,6 +534,40 @@
|
||||
((else-expr (parse-expr)))
|
||||
(list :if cond-expr then-expr else-expr))))
|
||||
(else (list :if cond-expr then-expr (list :unit)))))))))
|
||||
(define
|
||||
parse-match
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((scrut (parse-expr)))
|
||||
(begin
|
||||
(consume! "keyword" "with")
|
||||
(when (at-op? "|") (advance-tok!))
|
||||
(let
|
||||
((cases (list)))
|
||||
(begin
|
||||
(define
|
||||
one
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((p (parse-pattern)))
|
||||
(begin
|
||||
(consume! "op" "->")
|
||||
(let
|
||||
((body (parse-match-body)))
|
||||
(append! cases (list :case p body)))))))
|
||||
(one)
|
||||
(define
|
||||
loop
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(at-op? "|")
|
||||
(begin (advance-tok!) (one) (loop)))))
|
||||
(loop)
|
||||
(cons :match (cons scrut (list cases)))))))))
|
||||
(define parse-match-body (fn () (parse-expr)))
|
||||
(set!
|
||||
parse-expr
|
||||
(fn
|
||||
@@ -401,6 +576,7 @@
|
||||
((at-kw? "fun") (begin (advance-tok!) (parse-fun)))
|
||||
((at-kw? "let") (begin (advance-tok!) (parse-let)))
|
||||
((at-kw? "if") (begin (advance-tok!) (parse-if)))
|
||||
((at-kw? "match") (begin (advance-tok!) (parse-match)))
|
||||
(else (parse-tuple)))))
|
||||
(let
|
||||
((result (parse-expr)))
|
||||
@@ -422,7 +598,10 @@
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list)))
|
||||
((tokens (ocaml-tokenize src))
|
||||
(idx 0)
|
||||
(tok-len 0)
|
||||
(decls (list)))
|
||||
(begin
|
||||
(set! tok-len (len tokens))
|
||||
(define peek-tok (fn () (nth tokens idx)))
|
||||
|
||||
@@ -280,6 +280,26 @@ cat > "$TMPFILE" << 'EPOCHS'
|
||||
(epoch 278)
|
||||
(eval "(ocaml-parse-program \"\")")
|
||||
|
||||
;; ── Match / patterns ───────────────────────────────────────────
|
||||
(epoch 300)
|
||||
(eval "(ocaml-parse \"match x with | None -> 0 | Some y -> y\")")
|
||||
(epoch 301)
|
||||
(eval "(ocaml-parse \"match x with None -> 0 | Some y -> y\")")
|
||||
(epoch 302)
|
||||
(eval "(ocaml-parse \"match l with | [] -> 0 | h :: t -> 1\")")
|
||||
(epoch 303)
|
||||
(eval "(ocaml-parse \"match p with | (a, b) -> a + b\")")
|
||||
(epoch 304)
|
||||
(eval "(ocaml-parse \"match n with | 0 -> 1 | _ -> n\")")
|
||||
(epoch 305)
|
||||
(eval "(ocaml-parse \"match x with | true -> 1 | false -> 0\")")
|
||||
(epoch 306)
|
||||
(eval "(ocaml-parse \"match x with | Pair (a, b) -> a + b\")")
|
||||
(epoch 307)
|
||||
(eval "(ocaml-parse \"match x with | \\\"hi\\\" -> 1 | _ -> 0\")")
|
||||
(epoch 308)
|
||||
(eval "(ocaml-parse \"match x with | () -> 0\")")
|
||||
|
||||
EPOCHS
|
||||
|
||||
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
|
||||
@@ -445,6 +465,17 @@ check 276 "program: mixed decls + expr" '("def" "y" () ("int" 2)) ("expr"'
|
||||
check 277 "program: 4 forms incl head" '4'
|
||||
check 278 "program: empty" '("program")'
|
||||
|
||||
# ── Match / patterns ────────────────────────────────────────────
|
||||
check 300 "match Some/None" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some" ("pvar" "y")) ("var" "y")))'
|
||||
check 301 "match no leading bar" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some"'
|
||||
check 302 "match list cons" '("case" ("plist") ("int" 0)) ("case" ("pcons" ("pvar" "h") ("pvar" "t")) ("int" 1))'
|
||||
check 303 "match tuple pat" '("ptuple" ("pvar" "a") ("pvar" "b"))'
|
||||
check 304 "match int + wildcard" '("case" ("plit" ("int" 0)) ("int" 1)) ("case" ("pwild")'
|
||||
check 305 "match bool literals" '("plit" ("bool" true))'
|
||||
check 306 "match ctor with tuple arg" '("pcon" "Pair" ("pvar" "a") ("pvar" "b"))'
|
||||
check 307 "match string literal" '("plit" ("string" "hi"))'
|
||||
check 308 "match unit pattern" '("plit" ("unit"))'
|
||||
|
||||
TOTAL=$((PASS + FAIL))
|
||||
if [ $FAIL -eq 0 ]; then
|
||||
echo "ok $PASS/$TOTAL OCaml-on-SX tests passed"
|
||||
|
||||
@@ -133,8 +133,11 @@ SX CEK evaluator (both JS and OCaml hosts)
|
||||
via `ocaml-parse-program`. _(Pending: `type`/`module`/`exception`/`open`/
|
||||
`include` decls, `match`/`with`, `try`/`with`, `function`, record literals/
|
||||
updates, field access, sequences `;`, `and` mutually-recursive bindings.)_
|
||||
- [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`,
|
||||
list literal, record, `as`, or-pattern `P1 | P2`, `when` guard.
|
||||
- [~] **Patterns:** constructor (nullary + with args, incl. flattened tuple
|
||||
args `Pair (a, b)` → `(:pcon "Pair" PA PB)`), literal (int/string/char/
|
||||
bool/unit), variable, wildcard `_`, tuple, list cons `::`, list literal.
|
||||
_(Pending: record patterns, `as` binding, or-pattern `P1 | P2`, `when`
|
||||
guard.)_
|
||||
- [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed.
|
||||
- [ ] Tests in `lib/ocaml/tests/parse.sx` — 50+ round-trip parse tests.
|
||||
|
||||
@@ -311,6 +314,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means:
|
||||
|
||||
_Newest first._
|
||||
|
||||
- 2026-05-07 Phase 1 — `match`/`with` + pattern parser. Patterns: wildcard,
|
||||
literal, var, ctor (nullary + with arg, with tuple-arg flattening so
|
||||
`Pair (a, b)` → `(:pcon "Pair" PA PB)`), tuple, list literal, cons `::`
|
||||
(right-assoc), parens, unit. Match clauses: leading `|` optional, body
|
||||
parsed via `parse-expr`. AST: `(:match SCRUT CLAUSES)` where each clause
|
||||
is `(:case PAT BODY)`. 113/113 tests passing (+9). Note: parse-expr is
|
||||
used for case bodies, so a trailing `| pat -> body` after a complex body
|
||||
will be reached because `|` is not in the binop table for level 1.
|
||||
- 2026-05-07 Phase 1 — top-level program parser `ocaml-parse-program`. Parses
|
||||
a sequence of `let [rec] name params* = expr` decls and bare expressions
|
||||
separated by `;;`. Output `(:program DECLS)` with each decl one of `(:def …)`,
|
||||
|
||||
Reference in New Issue
Block a user