diff --git a/lib/ocaml/parser.sx b/lib/ocaml/parser.sx index 59634387..d1a56dff 100644 --- a/lib/ocaml/parser.sx +++ b/lib/ocaml/parser.sx @@ -5,39 +5,44 @@ ;; keywords serialize to their string name so `(list :var "x")` is the ;; same value as `(list "var" "x")` at runtime. ;; -;; Scope (this iteration — expressions only): -;; atoms int/float/string/char, true/false, unit (), var, con, list literal +;; Expression scope: +;; atoms int/float/string/char/bool, unit (), var, con, list literal ;; application left-associative, f x y z ;; prefix -E unary minus, not E -;; infix standard ops via lib/guest/pratt.sx table +;; infix 29 ops via lib/guest/pratt.sx ;; tuple a, b, c (lower than infix, higher than let/if) ;; parens (e) ;; if if c then t else e (else optional → unit) ;; fun fun x y -> body -;; let let x = e in body (no rec) -;; let f x y = e in body (function shorthand) +;; let let x = e in body (no rec, function shorthand) ;; let rec f x = e in body +;; match match e with [|] p -> body | p -> body | ... +;; +;; Pattern scope: +;; _ (wildcard), int/string/char/bool literals, ident (var binding), +;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…), +;; list literal [pat;pat;…], cons p1 :: p2. ;; ;; AST shapes: ;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit) ;; (:var NAME) (:con NAME) -;; (:app FN ARG) — binary, chain for multi-arg -;; (:op OP LHS RHS) — binary infix; OP is the source string +;; (:app FN ARG) +;; (:op OP LHS RHS) ;; (:neg E) (:not E) -;; (:tuple ITEMS) -;; (:list ITEMS) +;; (:tuple ITEMS) (:list ITEMS) ;; (:if C T E) -;; (:fun PARAMS BODY) — PARAMS list of strings (idents) -;; (:let NAME PARAMS EXPR BODY) -;; (:let-rec NAME PARAMS EXPR BODY) +;; (:fun PARAMS BODY) +;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY) +;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...) +;; +;; (:pwild) (:pvar N) (:plit LIT) +;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary +;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL) (define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) (define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value)))) -;; Standard OCaml binary operator table. -;; Higher precedence = tighter binding. -;; ASSOC is :left or :right. (define ocaml-op-table (list @@ -87,8 +92,6 @@ ((entry (pratt-op-lookup ocaml-op-table op))) (and (not (= entry nil)) (= (pratt-op-assoc entry) :right))))) -;; Some OCaml binops are spelled with keyword tokens (mod / land / lor / -;; lxor / lsl / lsr / asr / or). Recognise both shapes. (define ocaml-tok-is-binop? (fn @@ -138,6 +141,144 @@ (ocaml-tok-value (peek-tok))))))) (define at-kw? (fn (kw) (check-tok? "keyword" kw))) (define at-op? (fn (op) (check-tok? "op" op))) + (define parse-pattern (fn () nil)) + (define parse-pattern-cons (fn () nil)) + (define parse-pattern-app (fn () nil)) + (define parse-pattern-atom (fn () nil)) + (define + at-pattern-atom? + (fn + () + (let + ((tt (ocaml-tok-type (peek-tok))) + (tv (ocaml-tok-value (peek-tok)))) + (cond + ((= tt "number") true) + ((= tt "string") true) + ((= tt "char") true) + ((= tt "ident") true) + ((= tt "ctor") true) + ((and (= tt "keyword") (or (= tv "true") (= tv "false"))) + true) + ((and (= tt "op") (or (= tv "(") (= tv "["))) true) + (else false))))) + (set! + parse-pattern-atom + (fn + () + (let + ((tt (ocaml-tok-type (peek-tok))) + (tv (ocaml-tok-value (peek-tok)))) + (cond + ((= tt "number") + (begin + (advance-tok!) + (if + (= (round tv) tv) + (list :plit (list :int tv)) + (list :plit (list :float tv))))) + ((= tt "string") + (begin (advance-tok!) (list :plit (list :string tv)))) + ((= tt "char") + (begin (advance-tok!) (list :plit (list :char tv)))) + ((and (= tt "keyword") (= tv "true")) + (begin (advance-tok!) (list :plit (list :bool true)))) + ((and (= tt "keyword") (= tv "false")) + (begin (advance-tok!) (list :plit (list :bool false)))) + ((and (= tt "ident") (= tv "_")) + (begin (advance-tok!) (list :pwild))) + ((= tt "ident") (begin (advance-tok!) (list :pvar tv))) + ((= tt "ctor") (begin (advance-tok!) (list :pcon tv))) + ((and (= tt "op") (= tv "(")) + (begin + (advance-tok!) + (cond + ((at-op? ")") + (begin (advance-tok!) (list :plit (list :unit)))) + (else + (let + ((first (parse-pattern))) + (cond + ((at-op? ",") + (let + ((items (list first))) + (begin + (define + loop + (fn + () + (when + (at-op? ",") + (begin + (advance-tok!) + (append! items (parse-pattern)) + (loop))))) + (loop) + (consume! "op" ")") + (cons :ptuple items)))) + (else (begin (consume! "op" ")") first)))))))) + ((and (= tt "op") (= tv "[")) + (begin + (advance-tok!) + (cond + ((at-op? "]") (begin (advance-tok!) (list :plist))) + (else + (let + ((items (list))) + (begin + (append! items (parse-pattern)) + (define + loop + (fn + () + (when + (at-op? ";") + (begin + (advance-tok!) + (when + (not (at-op? "]")) + (begin + (append! items (parse-pattern)) + (loop))))))) + (loop) + (consume! "op" "]") + (cons :plist items))))))) + (else + (error + (str + "ocaml-parse: unexpected pattern token " + tt + " " + tv + " at idx " + idx))))))) + (set! + parse-pattern-app + (fn + () + (let + ((head (parse-pattern-atom))) + (cond + ((and (= (nth head 0) :pcon) (at-pattern-atom?)) + (let + ((arg (parse-pattern-atom))) + (let + ((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg))))) + (concat (list :pcon (nth head 1)) args)))) + (else head))))) + (set! + parse-pattern-cons + (fn + () + (let + ((lhs (parse-pattern-app))) + (cond + ((at-op? "::") + (begin + (advance-tok!) + (list :pcons lhs (parse-pattern-cons)))) + (else lhs))))) + (set! parse-pattern (fn () (parse-pattern-cons))) (define parse-expr (fn () nil)) (define parse-tuple (fn () nil)) (define parse-binop-rhs (fn (lhs min-prec) lhs)) @@ -393,6 +534,40 @@ ((else-expr (parse-expr))) (list :if cond-expr then-expr else-expr)))) (else (list :if cond-expr then-expr (list :unit))))))))) + (define + parse-match + (fn + () + (let + ((scrut (parse-expr))) + (begin + (consume! "keyword" "with") + (when (at-op? "|") (advance-tok!)) + (let + ((cases (list))) + (begin + (define + one + (fn + () + (let + ((p (parse-pattern))) + (begin + (consume! "op" "->") + (let + ((body (parse-match-body))) + (append! cases (list :case p body))))))) + (one) + (define + loop + (fn + () + (when + (at-op? "|") + (begin (advance-tok!) (one) (loop))))) + (loop) + (cons :match (cons scrut (list cases))))))))) + (define parse-match-body (fn () (parse-expr))) (set! parse-expr (fn @@ -401,6 +576,7 @@ ((at-kw? "fun") (begin (advance-tok!) (parse-fun))) ((at-kw? "let") (begin (advance-tok!) (parse-let))) ((at-kw? "if") (begin (advance-tok!) (parse-if))) + ((at-kw? "match") (begin (advance-tok!) (parse-match))) (else (parse-tuple))))) (let ((result (parse-expr))) @@ -422,7 +598,10 @@ (fn (src) (let - ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list))) + ((tokens (ocaml-tokenize src)) + (idx 0) + (tok-len 0) + (decls (list))) (begin (set! tok-len (len tokens)) (define peek-tok (fn () (nth tokens idx))) diff --git a/lib/ocaml/test.sh b/lib/ocaml/test.sh index cf55ac8a..bb6bd860 100755 --- a/lib/ocaml/test.sh +++ b/lib/ocaml/test.sh @@ -280,6 +280,26 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 278) (eval "(ocaml-parse-program \"\")") +;; ── Match / patterns ─────────────────────────────────────────── +(epoch 300) +(eval "(ocaml-parse \"match x with | None -> 0 | Some y -> y\")") +(epoch 301) +(eval "(ocaml-parse \"match x with None -> 0 | Some y -> y\")") +(epoch 302) +(eval "(ocaml-parse \"match l with | [] -> 0 | h :: t -> 1\")") +(epoch 303) +(eval "(ocaml-parse \"match p with | (a, b) -> a + b\")") +(epoch 304) +(eval "(ocaml-parse \"match n with | 0 -> 1 | _ -> n\")") +(epoch 305) +(eval "(ocaml-parse \"match x with | true -> 1 | false -> 0\")") +(epoch 306) +(eval "(ocaml-parse \"match x with | Pair (a, b) -> a + b\")") +(epoch 307) +(eval "(ocaml-parse \"match x with | \\\"hi\\\" -> 1 | _ -> 0\")") +(epoch 308) +(eval "(ocaml-parse \"match x with | () -> 0\")") + EPOCHS OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -445,6 +465,17 @@ check 276 "program: mixed decls + expr" '("def" "y" () ("int" 2)) ("expr"' check 277 "program: 4 forms incl head" '4' check 278 "program: empty" '("program")' +# ── Match / patterns ──────────────────────────────────────────── +check 300 "match Some/None" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some" ("pvar" "y")) ("var" "y")))' +check 301 "match no leading bar" '("match" ("var" "x") (("case" ("pcon" "None") ("int" 0)) ("case" ("pcon" "Some"' +check 302 "match list cons" '("case" ("plist") ("int" 0)) ("case" ("pcons" ("pvar" "h") ("pvar" "t")) ("int" 1))' +check 303 "match tuple pat" '("ptuple" ("pvar" "a") ("pvar" "b"))' +check 304 "match int + wildcard" '("case" ("plit" ("int" 0)) ("int" 1)) ("case" ("pwild")' +check 305 "match bool literals" '("plit" ("bool" true))' +check 306 "match ctor with tuple arg" '("pcon" "Pair" ("pvar" "a") ("pvar" "b"))' +check 307 "match string literal" '("plit" ("string" "hi"))' +check 308 "match unit pattern" '("plit" ("unit"))' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "ok $PASS/$TOTAL OCaml-on-SX tests passed" diff --git a/plans/ocaml-on-sx.md b/plans/ocaml-on-sx.md index 7c1534e0..7294cd83 100644 --- a/plans/ocaml-on-sx.md +++ b/plans/ocaml-on-sx.md @@ -133,8 +133,11 @@ SX CEK evaluator (both JS and OCaml hosts) via `ocaml-parse-program`. _(Pending: `type`/`module`/`exception`/`open`/ `include` decls, `match`/`with`, `try`/`with`, `function`, record literals/ updates, field access, sequences `;`, `and` mutually-recursive bindings.)_ -- [ ] **Patterns:** constructor, literal, variable, wildcard `_`, tuple, list cons `::`, - list literal, record, `as`, or-pattern `P1 | P2`, `when` guard. +- [~] **Patterns:** constructor (nullary + with args, incl. flattened tuple + args `Pair (a, b)` → `(:pcon "Pair" PA PB)`), literal (int/string/char/ + bool/unit), variable, wildcard `_`, tuple, list cons `::`, list literal. + _(Pending: record patterns, `as` binding, or-pattern `P1 | P2`, `when` + guard.)_ - [ ] OCaml is **not** indentation-sensitive — no layout algorithm needed. - [ ] Tests in `lib/ocaml/tests/parse.sx` — 50+ round-trip parse tests. @@ -311,6 +314,14 @@ the "mother tongue" closure: OCaml → SX → OCaml. This means: _Newest first._ +- 2026-05-07 Phase 1 — `match`/`with` + pattern parser. Patterns: wildcard, + literal, var, ctor (nullary + with arg, with tuple-arg flattening so + `Pair (a, b)` → `(:pcon "Pair" PA PB)`), tuple, list literal, cons `::` + (right-assoc), parens, unit. Match clauses: leading `|` optional, body + parsed via `parse-expr`. AST: `(:match SCRUT CLAUSES)` where each clause + is `(:case PAT BODY)`. 113/113 tests passing (+9). Note: parse-expr is + used for case bodies, so a trailing `| pat -> body` after a complex body + will be reached because `|` is not in the binop table for level 1. - 2026-05-07 Phase 1 — top-level program parser `ocaml-parse-program`. Parses a sequence of `let [rec] name params* = expr` decls and bare expressions separated by `;;`. Output `(:program DECLS)` with each decl one of `(:def …)`,