ocaml: phase 1 match/with + pattern parser (+9 tests, 113 total)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 52s

Patterns: wildcard, literal, var, ctor (nullary + arg, flattens tuple
args so Pair(a,b) -> (:pcon "Pair" PA PB)), tuple, list literal, cons
:: (right-assoc), unit. Match: leading | optional, (:match SCRUT
CLAUSES) with each clause (:case PAT BODY). Body parsed via parse-expr
because | is below level-1 binop precedence.
This commit is contained in:
2026-05-08 07:29:02 +00:00
parent 9648dac88d
commit 9102e57d89
3 changed files with 241 additions and 20 deletions

View File

@@ -5,39 +5,44 @@
;; keywords serialize to their string name so `(list :var "x")` is the
;; same value as `(list "var" "x")` at runtime.
;;
;; Scope (this iteration — expressions only):
;; atoms int/float/string/char, true/false, unit (), var, con, list literal
;; Expression scope:
;; atoms int/float/string/char/bool, unit (), var, con, list literal
;; application left-associative, f x y z
;; prefix -E unary minus, not E
;; infix standard ops via lib/guest/pratt.sx table
;; infix 29 ops via lib/guest/pratt.sx
;; tuple a, b, c (lower than infix, higher than let/if)
;; parens (e)
;; if if c then t else e (else optional → unit)
;; fun fun x y -> body
;; let let x = e in body (no rec)
;; let f x y = e in body (function shorthand)
;; let let x = e in body (no rec, function shorthand)
;; let rec f x = e in body
;; match match e with [|] p -> body | p -> body | ...
;;
;; Pattern scope:
;; _ (wildcard), int/string/char/bool literals, ident (var binding),
;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…),
;; list literal [pat;pat;…], cons p1 :: p2.
;;
;; AST shapes:
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
;; (:var NAME) (:con NAME)
;; (:app FN ARG) — binary, chain for multi-arg
;; (:op OP LHS RHS) — binary infix; OP is the source string
;; (:app FN ARG)
;; (:op OP LHS RHS)
;; (:neg E) (:not E)
;; (:tuple ITEMS)
;; (:list ITEMS)
;; (:tuple ITEMS) (:list ITEMS)
;; (:if C T E)
;; (:fun PARAMS BODY) — PARAMS list of strings (idents)
;; (:let NAME PARAMS EXPR BODY)
;; (:let-rec NAME PARAMS EXPR BODY)
;; (:fun PARAMS BODY)
;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY)
;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...)
;;
;; (:pwild) (:pvar N) (:plit LIT)
;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary
;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL)
(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value))))
;; Standard OCaml binary operator table.
;; Higher precedence = tighter binding.
;; ASSOC is :left or :right.
(define
ocaml-op-table
(list
@@ -87,8 +92,6 @@
((entry (pratt-op-lookup ocaml-op-table op)))
(and (not (= entry nil)) (= (pratt-op-assoc entry) :right)))))
;; Some OCaml binops are spelled with keyword tokens (mod / land / lor /
;; lxor / lsl / lsr / asr / or). Recognise both shapes.
(define
ocaml-tok-is-binop?
(fn
@@ -138,6 +141,144 @@
(ocaml-tok-value (peek-tok)))))))
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
(define at-op? (fn (op) (check-tok? "op" op)))
(define parse-pattern (fn () nil))
(define parse-pattern-cons (fn () nil))
(define parse-pattern-app (fn () nil))
(define parse-pattern-atom (fn () nil))
(define
at-pattern-atom?
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number") true)
((= tt "string") true)
((= tt "char") true)
((= tt "ident") true)
((= tt "ctor") true)
((and (= tt "keyword") (or (= tv "true") (= tv "false")))
true)
((and (= tt "op") (or (= tv "(") (= tv "["))) true)
(else false)))))
(set!
parse-pattern-atom
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number")
(begin
(advance-tok!)
(if
(= (round tv) tv)
(list :plit (list :int tv))
(list :plit (list :float tv)))))
((= tt "string")
(begin (advance-tok!) (list :plit (list :string tv))))
((= tt "char")
(begin (advance-tok!) (list :plit (list :char tv))))
((and (= tt "keyword") (= tv "true"))
(begin (advance-tok!) (list :plit (list :bool true))))
((and (= tt "keyword") (= tv "false"))
(begin (advance-tok!) (list :plit (list :bool false))))
((and (= tt "ident") (= tv "_"))
(begin (advance-tok!) (list :pwild)))
((= tt "ident") (begin (advance-tok!) (list :pvar tv)))
((= tt "ctor") (begin (advance-tok!) (list :pcon tv)))
((and (= tt "op") (= tv "("))
(begin
(advance-tok!)
(cond
((at-op? ")")
(begin (advance-tok!) (list :plit (list :unit))))
(else
(let
((first (parse-pattern)))
(cond
((at-op? ",")
(let
((items (list first)))
(begin
(define
loop
(fn
()
(when
(at-op? ",")
(begin
(advance-tok!)
(append! items (parse-pattern))
(loop)))))
(loop)
(consume! "op" ")")
(cons :ptuple items))))
(else (begin (consume! "op" ")") first))))))))
((and (= tt "op") (= tv "["))
(begin
(advance-tok!)
(cond
((at-op? "]") (begin (advance-tok!) (list :plist)))
(else
(let
((items (list)))
(begin
(append! items (parse-pattern))
(define
loop
(fn
()
(when
(at-op? ";")
(begin
(advance-tok!)
(when
(not (at-op? "]"))
(begin
(append! items (parse-pattern))
(loop)))))))
(loop)
(consume! "op" "]")
(cons :plist items)))))))
(else
(error
(str
"ocaml-parse: unexpected pattern token "
tt
" "
tv
" at idx "
idx)))))))
(set!
parse-pattern-app
(fn
()
(let
((head (parse-pattern-atom)))
(cond
((and (= (nth head 0) :pcon) (at-pattern-atom?))
(let
((arg (parse-pattern-atom)))
(let
((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg)))))
(concat (list :pcon (nth head 1)) args))))
(else head)))))
(set!
parse-pattern-cons
(fn
()
(let
((lhs (parse-pattern-app)))
(cond
((at-op? "::")
(begin
(advance-tok!)
(list :pcons lhs (parse-pattern-cons))))
(else lhs)))))
(set! parse-pattern (fn () (parse-pattern-cons)))
(define parse-expr (fn () nil))
(define parse-tuple (fn () nil))
(define parse-binop-rhs (fn (lhs min-prec) lhs))
@@ -393,6 +534,40 @@
((else-expr (parse-expr)))
(list :if cond-expr then-expr else-expr))))
(else (list :if cond-expr then-expr (list :unit)))))))))
(define
parse-match
(fn
()
(let
((scrut (parse-expr)))
(begin
(consume! "keyword" "with")
(when (at-op? "|") (advance-tok!))
(let
((cases (list)))
(begin
(define
one
(fn
()
(let
((p (parse-pattern)))
(begin
(consume! "op" "->")
(let
((body (parse-match-body)))
(append! cases (list :case p body)))))))
(one)
(define
loop
(fn
()
(when
(at-op? "|")
(begin (advance-tok!) (one) (loop)))))
(loop)
(cons :match (cons scrut (list cases)))))))))
(define parse-match-body (fn () (parse-expr)))
(set!
parse-expr
(fn
@@ -401,6 +576,7 @@
((at-kw? "fun") (begin (advance-tok!) (parse-fun)))
((at-kw? "let") (begin (advance-tok!) (parse-let)))
((at-kw? "if") (begin (advance-tok!) (parse-if)))
((at-kw? "match") (begin (advance-tok!) (parse-match)))
(else (parse-tuple)))))
(let
((result (parse-expr)))
@@ -422,7 +598,10 @@
(fn
(src)
(let
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list)))
((tokens (ocaml-tokenize src))
(idx 0)
(tok-len 0)
(decls (list)))
(begin
(set! tok-len (len tokens))
(define peek-tok (fn () (nth tokens idx)))