Files
rose-ash/lib/ocaml/parser.sx
giles 6dc535dde3
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 21s
ocaml: phase 4 'let open M in body' local opens (+3 tests, 478 total)
Parser detects 'let open' as a separate let-form, parses M as a path
(Ctor(.Ctor)*) directly via inline AST construction (no source slicing
since cur-pos is only available in ocaml-parse-program), and emits
(:let-open PATH BODY).

Eval resolves the path to a module dict and merges its bindings into
the env for body evaluation. Now:

  let open List in map (fun x -> x * 2) [1;2;3]   = [2;4;6]
  let open Option in map (fun x -> x + 1) (Some 5) = Some 6
2026-05-08 21:33:14 +00:00

1592 lines
67 KiB
Plaintext

;; lib/ocaml/parser.sx — OCaml expression parser.
;;
;; Input: token list from (ocaml-tokenize src).
;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head;
;; keywords serialize to their string name so `(list :var "x")` is the
;; same value as `(list "var" "x")` at runtime.
;;
;; Expression scope:
;; atoms int/float/string/char/bool, unit (), var, con, list literal
;; application left-associative, f x y z
;; prefix -E unary minus, not E
;; infix 29 ops via lib/guest/pratt.sx
;; tuple a, b, c (lower than infix, higher than let/if)
;; parens (e)
;; if if c then t else e (else optional → unit)
;; fun fun x y -> body
;; let let x = e in body (no rec, function shorthand)
;; let rec f x = e in body
;; match match e with [|] p -> body | p -> body | ...
;; sequence e1 ; e2 → (:seq e1 e2 …) (lowest-precedence binary)
;;
;; Pattern scope:
;; _ (wildcard), int/string/char/bool literals, ident (var binding),
;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…),
;; list literal [pat;pat;…], cons p1 :: p2.
;;
;; AST shapes:
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
;; (:var NAME) (:con NAME)
;; (:app FN ARG)
;; (:op OP LHS RHS)
;; (:neg E) (:not E)
;; (:tuple ITEMS) (:list ITEMS)
;; (:seq EXPRS)
;; (:if C T E)
;; (:fun PARAMS BODY)
;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY)
;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...)
;;
;; (:pwild) (:pvar N) (:plit LIT)
;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary
;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL)
(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value))))
(define
ocaml-op-table
(list
(list ":=" 1 :right)
(list "<-" 1 :right)
(list "||" 2 :right)
(list "or" 2 :right)
(list "&&" 3 :right)
(list "&" 3 :right)
(list "=" 4 :left)
(list "<" 4 :left)
(list ">" 4 :left)
(list "<=" 4 :left)
(list ">=" 4 :left)
(list "<>" 4 :left)
(list "==" 4 :left)
(list "!=" 4 :left)
(list "|>" 4 :left)
(list "@" 5 :right)
(list "^" 5 :right)
(list "::" 6 :right)
(list "+" 7 :left)
(list "-" 7 :left)
(list "+." 7 :left)
(list "-." 7 :left)
(list "*" 8 :left)
(list "/" 8 :left)
(list "*." 8 :left)
(list "/." 8 :left)
(list "%" 8 :left)
(list "mod" 8 :left)
(list "land" 8 :left)
(list "lor" 8 :left)
(list "lxor" 8 :left)
(list "**" 9 :right)
(list "lsl" 9 :right)
(list "lsr" 9 :right)
(list "asr" 9 :right)))
(define
ocaml-binop-prec
(fn
(op)
(let
((entry (pratt-op-lookup ocaml-op-table op)))
(if (= entry nil) 0 (pratt-op-prec entry)))))
(define
ocaml-binop-right?
(fn
(op)
(let
((entry (pratt-op-lookup ocaml-op-table op)))
(and (not (= entry nil)) (= (pratt-op-assoc entry) :right)))))
(define
ocaml-tok-is-binop?
(fn
(tok)
(let
((tt (ocaml-tok-type tok)) (tv (ocaml-tok-value tok)))
(cond
((= tt "op") (not (= (ocaml-binop-prec tv) 0)))
((= tt "keyword") (not (= (ocaml-binop-prec tv) 0)))
(else false)))))
(define
ocaml-parse
(fn
(src)
(let
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0))
(begin
(set! tok-len (len tokens))
(define peek-tok (fn () (nth tokens idx)))
(define advance-tok! (fn () (set! idx (+ idx 1))))
(define
check-tok?
(fn
(type value)
(let
((t (peek-tok)))
(and
(= (ocaml-tok-type t) type)
(or (= value nil) (= (ocaml-tok-value t) value))))))
(define
consume!
(fn
(type value)
(if
(check-tok? type value)
(let ((t (peek-tok))) (begin (advance-tok!) t))
(error
(str
"ocaml-parse: expected "
type
" "
value
" got "
(ocaml-tok-type (peek-tok))
" "
(ocaml-tok-value (peek-tok)))))))
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
(define at-op? (fn (op) (check-tok? "op" op)))
(define parse-pattern (fn () nil))
(define parse-pattern-cons (fn () nil))
(define parse-pattern-app (fn () nil))
(define parse-pattern-atom (fn () nil))
(define
at-pattern-atom?
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number") true)
((= tt "string") true)
((= tt "char") true)
((= tt "ident") true)
((= tt "ctor") true)
((and (= tt "keyword") (or (= tv "true") (= tv "false")))
true)
((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{") (= tv "!"))) true)
(else false)))))
(set!
parse-pattern-atom
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number")
(begin
(advance-tok!)
(if
(= (round tv) tv)
(list :plit (list :int tv))
(list :plit (list :float tv)))))
((= tt "string")
(begin (advance-tok!) (list :plit (list :string tv))))
((= tt "char")
(begin (advance-tok!) (list :plit (list :char tv))))
((and (= tt "keyword") (= tv "true"))
(begin (advance-tok!) (list :plit (list :bool true))))
((and (= tt "keyword") (= tv "false"))
(begin (advance-tok!) (list :plit (list :bool false))))
((and (= tt "ident") (= tv "_"))
(begin (advance-tok!) (list :pwild)))
((= tt "ident") (begin (advance-tok!) (list :pvar tv)))
((= tt "ctor") (begin (advance-tok!) (list :pcon tv)))
((and (= tt "op") (= tv "("))
(begin
(advance-tok!)
(cond
((at-op? ")")
(begin (advance-tok!) (list :plit (list :unit))))
(else
(let
((first (parse-pattern)))
(cond
((at-op? ",")
(let
((items (list first)))
(begin
(define
loop
(fn
()
(when
(at-op? ",")
(begin
(advance-tok!)
(append! items (parse-pattern))
(loop)))))
(loop)
(consume! "op" ")")
(cons :ptuple items))))
;; Parens-only or-pattern: (P1 | P2 | ...).
((at-op? "|")
(let ((alts (list first)))
(begin
(define loop-or
(fn ()
(when (at-op? "|")
(begin (advance-tok!)
(append! alts (parse-pattern))
(loop-or)))))
(loop-or)
(consume! "op" ")")
(cons :por alts))))
(else (begin (consume! "op" ")") first))))))))
((and (= tt "op") (= tv "["))
(begin
(advance-tok!)
(cond
((at-op? "]") (begin (advance-tok!) (list :plist)))
(else
(let
((items (list)))
(begin
(append! items (parse-pattern))
(define
loop
(fn
()
(when
(at-op? ";")
(begin
(advance-tok!)
(when
(not (at-op? "]"))
(begin
(append! items (parse-pattern))
(loop)))))))
(loop)
(consume! "op" "]")
(cons :plist items)))))))
((and (= tt "op") (= tv "{"))
;; Record pattern: { f1 = pat1; f2 = pat2; ... }
(begin
(advance-tok!)
(let ((fields (list)))
(begin
(define one
(fn ()
(let ((fname (ocaml-tok-value (consume! "ident" nil))))
(begin
(consume! "op" "=")
(let ((fp (parse-pattern)))
(append! fields (list fname fp)))))))
(one)
(define more
(fn ()
(when (at-op? ";")
(begin (advance-tok!)
(when (not (at-op? "}"))
(begin (one) (more)))))))
(more)
(consume! "op" "}")
(cons :precord fields)))))
(else
(error
(str
"ocaml-parse: unexpected pattern token "
tt
" "
tv
" at idx "
idx)))))))
(set!
parse-pattern-app
(fn
()
(let
((head (parse-pattern-atom)))
(cond
((and (= (nth head 0) :pcon) (at-pattern-atom?))
(let
((arg (parse-pattern-atom)))
(let
((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg)))))
(concat (list :pcon (nth head 1)) args))))
(else head)))))
(set!
parse-pattern-cons
(fn
()
(let
((lhs (parse-pattern-app)))
(cond
((at-op? "::")
(begin
(advance-tok!)
(list :pcons lhs (parse-pattern-cons))))
(else lhs)))))
;; Top-level pattern is the cons-pat layer wrapped with optional
;; `pat as name` aliasing. Or-patterns are not supported at the
;; top level due to ambiguity with the match clause separator;
;; use `(A | B)` if needed in the future via a parens-only or.
(set! parse-pattern
(fn ()
(let ((p (parse-pattern-cons)))
(cond
((at-kw? "as")
(begin
(advance-tok!)
(let ((n (ocaml-tok-value (consume! "ident" nil))))
(list :pas p n))))
(else p)))))
(define peek-tok-at
(fn (n)
(if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil)))
;; Param consumption — matches ident, `_` (wildcard), or `()`
;; (unit). Returns a fresh ident name or nil if no param at cursor.
(define wild-counter (list 0))
(define try-consume-param!
(fn ()
(cond
((and (check-tok? "ident" nil)
(= (ocaml-tok-value (peek-tok)) "_"))
(begin
(advance-tok!)
(set-nth! wild-counter 0 (+ (nth wild-counter 0) 1))
(str "__wild_" (nth wild-counter 0))))
((check-tok? "ident" nil)
(let ((nm (ocaml-tok-value (peek-tok))))
(begin (advance-tok!) nm)))
((and (at-op? "(") (= (ocaml-tok-value (peek-tok-at 1)) ")"))
(begin
(advance-tok!) (advance-tok!)
(set-nth! wild-counter 0 (+ (nth wild-counter 0) 1))
(str "__unit_" (nth wild-counter 0))))
((and (at-op? "(") (= (ocaml-tok-type (peek-tok-at 1)) "ident"))
;; (x : T) — typed param. Skip the `: T` part.
(let ((nm (ocaml-tok-value (peek-tok-at 1))))
(begin
(advance-tok!) (advance-tok!)
(when (at-op? ":")
(begin
;; Skip until matching `)`.
(let ((d 1))
(begin
(define skip
(fn ()
(cond
((>= idx tok-len) nil)
((at-op? "(")
(begin (set! d (+ d 1)) (advance-tok!) (skip)))
((at-op? ")")
(cond
((= d 1) nil)
(else (begin (set! d (- d 1)) (advance-tok!) (skip)))))
(else (begin (advance-tok!) (skip))))))
(skip)))))
(consume! "op" ")")
nm)))
(else nil))))
(define parse-expr (fn () nil))
(define parse-expr-no-seq (fn () nil))
(define parse-tuple (fn () nil))
(define parse-binop-rhs (fn (lhs min-prec) lhs))
(define parse-prefix (fn () nil))
(define parse-app (fn () nil))
(define parse-atom (fn () nil))
(set!
parse-atom
(fn
()
(let
((t (peek-tok))
(tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number")
(begin
(advance-tok!)
(if (= (round tv) tv) (list :int tv) (list :float tv))))
((= tt "string") (begin (advance-tok!) (list :string tv)))
((= tt "char") (begin (advance-tok!) (list :char tv)))
((and (= tt "keyword") (= tv "true"))
(begin (advance-tok!) (list :bool true)))
((and (= tt "keyword") (= tv "false"))
(begin (advance-tok!) (list :bool false)))
((= tt "ident") (begin (advance-tok!) (list :var tv)))
((= tt "ctor") (begin (advance-tok!) (list :con tv)))
((and (= tt "op") (= tv "("))
(begin
(advance-tok!)
(cond
((at-op? ")") (begin (advance-tok!) (list :unit)))
(else
(let
((e (parse-expr)))
(begin
;; Optional type annotation `(e : T)` — skip
;; the type source before `)`.
(when (at-op? ":")
(begin
(advance-tok!)
(define skip-pty
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-op? ")") nil)
(else (begin (advance-tok!) (skip-pty))))))
(skip-pty)))
(consume! "op" ")") e))))))
((and (= tt "op") (= tv "["))
(begin
(advance-tok!)
(cond
((at-op? "]") (begin (advance-tok!) (list :list)))
(else
(let
((items (list)))
(begin
(append! items (parse-expr-no-seq))
(define
loop
(fn
()
(when
(at-op? ";")
(begin
(advance-tok!)
(when
(not (at-op? "]"))
(begin
(append! items (parse-expr-no-seq))
(loop)))))))
(loop)
(consume! "op" "]")
(cons :list items)))))))
((at-kw? "begin")
(begin
(advance-tok!)
(let
((e (parse-expr)))
(begin (consume! "keyword" "end") e))))
;; Record literal { f1 = e1; f2 = e2 } or update
;; { r with f1 = e1; f2 = e2 }.
((and (= tt "op") (= tv "{"))
(begin
(advance-tok!)
(cond
;; { r with field = expr; ... } — base ident + with.
((and (= (ocaml-tok-type (peek-tok)) "ident")
(= (ocaml-tok-value (peek-tok-at 1)) "with"))
(let ((base-name (ocaml-tok-value (peek-tok))))
(begin
(advance-tok!) ;; ident
(advance-tok!) ;; with
(let ((fields (list)))
(begin
(define one
(fn ()
(let ((fname (ocaml-tok-value (consume! "ident" nil))))
(begin
(consume! "op" "=")
(let ((fexpr (parse-expr-no-seq)))
(append! fields (list fname fexpr)))))))
(one)
(define more
(fn ()
(when (at-op? ";")
(begin (advance-tok!)
(when (not (at-op? "}"))
(begin (one) (more)))))))
(more)
(consume! "op" "}")
(cons :record-update
(cons (list :var base-name) fields)))))))
(else
;; Plain record literal { f = e; f = e; ... }.
(let ((fields (list)))
(begin
(define one
(fn ()
(let ((fname (ocaml-tok-value (consume! "ident" nil))))
(begin
(consume! "op" "=")
(let ((fexpr (parse-expr-no-seq)))
(append! fields (list fname fexpr)))))))
(one)
(define more
(fn ()
(when (at-op? ";")
(begin (advance-tok!)
(when (not (at-op? "}"))
(begin (one) (more)))))))
(more)
(consume! "op" "}")
(cons :record fields)))))))
(else
(error
(str
"ocaml-parse: unexpected token "
tt
" "
tv
" at idx "
idx)))))))
(define
at-app-start?
(fn
()
(let
((tt (ocaml-tok-type (peek-tok)))
(tv (ocaml-tok-value (peek-tok))))
(cond
((= tt "number") true)
((= tt "string") true)
((= tt "char") true)
((= tt "ident") true)
((= tt "ctor") true)
((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin")))
true)
((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{") (= tv "!"))) true)
(else false)))))
(define parse-atom-postfix
(fn ()
;; After a primary atom, consume `.field` chains. Field name
;; may be lower (record field, module value) or upper (module
;; or constructor reference). Note: `M.x.y` is left-assoc:
;; `(:field (:field M "x") "y")`.
(let ((head (parse-atom)))
(begin
(define loop
(fn ()
(when (at-op? ".")
(begin
(advance-tok!)
(let ((tok (peek-tok)))
(begin
(advance-tok!)
(set! head (list :field head (ocaml-tok-value tok)))
(loop)))))))
(loop)
head))))
(set!
parse-app
(fn
()
(let
((head (parse-atom-postfix)))
(begin
(define
loop
(fn
()
(when
(at-app-start?)
(let
((arg
(cond
((at-op? "!")
(begin (advance-tok!)
(list :deref (parse-atom-postfix))))
(else (parse-atom-postfix)))))
(begin (set! head (list :app head arg)) (loop))))))
(loop)
head))))
(set!
parse-prefix
(fn
()
(cond
((at-op? "-")
(begin (advance-tok!) (list :neg (parse-prefix))))
((at-op? "!")
(begin (advance-tok!) (list :deref (parse-prefix))))
((at-kw? "not")
(begin (advance-tok!) (list :not (parse-prefix))))
(else (parse-app)))))
(set!
parse-binop-rhs
(fn
(lhs min-prec)
(let
((tok (peek-tok)))
(cond
((not (ocaml-tok-is-binop? tok)) lhs)
(else
(let
((op (ocaml-tok-value tok))
(prec (ocaml-binop-prec (ocaml-tok-value tok))))
(cond
((< prec min-prec) lhs)
(else
(begin
(advance-tok!)
(let
((rhs (parse-prefix))
(next-min
(if
(ocaml-binop-right? op)
prec
(+ prec 1))))
(begin
(set! rhs (parse-binop-rhs rhs next-min))
(parse-binop-rhs (list :op op lhs rhs) min-prec))))))))))))
(define
parse-binary
(fn
()
(let ((lhs (parse-prefix))) (parse-binop-rhs lhs 1))))
(set!
parse-tuple
(fn
()
(let
((first (parse-binary)))
(cond
((at-op? ",")
(let
((items (list first)))
(begin
(define
loop
(fn
()
(when
(at-op? ",")
(begin
(advance-tok!)
(append! items (parse-binary))
(loop)))))
(loop)
(cons :tuple items))))
(else first)))))
(define
parse-fun
(fn
()
(let
((params (list)))
(begin
(define
collect-params
(fn ()
(let ((nm (try-consume-param!)))
(when (not (= nm nil))
(begin (append! params nm) (collect-params))))))
(collect-params)
(when
(= (len params) 0)
(error "ocaml-parse: fun expects at least one parameter"))
(consume! "op" "->")
(let ((body (parse-expr))) (list :fun params body))))))
(define
parse-let
(fn ()
;; `let open M in body` — local open. Detect early so the
;; rest of the let-handler doesn't try to parse `open` as
;; an ident name.
(cond
((at-kw? "open")
(begin
(advance-tok!)
;; Read path as Ctor(.Ctor)* and build :field-chain AST.
(let ((path nil))
(begin
(when (= (ocaml-tok-type (peek-tok)) "ctor")
(begin
(set! path (list :con (ocaml-tok-value (peek-tok))))
(advance-tok!)))
(define more
(fn ()
(when (and (at-op? ".")
(= (ocaml-tok-type
(nth tokens (+ idx 1))) "ctor"))
(begin
(advance-tok!) ;; .
(let ((nm (ocaml-tok-value (peek-tok))))
(begin
(advance-tok!)
(set! path (list :field path nm))))
(more)))))
(more)
(consume! "keyword" "in")
(let ((body (parse-expr)))
(list :let-open path body))))))
(else
(let ((reccy false) (bindings (list)))
(begin
(when (at-kw? "rec")
(begin (advance-tok!) (set! reccy true)))
(define parse-one!
(fn ()
(let ((nm (ocaml-tok-value (consume! "ident" nil)))
(ps (list)))
(begin
(define collect-params
(fn ()
(let ((p (try-consume-param!)))
(when (not (= p nil))
(begin (append! ps p) (collect-params))))))
(collect-params)
;; Optional type annotation: skip `: TYPE` before `=`.
(when (at-op? ":")
(begin
(advance-tok!)
(define skip-tann
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-op? "=") nil)
(else (begin (advance-tok!) (skip-tann))))))
(skip-tann)))
(consume! "op" "=")
(let ((rhs (parse-expr)))
(append! bindings (list nm ps rhs)))))))
(parse-one!)
(define more
(fn ()
(when (at-kw? "and")
(begin (advance-tok!) (parse-one!) (more)))))
(more)
(consume! "keyword" "in")
(let ((body (parse-expr)))
(cond
((= (len bindings) 1)
(let ((b (first bindings)))
(if reccy
(list :let-rec (nth b 0) (nth b 1) (nth b 2) body)
(list :let (nth b 0) (nth b 1) (nth b 2) body))))
(else
(if reccy
(list :let-rec-mut bindings body)
(list :let-mut bindings body)))))))))))
(define
parse-if
(fn
()
(let
((cond-expr (parse-expr-no-seq)))
(begin
(consume! "keyword" "then")
(let
((then-expr (parse-expr-no-seq)))
(cond
((at-kw? "else")
(begin
(advance-tok!)
(let
((else-expr (parse-expr-no-seq)))
(list :if cond-expr then-expr else-expr))))
(else (list :if cond-expr then-expr (list :unit)))))))))
(define
parse-match
(fn
()
(let
((scrut (parse-expr-no-seq)))
(begin
(consume! "keyword" "with")
(when (at-op? "|") (advance-tok!))
(let
((cases (list)))
(begin
(define
one
(fn
()
(let
((p (parse-pattern)) (guard nil))
(begin
(when (at-kw? "when")
(begin
(advance-tok!)
(set! guard (parse-expr-no-seq))))
(consume! "op" "->")
(let
((body (parse-expr)))
(cond
((= guard nil)
(append! cases (list :case p body)))
(else
(append! cases (list :case-when p guard body)))))))))
(one)
(define
loop
(fn
()
(when
(at-op? "|")
(begin (advance-tok!) (one) (loop)))))
(loop)
(cons :match (cons scrut (list cases)))))))))
(define parse-try
(fn ()
(let ((expr (parse-expr-no-seq)))
(begin
(consume! "keyword" "with")
(when (at-op? "|") (advance-tok!))
(let ((cases (list)))
(begin
(define one
(fn ()
(let ((p (parse-pattern)) (guard nil))
(begin
(when (at-kw? "when")
(begin (advance-tok!)
(set! guard (parse-expr-no-seq))))
(consume! "op" "->")
(let ((body (parse-expr)))
(cond
((= guard nil)
(append! cases (list :case p body)))
(else
(append! cases (list :case-when p guard body)))))))))
(one)
(define loop
(fn ()
(when (at-op? "|")
(begin (advance-tok!) (one) (loop)))))
(loop)
(list :try expr cases)))))))
(define parse-function
(fn ()
;; `function | pat [when GUARD] -> body | …`
(let ()
(begin
(when (at-op? "|") (advance-tok!))
(let ((cases (list)))
(begin
(define one
(fn ()
(let ((p (parse-pattern)) (guard nil))
(begin
(when (at-kw? "when")
(begin (advance-tok!)
(set! guard (parse-expr-no-seq))))
(consume! "op" "->")
(let ((body (parse-expr)))
(cond
((= guard nil)
(append! cases (list :case p body)))
(else
(append! cases (list :case-when p guard body)))))))))
(one)
(define loop
(fn ()
(when (at-op? "|")
(begin (advance-tok!) (one) (loop)))))
(loop)
(list :function cases)))))))
(define parse-for
(fn ()
(let ((name (ocaml-tok-value (consume! "ident" nil))))
(begin
(consume! "op" "=")
(let ((lo (parse-expr-no-seq)))
(let ((dir
(cond
((at-kw? "to") (begin (advance-tok!) :ascend))
((at-kw? "downto") (begin (advance-tok!) :descend))
(else (error "ocaml-parse: expected to/downto in for")))))
(let ((hi (parse-expr-no-seq)))
(begin
(consume! "keyword" "do")
(let ((body (parse-expr)))
(begin
(consume! "keyword" "done")
(list :for name lo hi dir body)))))))))))
(define parse-while
(fn ()
(let ((cond-expr (parse-expr-no-seq)))
(begin
(consume! "keyword" "do")
(let ((body (parse-expr)))
(begin
(consume! "keyword" "done")
(list :while cond-expr body)))))))
(set!
parse-expr-no-seq
(fn
()
(cond
((at-kw? "fun") (begin (advance-tok!) (parse-fun)))
((at-kw? "let") (begin (advance-tok!) (parse-let)))
((at-kw? "if") (begin (advance-tok!) (parse-if)))
((at-kw? "match") (begin (advance-tok!) (parse-match)))
((at-kw? "function") (begin (advance-tok!) (parse-function)))
((at-kw? "for") (begin (advance-tok!) (parse-for)))
((at-kw? "while") (begin (advance-tok!) (parse-while)))
((at-kw? "try") (begin (advance-tok!) (parse-try)))
(else (parse-tuple)))))
(set!
parse-expr
(fn
()
(let
((lhs (parse-expr-no-seq)))
(cond
((at-op? ";")
(let
((items (list lhs)))
(begin
(define
loop
(fn
()
(when
(at-op? ";")
(begin
(advance-tok!)
(cond
((at-kw? "end") nil)
((at-op? ")") nil)
((at-op? "|") nil)
((at-kw? "in") nil)
((at-kw? "then") nil)
((at-kw? "else") nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
(else
(begin
(append! items (parse-expr-no-seq))
(loop))))))))
(loop)
(cons :seq items))))
(else lhs)))))
(let
((result (parse-expr)))
(begin
(when
(not (= (ocaml-tok-type (peek-tok)) "eof"))
(error
(str
"ocaml-parse: trailing tokens at idx "
idx
" — got "
(ocaml-tok-type (peek-tok))
" "
(ocaml-tok-value (peek-tok)))))
result))))))
(define
ocaml-parse-program
(fn
(src)
(let
((tokens (ocaml-tokenize src))
(idx 0)
(tok-len 0)
(decls (list)))
(begin
(set! tok-len (len tokens))
(define peek-tok (fn () (nth tokens idx)))
(define advance-tok! (fn () (set! idx (+ idx 1))))
(define
check-tok?
(fn
(type value)
(let
((t (peek-tok)))
(and
(= (ocaml-tok-type t) type)
(or (= value nil) (= (ocaml-tok-value t) value))))))
(define
consume!
(fn
(type value)
(if
(check-tok? type value)
(let ((t (peek-tok))) (begin (advance-tok!) t))
(error
(str
"ocaml-parse-program: expected "
type
" "
value
" got "
(ocaml-tok-type (peek-tok))
" "
(ocaml-tok-value (peek-tok)))))))
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
(define at-op? (fn (op) (check-tok? "op" op)))
(define
skip-double-semi!
(fn
()
(when (at-op? ";;") (begin (advance-tok!) (skip-double-semi!)))))
(define
cur-pos
(fn
()
(let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos)))))
;; Two flavors of boundary skipping:
;;
;; * `skip-to-decl-boundary!` — used by parse-decl-expr. Stops
;; at the start of the next top-level decl: ;;, let, module,
;; open, include, and, type, exception, or eof.
;;
;; * `skip-let-rhs-boundary!` — used inside parse-decl-let after
;; the `=`. Treats `let` as the opener of a nested let..in
;; block (NOT a decl boundary), so `let f x = let y = 0 in y`
;; parses correctly. Boundary tokens (depth 0): ;;, module,
;; open, include, and, type, exception, or eof.
;; Lookahead: starting just past a `let` at the cursor, scan
;; for a matching `in` before the next decl boundary. Returns
;; true iff such an `in` exists — meaning the let is nested,
;; not a new decl.
(define has-matching-in?
(fn ()
(let ((p (+ idx 1)) (d 1) (result false) (done false))
(begin
(define scan
(fn ()
(when (not done)
(cond
((>= p tok-len) (set! done true))
(else
(let ((t (nth tokens p)))
(let ((tt (ocaml-tok-type t)) (tv (ocaml-tok-value t)))
(cond
((= tt "eof") (set! done true))
((and (= tt "op") (= tv ";;")) (set! done true))
((and (= tt "keyword") (= tv "module")) (set! done true))
((and (= tt "keyword") (= tv "type")) (set! done true))
((and (= tt "keyword") (= tv "exception")) (set! done true))
((and (= tt "keyword") (= tv "open")) (set! done true))
((and (= tt "keyword") (= tv "include")) (set! done true))
((and (= tt "keyword") (= tv "let"))
(begin (set! d (+ d 1)) (set! p (+ p 1)) (scan)))
((and (= tt "keyword") (= tv "in"))
(cond
((= d 1) (begin (set! result true) (set! done true)))
(else
(begin (set! d (- d 1)) (set! p (+ p 1)) (scan)))))
(else (begin (set! p (+ p 1)) (scan)))))))))))
(scan)
result))))
;; Same as skip-to-boundary but treats inner `let` as the start
;; of a nested let..in (open depth) IF a matching `in` exists
;; before any decl boundary; otherwise stops.
(define
skip-let-rhs-boundary!
(fn ()
(let ((depth 0))
(begin
(define step
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((and (= depth 0) (at-op? ";;")) nil)
((and (= depth 0) (at-kw? "module")) nil)
((and (= depth 0) (at-kw? "open")) nil)
((and (= depth 0) (at-kw? "include")) nil)
((and (= depth 0) (at-kw? "and")) nil)
((and (= depth 0) (at-kw? "type")) nil)
((and (= depth 0) (at-kw? "exception")) nil)
((and (= depth 0) (at-kw? "let"))
(cond
((has-matching-in?)
(begin (set! depth (+ depth 1)) (advance-tok!) (step)))
(else nil)))
((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct")
(at-kw? "sig") (at-kw? "for") (at-kw? "while"))
(begin (set! depth (+ depth 1)) (advance-tok!) (step)))
((or (at-kw? "in") (at-kw? "end") (at-kw? "done"))
(begin
(when (> depth 0) (set! depth (- depth 1)))
(advance-tok!) (step)))
(else (begin (advance-tok!) (step))))))
(step)))))
(define
skip-to-boundary!
(fn ()
(let ((depth 0))
(begin
(define step
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((and (= depth 0) (at-op? ";;")) nil)
((and (= depth 0) (at-kw? "let")) nil)
((and (= depth 0) (at-kw? "module")) nil)
((and (= depth 0) (at-kw? "open")) nil)
((and (= depth 0) (at-kw? "include")) nil)
((and (= depth 0) (at-kw? "and")) nil)
((and (= depth 0) (at-kw? "type")) nil)
((and (= depth 0) (at-kw? "exception")) nil)
((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct")
(at-kw? "sig") (at-kw? "for") (at-kw? "while"))
(begin (set! depth (+ depth 1)) (advance-tok!) (step)))
((or (at-kw? "in") (at-kw? "end") (at-kw? "done"))
(begin
(when (> depth 0) (set! depth (- depth 1)))
(advance-tok!) (step)))
(else (begin (advance-tok!) (step))))))
(step)))))
(define
parse-decl-let
(fn ()
(advance-tok!)
(let ((reccy false) (bindings (list)))
(begin
(when (at-kw? "rec") (begin (advance-tok!) (set! reccy true)))
(define parse-one!
(fn ()
(let ((nm (ocaml-tok-value (consume! "ident" nil)))
(ps (list)))
(begin
(define collect-params
(fn ()
(cond
((check-tok? "ident" nil)
(begin
(append! ps (ocaml-tok-value (peek-tok)))
(advance-tok!)
(collect-params)))
((and (at-op? "(")
(< (+ idx 1) tok-len)
(let ((t1 (nth tokens (+ idx 1))))
(and (= (ocaml-tok-type t1) "op")
(= (ocaml-tok-value t1) ")"))))
(begin
(advance-tok!) (advance-tok!)
(append! ps (str "__unit_" idx))
(collect-params)))
(else nil))))
(collect-params)
;; Optional type annotation: skip `: TYPE` before `=`.
(when (at-op? ":")
(begin
(advance-tok!)
(define skip-tann
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-op? "=") nil)
(else (begin (advance-tok!) (skip-tann))))))
(skip-tann)))
(consume! "op" "=")
(let ((expr-start (cur-pos)))
(begin
(skip-let-rhs-boundary!)
(let ((expr-src (slice src expr-start (cur-pos))))
(let ((expr (ocaml-parse expr-src)))
(append! bindings (list nm ps expr))))))))))
(parse-one!)
(define more
(fn ()
(when (at-kw? "and")
(begin (advance-tok!) (parse-one!) (more)))))
(more)
(cond
((= (len bindings) 1)
(let ((b (first bindings)))
(if reccy
(list :def-rec (nth b 0) (nth b 1) (nth b 2))
(list :def (nth b 0) (nth b 1) (nth b 2)))))
(else
(if reccy
(list :def-rec-mut bindings)
(list :def-mut bindings))))))))
(define
parse-decl-expr
(fn
()
(let
((expr-start (cur-pos)))
(begin
(skip-to-boundary!)
(let
((expr-src (slice src expr-start (cur-pos))))
(let ((expr (ocaml-parse expr-src))) (list :expr expr)))))))
;; module M = struct DECLS end
;; Parsed by sub-tokenising the body source between `struct` and
;; the matching `end`. Nested modules / sigs increment depth.
;; exception NAME [of TYPE [* TYPE]*]
(define
parse-decl-exception
(fn ()
(advance-tok!) ;; consume 'exception'
(let ((name (ocaml-tok-value (consume! "ctor" nil)))
(arg-srcs (list)))
(begin
(when (at-kw? "of")
(begin
(advance-tok!)
(let ((arg-start (cur-pos)))
(begin
(define skip-type
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-op? ";;") nil)
((at-kw? "let") nil)
((at-kw? "type") nil)
((at-kw? "and") nil)
((at-kw? "module") nil)
((at-kw? "exception") nil)
(else (begin (advance-tok!) (skip-type))))))
(skip-type)
(append! arg-srcs (slice src arg-start (cur-pos)))))))
(cons :exception-def (cons name arg-srcs))))))
;; type [PARAMS] NAME = | Ctor [of T1 [* T2]*] | …
;;
;; PARAMS is `'a` or `('a, 'b)` (single or paren-tuple of tyvars).
;; We parse the structure and emit `(:type-def NAME PARAMS CTORS)`
;; where each CTOR is `(NAME ARG-TYPES)` (ARG-TYPES list of source
;; strings — types are treated opaquely at runtime).
(define
parse-decl-type
(fn ()
(advance-tok!) ;; consume 'type'
(let ((tparams (list)))
(begin
;; Optional type-vars before the type name.
(cond
((= (ocaml-tok-type (peek-tok)) "tyvar")
(begin
(append! tparams (ocaml-tok-value (peek-tok)))
(advance-tok!)))
((at-op? "(")
(begin
(advance-tok!)
(define more
(fn ()
(when (= (ocaml-tok-type (peek-tok)) "tyvar")
(begin
(append! tparams (ocaml-tok-value (peek-tok)))
(advance-tok!)
(when (at-op? ",")
(begin (advance-tok!) (more)))))))
(more)
(consume! "op" ")"))))
(let ((name (ocaml-tok-value (consume! "ident" nil))))
(begin
(consume! "op" "=")
(cond
;; Record type: type NAME = { f1 [: T1]; f2 [: T2]; ... }
((at-op? "{")
(begin
(advance-tok!)
(let ((fields (list)))
(begin
(define field-one
(fn ()
(let ((mut false))
(begin
(when (at-kw? "mutable")
(begin (advance-tok!) (set! mut true)))
(let ((fname (ocaml-tok-value (consume! "ident" nil))))
(begin
(when (at-op? ":")
(begin
(advance-tok!)
(define skip-fty
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-op? ";") nil)
((at-op? "}") nil)
(else (begin (advance-tok!) (skip-fty))))))
(skip-fty)))
(append! fields
(if mut
(list :mutable fname)
(list fname)))))))))
(field-one)
(define field-more
(fn ()
(when (at-op? ";")
(begin (advance-tok!)
(when (not (at-op? "}"))
(begin (field-one) (field-more)))))))
(field-more)
(consume! "op" "}")
(list :type-def-record name tparams fields)))))
;; Type alias: type t = int / type t = 'a list / etc.
;; Detected when next token is NOT `|` and NOT a ctor.
((and (not (at-op? "|"))
(not (= (ocaml-tok-type (peek-tok)) "ctor")))
(begin
;; Skip the alias source up to the next boundary.
(define skip-alias
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-op? ";;") nil)
((at-kw? "let") nil)
((at-kw? "type") nil)
((at-kw? "and") nil)
((at-kw? "module") nil)
((at-kw? "exception") nil)
((at-kw? "open") nil)
((at-kw? "include") nil)
((at-kw? "end") nil)
(else (begin (advance-tok!) (skip-alias))))))
(skip-alias)
(list :type-alias name tparams)))
(else
(begin
(when (at-op? "|") (advance-tok!))
;; Sum type: Ctor [of TYPE [* TYPE]*] (| Ctor …)*
(let ((ctors (list)))
(begin
(define one
(fn ()
(let ((cname (ocaml-tok-value (consume! "ctor" nil)))
(arg-srcs (list)))
(begin
(when (at-kw? "of")
(begin
(advance-tok!)
(let ((arg-start (cur-pos)))
(begin
(define skip-type
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-op? "|") nil)
((at-op? ";;") nil)
((at-kw? "let") nil)
((at-kw? "type") nil)
((at-kw? "and") nil)
((at-kw? "module") nil)
(else (begin (advance-tok!) (skip-type))))))
(skip-type)
(append! arg-srcs (slice src arg-start (cur-pos)))))))
(append! ctors (cons cname arg-srcs))))))
(one)
(define more
(fn ()
(when (at-op? "|")
(begin (advance-tok!) (one) (more)))))
(more)
(list :type-def name tparams ctors))))))))))))
;; open M / include M — collect a path Ctor(.SubCtor)* and emit
;; (:open PATH) or (:include PATH).
(define
parse-decl-open
(fn (include?)
(advance-tok!)
(let ((path-start (cur-pos)))
(begin
;; Walk until end of the path. A path is Ctor (. Ctor)*.
(define skip-path
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "ctor")
(begin (advance-tok!) (skip-path)))
((at-op? ".") (begin (advance-tok!) (skip-path)))
(else nil))))
(skip-path)
(let ((path-src (slice src path-start (cur-pos))))
(let ((path-expr (ocaml-parse path-src)))
(if include?
(list :include path-expr)
(list :open path-expr))))))))
;; Parse a `struct DECLS end` body and return the decls list.
(define
parse-struct-body
(fn ()
(consume! "keyword" "struct")
(let ((body-start (cur-pos)) (depth 1))
(begin
(define skip
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-kw? "struct")
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
((at-kw? "begin")
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
((at-kw? "sig")
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
((at-kw? "end")
(cond
((= depth 1) nil)
(else
(begin (set! depth (- depth 1)) (advance-tok!) (skip)))))
(else (begin (advance-tok!) (skip))))))
(skip)
(let ((body-end (cur-pos)))
(begin
(consume! "keyword" "end")
(let ((body-src (slice src body-start body-end)))
(let ((body-prog (ocaml-parse-program body-src)))
(rest body-prog)))))))))
;; Skip an optional `: Sig` constraint (parens-balanced; we
;; ignore signatures in this iteration).
(define
skip-optional-sig
(fn ()
(when (at-op? ":")
(begin
(advance-tok!)
(let ((depth 0))
(begin
(define skip
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((and (= depth 0) (at-op? ")")) nil)
((and (= depth 0) (at-op? "=")) nil)
((at-op? "(")
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
((at-kw? "sig")
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
((at-op? ")")
(begin (set! depth (- depth 1)) (advance-tok!) (skip)))
((at-kw? "end")
(begin (set! depth (- depth 1)) (advance-tok!) (skip)))
(else (begin (advance-tok!) (skip))))))
(skip)))))))
;; module type S = sig ... end
;; Parsed-and-discarded (signatures are type-level only). Returns
;; a (:module-type-def NAME) marker for the eval loop to ignore.
(define
parse-decl-module-type
(fn ()
(advance-tok!) ;; "type"
(let ((name (ocaml-tok-value (consume! "ctor" nil))))
(begin
(consume! "op" "=")
(cond
((at-kw? "sig")
(begin
(advance-tok!)
(let ((depth 1))
(begin
(define skip
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((or (at-kw? "sig") (at-kw? "struct") (at-kw? "begin"))
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
((at-kw? "end")
(cond
((= depth 1) nil)
(else
(begin (set! depth (- depth 1)) (advance-tok!) (skip)))))
(else (begin (advance-tok!) (skip))))))
(skip)
(consume! "keyword" "end")))))
(else
;; module type S = AnotherSig — skip-to-boundary.
(skip-to-boundary!)))
(list :module-type-def name)))))
(define
parse-decl-module
(fn ()
(advance-tok!)
(cond
((at-kw? "type") (parse-decl-module-type))
(else (parse-decl-module-rest)))))
(define
parse-decl-module-rest
(fn ()
(let ((name (ocaml-tok-value (consume! "ctor" nil)))
(params (list)))
(begin
;; Functor parameters: `(P)` or `(P : Sig)`, repeated.
(define collect-params
(fn ()
(when (at-op? "(")
(begin
(advance-tok!)
(when (= (ocaml-tok-type (peek-tok)) "ctor")
(begin
(append! params (ocaml-tok-value (peek-tok)))
(advance-tok!)))
(skip-optional-sig)
(consume! "op" ")")
(collect-params)))))
(collect-params)
(skip-optional-sig)
(consume! "op" "=")
(cond
;; Body is `struct DECLS end` — possibly a functor body.
((at-kw? "struct")
(let ((decls (parse-struct-body)))
(cond
((= (len params) 0) (list :module-def name decls))
(else (list :functor-def name params decls)))))
;; Body is a path possibly applied: `M`, `M.Sub`, `F(A)`, `F(A)(B)`.
(else
(let ((body-start (cur-pos)))
(begin
(define skip-path-app
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "ctor")
(begin (advance-tok!) (skip-path-app)))
((at-op? ".")
(begin (advance-tok!) (skip-path-app)))
((at-op? "(")
;; Paren-balanced argument list.
(let ((d 1))
(begin
(advance-tok!)
(define skip-args
(fn ()
(cond
((>= idx tok-len) nil)
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-op? "(")
(begin (set! d (+ d 1)) (advance-tok!) (skip-args)))
((at-op? ")")
(cond
((= d 1) (begin (advance-tok!) nil))
(else (begin (set! d (- d 1)) (advance-tok!) (skip-args)))))
(else (begin (advance-tok!) (skip-args))))))
(skip-args)
(skip-path-app))))
(else nil))))
(skip-path-app)
(let ((body-src (slice src body-start (cur-pos))))
(list :module-alias name body-src))))))))))
(define
loop
(fn
()
(begin
(skip-double-semi!)
(when
(< idx tok-len)
(cond
((= (ocaml-tok-type (peek-tok)) "eof") nil)
((at-kw? "let")
(begin (append! decls (parse-decl-let)) (loop)))
((at-kw? "module")
(begin (append! decls (parse-decl-module)) (loop)))
((at-kw? "open")
(begin (append! decls (parse-decl-open false)) (loop)))
((at-kw? "include")
(begin (append! decls (parse-decl-open true)) (loop)))
((at-kw? "type")
(begin (append! decls (parse-decl-type)) (loop)))
((at-kw? "exception")
(begin (append! decls (parse-decl-exception)) (loop)))
(else (begin (append! decls (parse-decl-expr)) (loop))))))))
(loop)
(cons :program decls)))))