;; lib/ocaml/parser.sx — OCaml expression parser. ;; ;; Input: token list from (ocaml-tokenize src). ;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head; ;; keywords serialize to their string name so `(list :var "x")` is the ;; same value as `(list "var" "x")` at runtime. ;; ;; Expression scope: ;; atoms int/float/string/char/bool, unit (), var, con, list literal ;; application left-associative, f x y z ;; prefix -E unary minus, not E ;; infix 29 ops via lib/guest/pratt.sx ;; tuple a, b, c (lower than infix, higher than let/if) ;; parens (e) ;; if if c then t else e (else optional → unit) ;; fun fun x y -> body ;; let let x = e in body (no rec, function shorthand) ;; let rec f x = e in body ;; match match e with [|] p -> body | p -> body | ... ;; sequence e1 ; e2 → (:seq e1 e2 …) (lowest-precedence binary) ;; ;; Pattern scope: ;; _ (wildcard), int/string/char/bool literals, ident (var binding), ;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…), ;; list literal [pat;pat;…], cons p1 :: p2. ;; ;; AST shapes: ;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit) ;; (:var NAME) (:con NAME) ;; (:app FN ARG) ;; (:op OP LHS RHS) ;; (:neg E) (:not E) ;; (:tuple ITEMS) (:list ITEMS) ;; (:seq EXPRS) ;; (:if C T E) ;; (:fun PARAMS BODY) ;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY) ;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...) ;; ;; (:pwild) (:pvar N) (:plit LIT) ;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary ;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL) (define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) (define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value)))) (define ocaml-op-table (list (list ":=" 1 :right) (list "<-" 1 :right) (list "||" 2 :right) (list "or" 2 :right) (list "&&" 3 :right) (list "&" 3 :right) (list "=" 4 :left) (list "<" 4 :left) (list ">" 4 :left) (list "<=" 4 :left) (list ">=" 4 :left) (list "<>" 4 :left) (list "==" 4 :left) (list "!=" 4 :left) (list "|>" 4 :left) (list "@" 5 :right) (list "^" 5 :right) (list "::" 6 :right) (list "+" 7 :left) (list "-" 7 :left) (list "+." 7 :left) (list "-." 7 :left) (list "*" 8 :left) (list "/" 8 :left) (list "*." 8 :left) (list "/." 8 :left) (list "%" 8 :left) (list "mod" 8 :left) (list "land" 8 :left) (list "lor" 8 :left) (list "lxor" 8 :left) (list "**" 9 :right) (list "lsl" 9 :right) (list "lsr" 9 :right) (list "asr" 9 :right))) (define ocaml-binop-prec (fn (op) (let ((entry (pratt-op-lookup ocaml-op-table op))) (if (= entry nil) 0 (pratt-op-prec entry))))) (define ocaml-binop-right? (fn (op) (let ((entry (pratt-op-lookup ocaml-op-table op))) (and (not (= entry nil)) (= (pratt-op-assoc entry) :right))))) (define ocaml-tok-is-binop? (fn (tok) (let ((tt (ocaml-tok-type tok)) (tv (ocaml-tok-value tok))) (cond ((= tt "op") (not (= (ocaml-binop-prec tv) 0))) ((= tt "keyword") (not (= (ocaml-binop-prec tv) 0))) (else false))))) (define ocaml-parse (fn (src) (let ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0)) (begin (set! tok-len (len tokens)) (define peek-tok (fn () (nth tokens idx))) (define advance-tok! (fn () (set! idx (+ idx 1)))) (define check-tok? (fn (type value) (let ((t (peek-tok))) (and (= (ocaml-tok-type t) type) (or (= value nil) (= (ocaml-tok-value t) value)))))) (define consume! (fn (type value) (if (check-tok? type value) (let ((t (peek-tok))) (begin (advance-tok!) t)) (error (str "ocaml-parse: expected " type " " value " got " (ocaml-tok-type (peek-tok)) " " (ocaml-tok-value (peek-tok))))))) (define at-kw? (fn (kw) (check-tok? "keyword" kw))) (define at-op? (fn (op) (check-tok? "op" op))) (define parse-pattern (fn () nil)) (define parse-pattern-cons (fn () nil)) (define parse-pattern-app (fn () nil)) (define parse-pattern-atom (fn () nil)) (define at-pattern-atom? (fn () (let ((tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") true) ((= tt "string") true) ((= tt "char") true) ((= tt "ident") true) ((= tt "ctor") true) ((and (= tt "keyword") (or (= tv "true") (= tv "false"))) true) ((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{") (= tv "!"))) true) (else false))))) (set! parse-pattern-atom (fn () (let ((tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") (begin (advance-tok!) (if (= (round tv) tv) (list :plit (list :int tv)) (list :plit (list :float tv))))) ((= tt "string") (begin (advance-tok!) (list :plit (list :string tv)))) ((= tt "char") (begin (advance-tok!) (list :plit (list :char tv)))) ((and (= tt "keyword") (= tv "true")) (begin (advance-tok!) (list :plit (list :bool true)))) ((and (= tt "keyword") (= tv "false")) (begin (advance-tok!) (list :plit (list :bool false)))) ((and (= tt "ident") (= tv "_")) (begin (advance-tok!) (list :pwild))) ((= tt "ident") (begin (advance-tok!) (list :pvar tv))) ((= tt "ctor") (begin (advance-tok!) (list :pcon tv))) ((and (= tt "op") (= tv "(")) (begin (advance-tok!) (cond ((at-op? ")") (begin (advance-tok!) (list :plit (list :unit)))) (else (let ((first (parse-pattern))) (cond ((at-op? ",") (let ((items (list first))) (begin (define loop (fn () (when (at-op? ",") (begin (advance-tok!) (append! items (parse-pattern)) (loop))))) (loop) (consume! "op" ")") (cons :ptuple items)))) ;; Parens-only or-pattern: (P1 | P2 | ...). ((at-op? "|") (let ((alts (list first))) (begin (define loop-or (fn () (when (at-op? "|") (begin (advance-tok!) (append! alts (parse-pattern)) (loop-or))))) (loop-or) (consume! "op" ")") (cons :por alts)))) (else (begin (consume! "op" ")") first)))))))) ((and (= tt "op") (= tv "[")) (begin (advance-tok!) (cond ((at-op? "]") (begin (advance-tok!) (list :plist))) (else (let ((items (list))) (begin (append! items (parse-pattern)) (define loop (fn () (when (at-op? ";") (begin (advance-tok!) (when (not (at-op? "]")) (begin (append! items (parse-pattern)) (loop))))))) (loop) (consume! "op" "]") (cons :plist items))))))) ((and (= tt "op") (= tv "{")) ;; Record pattern: { f1 = pat1; f2 = pat2; ... } (begin (advance-tok!) (let ((fields (list))) (begin (define one (fn () (let ((fname (ocaml-tok-value (consume! "ident" nil)))) (begin (consume! "op" "=") (let ((fp (parse-pattern))) (append! fields (list fname fp))))))) (one) (define more (fn () (when (at-op? ";") (begin (advance-tok!) (when (not (at-op? "}")) (begin (one) (more))))))) (more) (consume! "op" "}") (cons :precord fields))))) (else (error (str "ocaml-parse: unexpected pattern token " tt " " tv " at idx " idx))))))) (set! parse-pattern-app (fn () (let ((head (parse-pattern-atom))) (cond ((and (= (nth head 0) :pcon) (at-pattern-atom?)) (let ((arg (parse-pattern-atom))) (let ((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg))))) (concat (list :pcon (nth head 1)) args)))) (else head))))) (set! parse-pattern-cons (fn () (let ((lhs (parse-pattern-app))) (cond ((at-op? "::") (begin (advance-tok!) (list :pcons lhs (parse-pattern-cons)))) (else lhs))))) ;; Top-level pattern is the cons-pat layer wrapped with optional ;; `pat as name` aliasing. Or-patterns are not supported at the ;; top level due to ambiguity with the match clause separator; ;; use `(A | B)` if needed in the future via a parens-only or. (set! parse-pattern (fn () (let ((p (parse-pattern-cons))) (cond ((at-kw? "as") (begin (advance-tok!) (let ((n (ocaml-tok-value (consume! "ident" nil)))) (list :pas p n)))) (else p))))) (define peek-tok-at (fn (n) (if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil))) ;; Param consumption — matches ident, `_` (wildcard), or `()` ;; (unit). Returns a fresh ident name or nil if no param at cursor. (define wild-counter (list 0)) (define try-consume-param! (fn () (cond ((and (check-tok? "ident" nil) (= (ocaml-tok-value (peek-tok)) "_")) (begin (advance-tok!) (set-nth! wild-counter 0 (+ (nth wild-counter 0) 1)) (str "__wild_" (nth wild-counter 0)))) ((check-tok? "ident" nil) (let ((nm (ocaml-tok-value (peek-tok)))) (begin (advance-tok!) nm))) ((and (at-op? "(") (= (ocaml-tok-value (peek-tok-at 1)) ")")) (begin (advance-tok!) (advance-tok!) (set-nth! wild-counter 0 (+ (nth wild-counter 0) 1)) (str "__unit_" (nth wild-counter 0)))) ((and (at-op? "(") (= (ocaml-tok-type (peek-tok-at 1)) "ident")) ;; (x : T) — typed param. Skip the `: T` part. (let ((nm (ocaml-tok-value (peek-tok-at 1)))) (begin (advance-tok!) (advance-tok!) (when (at-op? ":") (begin ;; Skip until matching `)`. (let ((d 1)) (begin (define skip (fn () (cond ((>= idx tok-len) nil) ((at-op? "(") (begin (set! d (+ d 1)) (advance-tok!) (skip))) ((at-op? ")") (cond ((= d 1) nil) (else (begin (set! d (- d 1)) (advance-tok!) (skip))))) (else (begin (advance-tok!) (skip)))))) (skip))))) (consume! "op" ")") nm))) (else nil)))) (define parse-expr (fn () nil)) (define parse-expr-no-seq (fn () nil)) (define parse-tuple (fn () nil)) (define parse-binop-rhs (fn (lhs min-prec) lhs)) (define parse-prefix (fn () nil)) (define parse-app (fn () nil)) (define parse-atom (fn () nil)) (set! parse-atom (fn () (let ((t (peek-tok)) (tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") (begin (advance-tok!) (if (= (round tv) tv) (list :int tv) (list :float tv)))) ((= tt "string") (begin (advance-tok!) (list :string tv))) ((= tt "char") (begin (advance-tok!) (list :char tv))) ((and (= tt "keyword") (= tv "true")) (begin (advance-tok!) (list :bool true))) ((and (= tt "keyword") (= tv "false")) (begin (advance-tok!) (list :bool false))) ((= tt "ident") (begin (advance-tok!) (list :var tv))) ((= tt "ctor") (begin (advance-tok!) (list :con tv))) ((and (= tt "op") (= tv "(")) (begin (advance-tok!) (cond ((at-op? ")") (begin (advance-tok!) (list :unit))) (else (let ((e (parse-expr))) (begin ;; Optional type annotation `(e : T)` — skip ;; the type source before `)`. (when (at-op? ":") (begin (advance-tok!) (define skip-pty (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? ")") nil) (else (begin (advance-tok!) (skip-pty)))))) (skip-pty))) (consume! "op" ")") e)))))) ((and (= tt "op") (= tv "[")) (begin (advance-tok!) (cond ((at-op? "]") (begin (advance-tok!) (list :list))) (else (let ((items (list))) (begin (append! items (parse-expr-no-seq)) (define loop (fn () (when (at-op? ";") (begin (advance-tok!) (when (not (at-op? "]")) (begin (append! items (parse-expr-no-seq)) (loop))))))) (loop) (consume! "op" "]") (cons :list items))))))) ((at-kw? "begin") (begin (advance-tok!) (let ((e (parse-expr))) (begin (consume! "keyword" "end") e)))) ;; Record literal { f1 = e1; f2 = e2 } or update ;; { r with f1 = e1; f2 = e2 }. ((and (= tt "op") (= tv "{")) (begin (advance-tok!) (cond ;; { r with field = expr; ... } — base ident + with. ((and (= (ocaml-tok-type (peek-tok)) "ident") (= (ocaml-tok-value (peek-tok-at 1)) "with")) (let ((base-name (ocaml-tok-value (peek-tok)))) (begin (advance-tok!) ;; ident (advance-tok!) ;; with (let ((fields (list))) (begin (define one (fn () (let ((fname (ocaml-tok-value (consume! "ident" nil)))) (begin (consume! "op" "=") (let ((fexpr (parse-expr-no-seq))) (append! fields (list fname fexpr))))))) (one) (define more (fn () (when (at-op? ";") (begin (advance-tok!) (when (not (at-op? "}")) (begin (one) (more))))))) (more) (consume! "op" "}") (cons :record-update (cons (list :var base-name) fields))))))) (else ;; Plain record literal { f = e; f = e; ... }. (let ((fields (list))) (begin (define one (fn () (let ((fname (ocaml-tok-value (consume! "ident" nil)))) (begin (consume! "op" "=") (let ((fexpr (parse-expr-no-seq))) (append! fields (list fname fexpr))))))) (one) (define more (fn () (when (at-op? ";") (begin (advance-tok!) (when (not (at-op? "}")) (begin (one) (more))))))) (more) (consume! "op" "}") (cons :record fields))))))) (else (error (str "ocaml-parse: unexpected token " tt " " tv " at idx " idx))))))) (define at-app-start? (fn () (let ((tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") true) ((= tt "string") true) ((= tt "char") true) ((= tt "ident") true) ((= tt "ctor") true) ((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin"))) true) ((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{") (= tv "!"))) true) (else false))))) (define parse-atom-postfix (fn () ;; After a primary atom, consume `.field` chains. Field name ;; may be lower (record field, module value) or upper (module ;; or constructor reference). Note: `M.x.y` is left-assoc: ;; `(:field (:field M "x") "y")`. (let ((head (parse-atom))) (begin (define loop (fn () (when (at-op? ".") (begin (advance-tok!) (let ((tok (peek-tok))) (begin (advance-tok!) (set! head (list :field head (ocaml-tok-value tok))) (loop))))))) (loop) head)))) (set! parse-app (fn () (let ((head (parse-atom-postfix))) (begin (define loop (fn () (when (at-app-start?) (let ((arg (cond ((at-op? "!") (begin (advance-tok!) (list :deref (parse-atom-postfix)))) (else (parse-atom-postfix))))) (begin (set! head (list :app head arg)) (loop)))))) (loop) head)))) (set! parse-prefix (fn () (cond ((at-op? "-") (begin (advance-tok!) (list :neg (parse-prefix)))) ((at-op? "!") (begin (advance-tok!) (list :deref (parse-prefix)))) ((at-kw? "not") (begin (advance-tok!) (list :not (parse-prefix)))) (else (parse-app))))) (set! parse-binop-rhs (fn (lhs min-prec) (let ((tok (peek-tok))) (cond ((not (ocaml-tok-is-binop? tok)) lhs) (else (let ((op (ocaml-tok-value tok)) (prec (ocaml-binop-prec (ocaml-tok-value tok)))) (cond ((< prec min-prec) lhs) (else (begin (advance-tok!) (let ((rhs (parse-prefix)) (next-min (if (ocaml-binop-right? op) prec (+ prec 1)))) (begin (set! rhs (parse-binop-rhs rhs next-min)) (parse-binop-rhs (list :op op lhs rhs) min-prec)))))))))))) (define parse-binary (fn () (let ((lhs (parse-prefix))) (parse-binop-rhs lhs 1)))) (set! parse-tuple (fn () (let ((first (parse-binary))) (cond ((at-op? ",") (let ((items (list first))) (begin (define loop (fn () (when (at-op? ",") (begin (advance-tok!) (append! items (parse-binary)) (loop))))) (loop) (cons :tuple items)))) (else first))))) (define parse-fun (fn () (let ((params (list))) (begin (define collect-params (fn () (let ((nm (try-consume-param!))) (when (not (= nm nil)) (begin (append! params nm) (collect-params)))))) (collect-params) (when (= (len params) 0) (error "ocaml-parse: fun expects at least one parameter")) (consume! "op" "->") (let ((body (parse-expr))) (list :fun params body)))))) (define parse-let (fn () ;; `let open M in body` — local open. Detect early so the ;; rest of the let-handler doesn't try to parse `open` as ;; an ident name. (cond ((at-kw? "open") (begin (advance-tok!) ;; Read path as Ctor(.Ctor)* and build :field-chain AST. (let ((path nil)) (begin (when (= (ocaml-tok-type (peek-tok)) "ctor") (begin (set! path (list :con (ocaml-tok-value (peek-tok)))) (advance-tok!))) (define more (fn () (when (and (at-op? ".") (= (ocaml-tok-type (nth tokens (+ idx 1))) "ctor")) (begin (advance-tok!) ;; . (let ((nm (ocaml-tok-value (peek-tok)))) (begin (advance-tok!) (set! path (list :field path nm)))) (more))))) (more) (consume! "keyword" "in") (let ((body (parse-expr))) (list :let-open path body)))))) (else (let ((reccy false) (bindings (list))) (begin (when (at-kw? "rec") (begin (advance-tok!) (set! reccy true))) (define parse-one! (fn () (let ((nm (ocaml-tok-value (consume! "ident" nil))) (ps (list))) (begin (define collect-params (fn () (let ((p (try-consume-param!))) (when (not (= p nil)) (begin (append! ps p) (collect-params)))))) (collect-params) ;; Optional type annotation: skip `: TYPE` before `=`. (when (at-op? ":") (begin (advance-tok!) (define skip-tann (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? "=") nil) (else (begin (advance-tok!) (skip-tann)))))) (skip-tann))) (consume! "op" "=") (let ((rhs (parse-expr))) (append! bindings (list nm ps rhs))))))) (parse-one!) (define more (fn () (when (at-kw? "and") (begin (advance-tok!) (parse-one!) (more))))) (more) (consume! "keyword" "in") (let ((body (parse-expr))) (cond ((= (len bindings) 1) (let ((b (first bindings))) (if reccy (list :let-rec (nth b 0) (nth b 1) (nth b 2) body) (list :let (nth b 0) (nth b 1) (nth b 2) body)))) (else (if reccy (list :let-rec-mut bindings body) (list :let-mut bindings body))))))))))) (define parse-if (fn () (let ((cond-expr (parse-expr-no-seq))) (begin (consume! "keyword" "then") (let ((then-expr (parse-expr-no-seq))) (cond ((at-kw? "else") (begin (advance-tok!) (let ((else-expr (parse-expr-no-seq))) (list :if cond-expr then-expr else-expr)))) (else (list :if cond-expr then-expr (list :unit))))))))) (define parse-match (fn () (let ((scrut (parse-expr-no-seq))) (begin (consume! "keyword" "with") (when (at-op? "|") (advance-tok!)) (let ((cases (list))) (begin (define one (fn () (let ((p (parse-pattern)) (guard nil)) (begin (when (at-kw? "when") (begin (advance-tok!) (set! guard (parse-expr-no-seq)))) (consume! "op" "->") (let ((body (parse-expr))) (cond ((= guard nil) (append! cases (list :case p body))) (else (append! cases (list :case-when p guard body))))))))) (one) (define loop (fn () (when (at-op? "|") (begin (advance-tok!) (one) (loop))))) (loop) (cons :match (cons scrut (list cases))))))))) (define parse-try (fn () (let ((expr (parse-expr-no-seq))) (begin (consume! "keyword" "with") (when (at-op? "|") (advance-tok!)) (let ((cases (list))) (begin (define one (fn () (let ((p (parse-pattern)) (guard nil)) (begin (when (at-kw? "when") (begin (advance-tok!) (set! guard (parse-expr-no-seq)))) (consume! "op" "->") (let ((body (parse-expr))) (cond ((= guard nil) (append! cases (list :case p body))) (else (append! cases (list :case-when p guard body))))))))) (one) (define loop (fn () (when (at-op? "|") (begin (advance-tok!) (one) (loop))))) (loop) (list :try expr cases))))))) (define parse-function (fn () ;; `function | pat [when GUARD] -> body | …` (let () (begin (when (at-op? "|") (advance-tok!)) (let ((cases (list))) (begin (define one (fn () (let ((p (parse-pattern)) (guard nil)) (begin (when (at-kw? "when") (begin (advance-tok!) (set! guard (parse-expr-no-seq)))) (consume! "op" "->") (let ((body (parse-expr))) (cond ((= guard nil) (append! cases (list :case p body))) (else (append! cases (list :case-when p guard body))))))))) (one) (define loop (fn () (when (at-op? "|") (begin (advance-tok!) (one) (loop))))) (loop) (list :function cases))))))) (define parse-for (fn () (let ((name (ocaml-tok-value (consume! "ident" nil)))) (begin (consume! "op" "=") (let ((lo (parse-expr-no-seq))) (let ((dir (cond ((at-kw? "to") (begin (advance-tok!) :ascend)) ((at-kw? "downto") (begin (advance-tok!) :descend)) (else (error "ocaml-parse: expected to/downto in for"))))) (let ((hi (parse-expr-no-seq))) (begin (consume! "keyword" "do") (let ((body (parse-expr))) (begin (consume! "keyword" "done") (list :for name lo hi dir body))))))))))) (define parse-while (fn () (let ((cond-expr (parse-expr-no-seq))) (begin (consume! "keyword" "do") (let ((body (parse-expr))) (begin (consume! "keyword" "done") (list :while cond-expr body))))))) (set! parse-expr-no-seq (fn () (cond ((at-kw? "fun") (begin (advance-tok!) (parse-fun))) ((at-kw? "let") (begin (advance-tok!) (parse-let))) ((at-kw? "if") (begin (advance-tok!) (parse-if))) ((at-kw? "match") (begin (advance-tok!) (parse-match))) ((at-kw? "function") (begin (advance-tok!) (parse-function))) ((at-kw? "for") (begin (advance-tok!) (parse-for))) ((at-kw? "while") (begin (advance-tok!) (parse-while))) ((at-kw? "try") (begin (advance-tok!) (parse-try))) (else (parse-tuple))))) (set! parse-expr (fn () (let ((lhs (parse-expr-no-seq))) (cond ((at-op? ";") (let ((items (list lhs))) (begin (define loop (fn () (when (at-op? ";") (begin (advance-tok!) (cond ((at-kw? "end") nil) ((at-op? ")") nil) ((at-op? "|") nil) ((at-kw? "in") nil) ((at-kw? "then") nil) ((at-kw? "else") nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) (else (begin (append! items (parse-expr-no-seq)) (loop)))))))) (loop) (cons :seq items)))) (else lhs))))) (let ((result (parse-expr))) (begin (when (not (= (ocaml-tok-type (peek-tok)) "eof")) (error (str "ocaml-parse: trailing tokens at idx " idx " — got " (ocaml-tok-type (peek-tok)) " " (ocaml-tok-value (peek-tok))))) result)))))) (define ocaml-parse-program (fn (src) (let ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list))) (begin (set! tok-len (len tokens)) (define peek-tok (fn () (nth tokens idx))) (define advance-tok! (fn () (set! idx (+ idx 1)))) (define check-tok? (fn (type value) (let ((t (peek-tok))) (and (= (ocaml-tok-type t) type) (or (= value nil) (= (ocaml-tok-value t) value)))))) (define consume! (fn (type value) (if (check-tok? type value) (let ((t (peek-tok))) (begin (advance-tok!) t)) (error (str "ocaml-parse-program: expected " type " " value " got " (ocaml-tok-type (peek-tok)) " " (ocaml-tok-value (peek-tok))))))) (define at-kw? (fn (kw) (check-tok? "keyword" kw))) (define at-op? (fn (op) (check-tok? "op" op))) (define skip-double-semi! (fn () (when (at-op? ";;") (begin (advance-tok!) (skip-double-semi!))))) (define cur-pos (fn () (let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos))))) ;; Two flavors of boundary skipping: ;; ;; * `skip-to-decl-boundary!` — used by parse-decl-expr. Stops ;; at the start of the next top-level decl: ;;, let, module, ;; open, include, and, type, exception, or eof. ;; ;; * `skip-let-rhs-boundary!` — used inside parse-decl-let after ;; the `=`. Treats `let` as the opener of a nested let..in ;; block (NOT a decl boundary), so `let f x = let y = 0 in y` ;; parses correctly. Boundary tokens (depth 0): ;;, module, ;; open, include, and, type, exception, or eof. ;; Lookahead: starting just past a `let` at the cursor, scan ;; for a matching `in` before the next decl boundary. Returns ;; true iff such an `in` exists — meaning the let is nested, ;; not a new decl. (define has-matching-in? (fn () (let ((p (+ idx 1)) (d 1) (result false) (done false)) (begin (define scan (fn () (when (not done) (cond ((>= p tok-len) (set! done true)) (else (let ((t (nth tokens p))) (let ((tt (ocaml-tok-type t)) (tv (ocaml-tok-value t))) (cond ((= tt "eof") (set! done true)) ((and (= tt "op") (= tv ";;")) (set! done true)) ((and (= tt "keyword") (= tv "module")) (set! done true)) ((and (= tt "keyword") (= tv "type")) (set! done true)) ((and (= tt "keyword") (= tv "exception")) (set! done true)) ((and (= tt "keyword") (= tv "open")) (set! done true)) ((and (= tt "keyword") (= tv "include")) (set! done true)) ((and (= tt "keyword") (= tv "let")) (begin (set! d (+ d 1)) (set! p (+ p 1)) (scan))) ((and (= tt "keyword") (= tv "in")) (cond ((= d 1) (begin (set! result true) (set! done true))) (else (begin (set! d (- d 1)) (set! p (+ p 1)) (scan))))) (else (begin (set! p (+ p 1)) (scan))))))))))) (scan) result)))) ;; Same as skip-to-boundary but treats inner `let` as the start ;; of a nested let..in (open depth) IF a matching `in` exists ;; before any decl boundary; otherwise stops. (define skip-let-rhs-boundary! (fn () (let ((depth 0)) (begin (define step (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((and (= depth 0) (at-op? ";;")) nil) ((and (= depth 0) (at-kw? "module")) nil) ((and (= depth 0) (at-kw? "open")) nil) ((and (= depth 0) (at-kw? "include")) nil) ((and (= depth 0) (at-kw? "and")) nil) ((and (= depth 0) (at-kw? "type")) nil) ((and (= depth 0) (at-kw? "exception")) nil) ((and (= depth 0) (at-kw? "let")) (cond ((has-matching-in?) (begin (set! depth (+ depth 1)) (advance-tok!) (step))) (else nil))) ((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct") (at-kw? "sig") (at-kw? "for") (at-kw? "while")) (begin (set! depth (+ depth 1)) (advance-tok!) (step))) ((or (at-kw? "in") (at-kw? "end") (at-kw? "done")) (begin (when (> depth 0) (set! depth (- depth 1))) (advance-tok!) (step))) (else (begin (advance-tok!) (step)))))) (step))))) (define skip-to-boundary! (fn () (let ((depth 0)) (begin (define step (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((and (= depth 0) (at-op? ";;")) nil) ((and (= depth 0) (at-kw? "let")) nil) ((and (= depth 0) (at-kw? "module")) nil) ((and (= depth 0) (at-kw? "open")) nil) ((and (= depth 0) (at-kw? "include")) nil) ((and (= depth 0) (at-kw? "and")) nil) ((and (= depth 0) (at-kw? "type")) nil) ((and (= depth 0) (at-kw? "exception")) nil) ((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct") (at-kw? "sig") (at-kw? "for") (at-kw? "while")) (begin (set! depth (+ depth 1)) (advance-tok!) (step))) ((or (at-kw? "in") (at-kw? "end") (at-kw? "done")) (begin (when (> depth 0) (set! depth (- depth 1))) (advance-tok!) (step))) (else (begin (advance-tok!) (step)))))) (step))))) (define parse-decl-let (fn () (advance-tok!) (let ((reccy false) (bindings (list))) (begin (when (at-kw? "rec") (begin (advance-tok!) (set! reccy true))) (define parse-one! (fn () (let ((nm (ocaml-tok-value (consume! "ident" nil))) (ps (list))) (begin (define collect-params (fn () (cond ((check-tok? "ident" nil) (begin (append! ps (ocaml-tok-value (peek-tok))) (advance-tok!) (collect-params))) ((and (at-op? "(") (< (+ idx 1) tok-len) (let ((t1 (nth tokens (+ idx 1)))) (and (= (ocaml-tok-type t1) "op") (= (ocaml-tok-value t1) ")")))) (begin (advance-tok!) (advance-tok!) (append! ps (str "__unit_" idx)) (collect-params))) (else nil)))) (collect-params) ;; Optional type annotation: skip `: TYPE` before `=`. (when (at-op? ":") (begin (advance-tok!) (define skip-tann (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? "=") nil) (else (begin (advance-tok!) (skip-tann)))))) (skip-tann))) (consume! "op" "=") (let ((expr-start (cur-pos))) (begin (skip-let-rhs-boundary!) (let ((expr-src (slice src expr-start (cur-pos)))) (let ((expr (ocaml-parse expr-src))) (append! bindings (list nm ps expr)))))))))) (parse-one!) (define more (fn () (when (at-kw? "and") (begin (advance-tok!) (parse-one!) (more))))) (more) (cond ((= (len bindings) 1) (let ((b (first bindings))) (if reccy (list :def-rec (nth b 0) (nth b 1) (nth b 2)) (list :def (nth b 0) (nth b 1) (nth b 2))))) (else (if reccy (list :def-rec-mut bindings) (list :def-mut bindings)))))))) (define parse-decl-expr (fn () (let ((expr-start (cur-pos))) (begin (skip-to-boundary!) (let ((expr-src (slice src expr-start (cur-pos)))) (let ((expr (ocaml-parse expr-src))) (list :expr expr))))))) ;; module M = struct DECLS end ;; Parsed by sub-tokenising the body source between `struct` and ;; the matching `end`. Nested modules / sigs increment depth. ;; exception NAME [of TYPE [* TYPE]*] (define parse-decl-exception (fn () (advance-tok!) ;; consume 'exception' (let ((name (ocaml-tok-value (consume! "ctor" nil))) (arg-srcs (list))) (begin (when (at-kw? "of") (begin (advance-tok!) (let ((arg-start (cur-pos))) (begin (define skip-type (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? ";;") nil) ((at-kw? "let") nil) ((at-kw? "type") nil) ((at-kw? "and") nil) ((at-kw? "module") nil) ((at-kw? "exception") nil) (else (begin (advance-tok!) (skip-type)))))) (skip-type) (append! arg-srcs (slice src arg-start (cur-pos))))))) (cons :exception-def (cons name arg-srcs)))))) ;; type [PARAMS] NAME = | Ctor [of T1 [* T2]*] | … ;; ;; PARAMS is `'a` or `('a, 'b)` (single or paren-tuple of tyvars). ;; We parse the structure and emit `(:type-def NAME PARAMS CTORS)` ;; where each CTOR is `(NAME ARG-TYPES)` (ARG-TYPES list of source ;; strings — types are treated opaquely at runtime). (define parse-decl-type (fn () (advance-tok!) ;; consume 'type' (let ((tparams (list))) (begin ;; Optional type-vars before the type name. (cond ((= (ocaml-tok-type (peek-tok)) "tyvar") (begin (append! tparams (ocaml-tok-value (peek-tok))) (advance-tok!))) ((at-op? "(") (begin (advance-tok!) (define more (fn () (when (= (ocaml-tok-type (peek-tok)) "tyvar") (begin (append! tparams (ocaml-tok-value (peek-tok))) (advance-tok!) (when (at-op? ",") (begin (advance-tok!) (more))))))) (more) (consume! "op" ")")))) (let ((name (ocaml-tok-value (consume! "ident" nil)))) (begin (consume! "op" "=") (cond ;; Record type: type NAME = { f1 [: T1]; f2 [: T2]; ... } ((at-op? "{") (begin (advance-tok!) (let ((fields (list))) (begin (define field-one (fn () (let ((mut false)) (begin (when (at-kw? "mutable") (begin (advance-tok!) (set! mut true))) (let ((fname (ocaml-tok-value (consume! "ident" nil)))) (begin (when (at-op? ":") (begin (advance-tok!) (define skip-fty (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? ";") nil) ((at-op? "}") nil) (else (begin (advance-tok!) (skip-fty)))))) (skip-fty))) (append! fields (if mut (list :mutable fname) (list fname))))))))) (field-one) (define field-more (fn () (when (at-op? ";") (begin (advance-tok!) (when (not (at-op? "}")) (begin (field-one) (field-more))))))) (field-more) (consume! "op" "}") (list :type-def-record name tparams fields))))) ;; Type alias: type t = int / type t = 'a list / etc. ;; Detected when next token is NOT `|` and NOT a ctor. ((and (not (at-op? "|")) (not (= (ocaml-tok-type (peek-tok)) "ctor"))) (begin ;; Skip the alias source up to the next boundary. (define skip-alias (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? ";;") nil) ((at-kw? "let") nil) ((at-kw? "type") nil) ((at-kw? "and") nil) ((at-kw? "module") nil) ((at-kw? "exception") nil) ((at-kw? "open") nil) ((at-kw? "include") nil) ((at-kw? "end") nil) (else (begin (advance-tok!) (skip-alias)))))) (skip-alias) (list :type-alias name tparams))) (else (begin (when (at-op? "|") (advance-tok!)) ;; Sum type: Ctor [of TYPE [* TYPE]*] (| Ctor …)* (let ((ctors (list))) (begin (define one (fn () (let ((cname (ocaml-tok-value (consume! "ctor" nil))) (arg-srcs (list))) (begin (when (at-kw? "of") (begin (advance-tok!) (let ((arg-start (cur-pos))) (begin (define skip-type (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? "|") nil) ((at-op? ";;") nil) ((at-kw? "let") nil) ((at-kw? "type") nil) ((at-kw? "and") nil) ((at-kw? "module") nil) (else (begin (advance-tok!) (skip-type)))))) (skip-type) (append! arg-srcs (slice src arg-start (cur-pos))))))) (append! ctors (cons cname arg-srcs)))))) (one) (define more (fn () (when (at-op? "|") (begin (advance-tok!) (one) (more))))) (more) (list :type-def name tparams ctors)))))))))))) ;; open M / include M — collect a path Ctor(.SubCtor)* and emit ;; (:open PATH) or (:include PATH). (define parse-decl-open (fn (include?) (advance-tok!) (let ((path-start (cur-pos))) (begin ;; Walk until end of the path. A path is Ctor (. Ctor)*. (define skip-path (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "ctor") (begin (advance-tok!) (skip-path))) ((at-op? ".") (begin (advance-tok!) (skip-path))) (else nil)))) (skip-path) (let ((path-src (slice src path-start (cur-pos)))) (let ((path-expr (ocaml-parse path-src))) (if include? (list :include path-expr) (list :open path-expr)))))))) ;; Parse a `struct DECLS end` body and return the decls list. (define parse-struct-body (fn () (consume! "keyword" "struct") (let ((body-start (cur-pos)) (depth 1)) (begin (define skip (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-kw? "struct") (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) ((at-kw? "begin") (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) ((at-kw? "sig") (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) ((at-kw? "end") (cond ((= depth 1) nil) (else (begin (set! depth (- depth 1)) (advance-tok!) (skip))))) (else (begin (advance-tok!) (skip)))))) (skip) (let ((body-end (cur-pos))) (begin (consume! "keyword" "end") (let ((body-src (slice src body-start body-end))) (let ((body-prog (ocaml-parse-program body-src))) (rest body-prog))))))))) ;; Skip an optional `: Sig` constraint (parens-balanced; we ;; ignore signatures in this iteration). (define skip-optional-sig (fn () (when (at-op? ":") (begin (advance-tok!) (let ((depth 0)) (begin (define skip (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((and (= depth 0) (at-op? ")")) nil) ((and (= depth 0) (at-op? "=")) nil) ((at-op? "(") (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) ((at-kw? "sig") (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) ((at-op? ")") (begin (set! depth (- depth 1)) (advance-tok!) (skip))) ((at-kw? "end") (begin (set! depth (- depth 1)) (advance-tok!) (skip))) (else (begin (advance-tok!) (skip)))))) (skip))))))) ;; module type S = sig ... end ;; Parsed-and-discarded (signatures are type-level only). Returns ;; a (:module-type-def NAME) marker for the eval loop to ignore. (define parse-decl-module-type (fn () (advance-tok!) ;; "type" (let ((name (ocaml-tok-value (consume! "ctor" nil)))) (begin (consume! "op" "=") (cond ((at-kw? "sig") (begin (advance-tok!) (let ((depth 1)) (begin (define skip (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((or (at-kw? "sig") (at-kw? "struct") (at-kw? "begin")) (begin (set! depth (+ depth 1)) (advance-tok!) (skip))) ((at-kw? "end") (cond ((= depth 1) nil) (else (begin (set! depth (- depth 1)) (advance-tok!) (skip))))) (else (begin (advance-tok!) (skip)))))) (skip) (consume! "keyword" "end"))))) (else ;; module type S = AnotherSig — skip-to-boundary. (skip-to-boundary!))) (list :module-type-def name))))) (define parse-decl-module (fn () (advance-tok!) (cond ((at-kw? "type") (parse-decl-module-type)) (else (parse-decl-module-rest))))) (define parse-decl-module-rest (fn () (let ((name (ocaml-tok-value (consume! "ctor" nil))) (params (list))) (begin ;; Functor parameters: `(P)` or `(P : Sig)`, repeated. (define collect-params (fn () (when (at-op? "(") (begin (advance-tok!) (when (= (ocaml-tok-type (peek-tok)) "ctor") (begin (append! params (ocaml-tok-value (peek-tok))) (advance-tok!))) (skip-optional-sig) (consume! "op" ")") (collect-params))))) (collect-params) (skip-optional-sig) (consume! "op" "=") (cond ;; Body is `struct DECLS end` — possibly a functor body. ((at-kw? "struct") (let ((decls (parse-struct-body))) (cond ((= (len params) 0) (list :module-def name decls)) (else (list :functor-def name params decls))))) ;; Body is a path possibly applied: `M`, `M.Sub`, `F(A)`, `F(A)(B)`. (else (let ((body-start (cur-pos))) (begin (define skip-path-app (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "ctor") (begin (advance-tok!) (skip-path-app))) ((at-op? ".") (begin (advance-tok!) (skip-path-app))) ((at-op? "(") ;; Paren-balanced argument list. (let ((d 1)) (begin (advance-tok!) (define skip-args (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? "(") (begin (set! d (+ d 1)) (advance-tok!) (skip-args))) ((at-op? ")") (cond ((= d 1) (begin (advance-tok!) nil)) (else (begin (set! d (- d 1)) (advance-tok!) (skip-args))))) (else (begin (advance-tok!) (skip-args)))))) (skip-args) (skip-path-app)))) (else nil)))) (skip-path-app) (let ((body-src (slice src body-start (cur-pos)))) (list :module-alias name body-src)))))))))) (define loop (fn () (begin (skip-double-semi!) (when (< idx tok-len) (cond ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-kw? "let") (begin (append! decls (parse-decl-let)) (loop))) ((at-kw? "module") (begin (append! decls (parse-decl-module)) (loop))) ((at-kw? "open") (begin (append! decls (parse-decl-open false)) (loop))) ((at-kw? "include") (begin (append! decls (parse-decl-open true)) (loop))) ((at-kw? "type") (begin (append! decls (parse-decl-type)) (loop))) ((at-kw? "exception") (begin (append! decls (parse-decl-exception)) (loop))) (else (begin (append! decls (parse-decl-expr)) (loop)))))))) (loop) (cons :program decls)))))