Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 21s
parse-atom-postfix's '.()' branch now disambiguates between let-open
and array-get based on whether the head is a module path (':con' or
':field' chain rooted in ':con'). Module paths still emit
(:let-open M EXPR); everything else emits (:array-get ARR I).
Eval handles :array-get by reading the cell's underlying list at
index. The '<-' assignment handler now also accepts :array-get lhs
and rewrites the cell with one position changed.
Idiomatic OCaml array code now works:
let a = Array.make 5 0 in
for i = 0 to 4 do a.(i) <- i * i done;
a.(3) + a.(4) = 25
let a = Array.init 4 (fun i -> i + 1) in
a.(0) + a.(1) + a.(2) + a.(3) = 10
List.(length [1;2;3]) = 3 (* unchanged: List is a module *)
1669 lines
71 KiB
Plaintext
1669 lines
71 KiB
Plaintext
;; lib/ocaml/parser.sx — OCaml expression parser.
|
|
;;
|
|
;; Input: token list from (ocaml-tokenize src).
|
|
;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head;
|
|
;; keywords serialize to their string name so `(list :var "x")` is the
|
|
;; same value as `(list "var" "x")` at runtime.
|
|
;;
|
|
;; Expression scope:
|
|
;; atoms int/float/string/char/bool, unit (), var, con, list literal
|
|
;; application left-associative, f x y z
|
|
;; prefix -E unary minus, not E
|
|
;; infix 29 ops via lib/guest/pratt.sx
|
|
;; tuple a, b, c (lower than infix, higher than let/if)
|
|
;; parens (e)
|
|
;; if if c then t else e (else optional → unit)
|
|
;; fun fun x y -> body
|
|
;; let let x = e in body (no rec, function shorthand)
|
|
;; let rec f x = e in body
|
|
;; match match e with [|] p -> body | p -> body | ...
|
|
;; sequence e1 ; e2 → (:seq e1 e2 …) (lowest-precedence binary)
|
|
;;
|
|
;; Pattern scope:
|
|
;; _ (wildcard), int/string/char/bool literals, ident (var binding),
|
|
;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…),
|
|
;; list literal [pat;pat;…], cons p1 :: p2.
|
|
;;
|
|
;; AST shapes:
|
|
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
|
|
;; (:var NAME) (:con NAME)
|
|
;; (:app FN ARG)
|
|
;; (:op OP LHS RHS)
|
|
;; (:neg E) (:not E)
|
|
;; (:tuple ITEMS) (:list ITEMS)
|
|
;; (:seq EXPRS)
|
|
;; (:if C T E)
|
|
;; (:fun PARAMS BODY)
|
|
;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY)
|
|
;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...)
|
|
;;
|
|
;; (:pwild) (:pvar N) (:plit LIT)
|
|
;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary
|
|
;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL)
|
|
|
|
(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
|
|
|
|
(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value))))
|
|
|
|
(define
|
|
ocaml-op-table
|
|
(list
|
|
(list ":=" 1 :right)
|
|
(list "<-" 1 :right)
|
|
(list "||" 2 :right)
|
|
(list "or" 2 :right)
|
|
(list "&&" 3 :right)
|
|
(list "&" 3 :right)
|
|
(list "=" 4 :left)
|
|
(list "<" 4 :left)
|
|
(list ">" 4 :left)
|
|
(list "<=" 4 :left)
|
|
(list ">=" 4 :left)
|
|
(list "<>" 4 :left)
|
|
(list "==" 4 :left)
|
|
(list "!=" 4 :left)
|
|
(list "|>" 4 :left)
|
|
(list "@" 5 :right)
|
|
(list "^" 5 :right)
|
|
(list "::" 6 :right)
|
|
(list "+" 7 :left)
|
|
(list "-" 7 :left)
|
|
(list "+." 7 :left)
|
|
(list "-." 7 :left)
|
|
(list "*" 8 :left)
|
|
(list "/" 8 :left)
|
|
(list "*." 8 :left)
|
|
(list "/." 8 :left)
|
|
(list "%" 8 :left)
|
|
(list "mod" 8 :left)
|
|
(list "land" 8 :left)
|
|
(list "lor" 8 :left)
|
|
(list "lxor" 8 :left)
|
|
(list "**" 9 :right)
|
|
(list "lsl" 9 :right)
|
|
(list "lsr" 9 :right)
|
|
(list "asr" 9 :right)))
|
|
|
|
(define
|
|
ocaml-binop-prec
|
|
(fn
|
|
(op)
|
|
(let
|
|
((entry (pratt-op-lookup ocaml-op-table op)))
|
|
(if (= entry nil) 0 (pratt-op-prec entry)))))
|
|
|
|
(define
|
|
ocaml-binop-right?
|
|
(fn
|
|
(op)
|
|
(let
|
|
((entry (pratt-op-lookup ocaml-op-table op)))
|
|
(and (not (= entry nil)) (= (pratt-op-assoc entry) :right)))))
|
|
|
|
(define
|
|
ocaml-tok-is-binop?
|
|
(fn
|
|
(tok)
|
|
(let
|
|
((tt (ocaml-tok-type tok)) (tv (ocaml-tok-value tok)))
|
|
(cond
|
|
((= tt "op") (not (= (ocaml-binop-prec tv) 0)))
|
|
((= tt "keyword") (not (= (ocaml-binop-prec tv) 0)))
|
|
(else false)))))
|
|
|
|
(define
|
|
ocaml-parse
|
|
(fn
|
|
(src)
|
|
(let
|
|
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0))
|
|
(begin
|
|
(set! tok-len (len tokens))
|
|
(define peek-tok (fn () (nth tokens idx)))
|
|
(define advance-tok! (fn () (set! idx (+ idx 1))))
|
|
(define
|
|
check-tok?
|
|
(fn
|
|
(type value)
|
|
(let
|
|
((t (peek-tok)))
|
|
(and
|
|
(= (ocaml-tok-type t) type)
|
|
(or (= value nil) (= (ocaml-tok-value t) value))))))
|
|
(define
|
|
consume!
|
|
(fn
|
|
(type value)
|
|
(if
|
|
(check-tok? type value)
|
|
(let ((t (peek-tok))) (begin (advance-tok!) t))
|
|
(error
|
|
(str
|
|
"ocaml-parse: expected "
|
|
type
|
|
" "
|
|
value
|
|
" got "
|
|
(ocaml-tok-type (peek-tok))
|
|
" "
|
|
(ocaml-tok-value (peek-tok)))))))
|
|
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
|
|
(define at-op? (fn (op) (check-tok? "op" op)))
|
|
(define parse-pattern (fn () nil))
|
|
(define parse-pattern-cons (fn () nil))
|
|
(define parse-pattern-app (fn () nil))
|
|
(define parse-pattern-atom (fn () nil))
|
|
(define
|
|
at-pattern-atom?
|
|
(fn
|
|
()
|
|
(let
|
|
((tt (ocaml-tok-type (peek-tok)))
|
|
(tv (ocaml-tok-value (peek-tok))))
|
|
(cond
|
|
((= tt "number") true)
|
|
((= tt "string") true)
|
|
((= tt "char") true)
|
|
((= tt "ident") true)
|
|
((= tt "ctor") true)
|
|
((and (= tt "keyword") (or (= tv "true") (= tv "false")))
|
|
true)
|
|
((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{") (= tv "!"))) true)
|
|
(else false)))))
|
|
(set!
|
|
parse-pattern-atom
|
|
(fn
|
|
()
|
|
(let
|
|
((tt (ocaml-tok-type (peek-tok)))
|
|
(tv (ocaml-tok-value (peek-tok))))
|
|
(cond
|
|
((= tt "number")
|
|
(begin
|
|
(advance-tok!)
|
|
(if
|
|
(= (round tv) tv)
|
|
(list :plit (list :int tv))
|
|
(list :plit (list :float tv)))))
|
|
((= tt "string")
|
|
(begin (advance-tok!) (list :plit (list :string tv))))
|
|
((= tt "char")
|
|
(begin (advance-tok!) (list :plit (list :char tv))))
|
|
((and (= tt "keyword") (= tv "true"))
|
|
(begin (advance-tok!) (list :plit (list :bool true))))
|
|
((and (= tt "keyword") (= tv "false"))
|
|
(begin (advance-tok!) (list :plit (list :bool false))))
|
|
((and (= tt "ident") (= tv "_"))
|
|
(begin (advance-tok!) (list :pwild)))
|
|
((= tt "ident") (begin (advance-tok!) (list :pvar tv)))
|
|
((= tt "ctor") (begin (advance-tok!) (list :pcon tv)))
|
|
((and (= tt "op") (= tv "("))
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((at-op? ")")
|
|
(begin (advance-tok!) (list :plit (list :unit))))
|
|
(else
|
|
(let
|
|
((first (parse-pattern)))
|
|
(cond
|
|
((at-op? ",")
|
|
(let
|
|
((items (list first)))
|
|
(begin
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-op? ",")
|
|
(begin
|
|
(advance-tok!)
|
|
(append! items (parse-pattern))
|
|
(loop)))))
|
|
(loop)
|
|
(consume! "op" ")")
|
|
(cons :ptuple items))))
|
|
;; Parens-only or-pattern: (P1 | P2 | ...).
|
|
((at-op? "|")
|
|
(let ((alts (list first)))
|
|
(begin
|
|
(define loop-or
|
|
(fn ()
|
|
(when (at-op? "|")
|
|
(begin (advance-tok!)
|
|
(append! alts (parse-pattern))
|
|
(loop-or)))))
|
|
(loop-or)
|
|
(consume! "op" ")")
|
|
(cons :por alts))))
|
|
(else (begin (consume! "op" ")") first))))))))
|
|
((and (= tt "op") (= tv "["))
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((at-op? "]") (begin (advance-tok!) (list :plist)))
|
|
(else
|
|
(let
|
|
((items (list)))
|
|
(begin
|
|
(append! items (parse-pattern))
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-op? ";")
|
|
(begin
|
|
(advance-tok!)
|
|
(when
|
|
(not (at-op? "]"))
|
|
(begin
|
|
(append! items (parse-pattern))
|
|
(loop)))))))
|
|
(loop)
|
|
(consume! "op" "]")
|
|
(cons :plist items)))))))
|
|
((and (= tt "op") (= tv "{"))
|
|
;; Record pattern: { f1 = pat1; f2 = pat2; ... }
|
|
(begin
|
|
(advance-tok!)
|
|
(let ((fields (list)))
|
|
(begin
|
|
(define one
|
|
(fn ()
|
|
(let ((fname (ocaml-tok-value (consume! "ident" nil))))
|
|
(begin
|
|
(consume! "op" "=")
|
|
(let ((fp (parse-pattern)))
|
|
(append! fields (list fname fp)))))))
|
|
(one)
|
|
(define more
|
|
(fn ()
|
|
(when (at-op? ";")
|
|
(begin (advance-tok!)
|
|
(when (not (at-op? "}"))
|
|
(begin (one) (more)))))))
|
|
(more)
|
|
(consume! "op" "}")
|
|
(cons :precord fields)))))
|
|
(else
|
|
(error
|
|
(str
|
|
"ocaml-parse: unexpected pattern token "
|
|
tt
|
|
" "
|
|
tv
|
|
" at idx "
|
|
idx)))))))
|
|
(set!
|
|
parse-pattern-app
|
|
(fn
|
|
()
|
|
(let
|
|
((head (parse-pattern-atom)))
|
|
(cond
|
|
((and (= (nth head 0) :pcon) (at-pattern-atom?))
|
|
(let
|
|
((arg (parse-pattern-atom)))
|
|
(let
|
|
((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg)))))
|
|
(concat (list :pcon (nth head 1)) args))))
|
|
(else head)))))
|
|
(set!
|
|
parse-pattern-cons
|
|
(fn
|
|
()
|
|
(let
|
|
((lhs (parse-pattern-app)))
|
|
(cond
|
|
((at-op? "::")
|
|
(begin
|
|
(advance-tok!)
|
|
(list :pcons lhs (parse-pattern-cons))))
|
|
(else lhs)))))
|
|
;; Top-level pattern is the cons-pat layer wrapped with optional
|
|
;; `pat as name` aliasing. Or-patterns are not supported at the
|
|
;; top level due to ambiguity with the match clause separator;
|
|
;; use `(A | B)` if needed in the future via a parens-only or.
|
|
(set! parse-pattern
|
|
(fn ()
|
|
(let ((p (parse-pattern-cons)))
|
|
(cond
|
|
((at-kw? "as")
|
|
(begin
|
|
(advance-tok!)
|
|
(let ((n (ocaml-tok-value (consume! "ident" nil))))
|
|
(list :pas p n))))
|
|
(else p)))))
|
|
|
|
(define peek-tok-at
|
|
(fn (n)
|
|
(if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil)))
|
|
|
|
;; Param consumption — matches ident, `_` (wildcard), or `()`
|
|
;; (unit). Returns a fresh ident name or nil if no param at cursor.
|
|
(define wild-counter (list 0))
|
|
(define try-consume-param!
|
|
(fn ()
|
|
(cond
|
|
((and (check-tok? "ident" nil)
|
|
(= (ocaml-tok-value (peek-tok)) "_"))
|
|
(begin
|
|
(advance-tok!)
|
|
(set-nth! wild-counter 0 (+ (nth wild-counter 0) 1))
|
|
(str "__wild_" (nth wild-counter 0))))
|
|
((check-tok? "ident" nil)
|
|
(let ((nm (ocaml-tok-value (peek-tok))))
|
|
(begin (advance-tok!) nm)))
|
|
((and (at-op? "(") (= (ocaml-tok-value (peek-tok-at 1)) ")"))
|
|
(begin
|
|
(advance-tok!) (advance-tok!)
|
|
(set-nth! wild-counter 0 (+ (nth wild-counter 0) 1))
|
|
(str "__unit_" (nth wild-counter 0))))
|
|
((and (at-op? "(") (= (ocaml-tok-type (peek-tok-at 1)) "ident"))
|
|
;; (x : T) — typed param. Skip the `: T` part.
|
|
(let ((nm (ocaml-tok-value (peek-tok-at 1))))
|
|
(begin
|
|
(advance-tok!) (advance-tok!)
|
|
(when (at-op? ":")
|
|
(begin
|
|
;; Skip until matching `)`.
|
|
(let ((d 1))
|
|
(begin
|
|
(define skip
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((at-op? "(")
|
|
(begin (set! d (+ d 1)) (advance-tok!) (skip)))
|
|
((at-op? ")")
|
|
(cond
|
|
((= d 1) nil)
|
|
(else (begin (set! d (- d 1)) (advance-tok!) (skip)))))
|
|
(else (begin (advance-tok!) (skip))))))
|
|
(skip)))))
|
|
(consume! "op" ")")
|
|
nm)))
|
|
(else nil))))
|
|
(define parse-expr (fn () nil))
|
|
(define parse-expr-no-seq (fn () nil))
|
|
(define parse-tuple (fn () nil))
|
|
(define parse-binop-rhs (fn (lhs min-prec) lhs))
|
|
(define parse-prefix (fn () nil))
|
|
(define parse-app (fn () nil))
|
|
(define parse-atom (fn () nil))
|
|
(set!
|
|
parse-atom
|
|
(fn
|
|
()
|
|
(let
|
|
((t (peek-tok))
|
|
(tt (ocaml-tok-type (peek-tok)))
|
|
(tv (ocaml-tok-value (peek-tok))))
|
|
(cond
|
|
((= tt "number")
|
|
(begin
|
|
(advance-tok!)
|
|
(if (= (round tv) tv) (list :int tv) (list :float tv))))
|
|
((= tt "string") (begin (advance-tok!) (list :string tv)))
|
|
((= tt "char") (begin (advance-tok!) (list :char tv)))
|
|
((and (= tt "keyword") (= tv "true"))
|
|
(begin (advance-tok!) (list :bool true)))
|
|
((and (= tt "keyword") (= tv "false"))
|
|
(begin (advance-tok!) (list :bool false)))
|
|
((= tt "ident") (begin (advance-tok!) (list :var tv)))
|
|
((= tt "ctor") (begin (advance-tok!) (list :con tv)))
|
|
((and (= tt "op") (= tv "("))
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((at-op? ")") (begin (advance-tok!) (list :unit)))
|
|
;; (op) — operator section: build (fun a b -> a op b).
|
|
;; Recognises ops with precedence > 0 (i.e. binops),
|
|
;; followed immediately by `)`.
|
|
((and (or (= (ocaml-tok-type (peek-tok)) "op")
|
|
(= (ocaml-tok-type (peek-tok)) "keyword"))
|
|
(not (= (ocaml-binop-prec
|
|
(ocaml-tok-value (peek-tok))) 0))
|
|
(let ((t1 (nth tokens (+ idx 1))))
|
|
(and (= (ocaml-tok-type t1) "op")
|
|
(= (ocaml-tok-value t1) ")"))))
|
|
(let ((opv (ocaml-tok-value (peek-tok))))
|
|
(begin
|
|
(advance-tok!)
|
|
(advance-tok!)
|
|
(list :fun (list "a" "b")
|
|
(list :op opv (list :var "a")
|
|
(list :var "b"))))))
|
|
(else
|
|
(let
|
|
((e (parse-expr)))
|
|
(begin
|
|
;; Optional type annotation `(e : T)` — skip
|
|
;; the type source before `)`.
|
|
(when (at-op? ":")
|
|
(begin
|
|
(advance-tok!)
|
|
(define skip-pty
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-op? ")") nil)
|
|
(else (begin (advance-tok!) (skip-pty))))))
|
|
(skip-pty)))
|
|
(consume! "op" ")") e))))))
|
|
((and (= tt "op") (= tv "["))
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((at-op? "]") (begin (advance-tok!) (list :list)))
|
|
(else
|
|
(let
|
|
((items (list)))
|
|
(begin
|
|
(append! items (parse-expr-no-seq))
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-op? ";")
|
|
(begin
|
|
(advance-tok!)
|
|
(when
|
|
(not (at-op? "]"))
|
|
(begin
|
|
(append! items (parse-expr-no-seq))
|
|
(loop)))))))
|
|
(loop)
|
|
(consume! "op" "]")
|
|
(cons :list items)))))))
|
|
((at-kw? "begin")
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((e (parse-expr)))
|
|
(begin (consume! "keyword" "end") e))))
|
|
;; Record literal { f1 = e1; f2 = e2 } or update
|
|
;; { r with f1 = e1; f2 = e2 }.
|
|
((and (= tt "op") (= tv "{"))
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
;; { r with field = expr; ... } — base ident + with.
|
|
((and (= (ocaml-tok-type (peek-tok)) "ident")
|
|
(= (ocaml-tok-value (peek-tok-at 1)) "with"))
|
|
(let ((base-name (ocaml-tok-value (peek-tok))))
|
|
(begin
|
|
(advance-tok!) ;; ident
|
|
(advance-tok!) ;; with
|
|
(let ((fields (list)))
|
|
(begin
|
|
(define one
|
|
(fn ()
|
|
(let ((fname (ocaml-tok-value (consume! "ident" nil))))
|
|
(begin
|
|
(consume! "op" "=")
|
|
(let ((fexpr (parse-expr-no-seq)))
|
|
(append! fields (list fname fexpr)))))))
|
|
(one)
|
|
(define more
|
|
(fn ()
|
|
(when (at-op? ";")
|
|
(begin (advance-tok!)
|
|
(when (not (at-op? "}"))
|
|
(begin (one) (more)))))))
|
|
(more)
|
|
(consume! "op" "}")
|
|
(cons :record-update
|
|
(cons (list :var base-name) fields)))))))
|
|
(else
|
|
;; Plain record literal { f = e; f = e; ... }.
|
|
(let ((fields (list)))
|
|
(begin
|
|
(define one
|
|
(fn ()
|
|
(let ((fname (ocaml-tok-value (consume! "ident" nil))))
|
|
(begin
|
|
(consume! "op" "=")
|
|
(let ((fexpr (parse-expr-no-seq)))
|
|
(append! fields (list fname fexpr)))))))
|
|
(one)
|
|
(define more
|
|
(fn ()
|
|
(when (at-op? ";")
|
|
(begin (advance-tok!)
|
|
(when (not (at-op? "}"))
|
|
(begin (one) (more)))))))
|
|
(more)
|
|
(consume! "op" "}")
|
|
(cons :record fields)))))))
|
|
(else
|
|
(error
|
|
(str
|
|
"ocaml-parse: unexpected token "
|
|
tt
|
|
" "
|
|
tv
|
|
" at idx "
|
|
idx)))))))
|
|
(define
|
|
at-app-start?
|
|
(fn
|
|
()
|
|
(let
|
|
((tt (ocaml-tok-type (peek-tok)))
|
|
(tv (ocaml-tok-value (peek-tok))))
|
|
(cond
|
|
((= tt "number") true)
|
|
((= tt "string") true)
|
|
((= tt "char") true)
|
|
((= tt "ident") true)
|
|
((= tt "ctor") true)
|
|
((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin")))
|
|
true)
|
|
((and (= tt "op") (or (= tv "(") (= tv "[") (= tv "{") (= tv "!"))) true)
|
|
(else false)))))
|
|
(define parse-atom-postfix
|
|
(fn ()
|
|
;; After a primary atom, consume `.field` chains. Field name
|
|
;; may be lower (record field, module value) or upper (module
|
|
;; or constructor reference). Note: `M.x.y` is left-assoc:
|
|
;; `(:field (:field M "x") "y")`.
|
|
(let ((head (parse-atom)))
|
|
(begin
|
|
;; Module-path detector: head is :con, or :field chain
|
|
;; whose innermost subject is :con. Used to choose between
|
|
;; (:let-open M EXPR) and (:array-get ARR I) after `.(`.
|
|
(define is-module-path?
|
|
(fn (h)
|
|
(cond
|
|
((not (list? h)) false)
|
|
((= (first h) "con") true)
|
|
((= (first h) "field") (is-module-path? (nth h 1)))
|
|
(else false))))
|
|
(define loop
|
|
(fn ()
|
|
(when (at-op? ".")
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((at-op? "(")
|
|
(begin
|
|
(advance-tok!)
|
|
(let ((inner (parse-expr)))
|
|
(begin
|
|
(consume! "op" ")")
|
|
(set! head
|
|
(if (is-module-path? head)
|
|
(list :let-open head inner)
|
|
(list :array-get head inner)))
|
|
(loop)))))
|
|
((at-op? "[")
|
|
(begin
|
|
(advance-tok!)
|
|
(let ((idx-expr (parse-expr)))
|
|
(begin
|
|
(consume! "op" "]")
|
|
(set! head (list :string-get head idx-expr))
|
|
(loop)))))
|
|
(else
|
|
(let ((tok (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(set! head (list :field head
|
|
(ocaml-tok-value tok)))
|
|
(loop)))))))))
|
|
(loop)
|
|
head))))
|
|
(set!
|
|
parse-app
|
|
(fn
|
|
()
|
|
(let
|
|
((head (parse-atom-postfix)))
|
|
(begin
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-app-start?)
|
|
(let
|
|
((arg
|
|
(cond
|
|
((at-op? "!")
|
|
(begin (advance-tok!)
|
|
(list :deref (parse-atom-postfix))))
|
|
(else (parse-atom-postfix)))))
|
|
(begin (set! head (list :app head arg)) (loop))))))
|
|
(loop)
|
|
head))))
|
|
(set!
|
|
parse-prefix
|
|
(fn
|
|
()
|
|
(cond
|
|
((at-op? "-")
|
|
(begin (advance-tok!) (list :neg (parse-prefix))))
|
|
((at-op? "!")
|
|
(begin (advance-tok!) (list :deref (parse-prefix))))
|
|
((at-kw? "not")
|
|
(begin (advance-tok!) (list :not (parse-prefix))))
|
|
((at-kw? "assert")
|
|
(begin (advance-tok!) (list :assert (parse-prefix))))
|
|
((at-kw? "lazy")
|
|
(begin (advance-tok!) (list :lazy (parse-prefix))))
|
|
(else (parse-app)))))
|
|
(set!
|
|
parse-binop-rhs
|
|
(fn
|
|
(lhs min-prec)
|
|
(let
|
|
((tok (peek-tok)))
|
|
(cond
|
|
((not (ocaml-tok-is-binop? tok)) lhs)
|
|
(else
|
|
(let
|
|
((op (ocaml-tok-value tok))
|
|
(prec (ocaml-binop-prec (ocaml-tok-value tok))))
|
|
(cond
|
|
((< prec min-prec) lhs)
|
|
(else
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((rhs (parse-prefix))
|
|
(next-min
|
|
(if
|
|
(ocaml-binop-right? op)
|
|
prec
|
|
(+ prec 1))))
|
|
(begin
|
|
(set! rhs (parse-binop-rhs rhs next-min))
|
|
(parse-binop-rhs (list :op op lhs rhs) min-prec))))))))))))
|
|
(define
|
|
parse-binary
|
|
(fn
|
|
()
|
|
(let ((lhs (parse-prefix))) (parse-binop-rhs lhs 1))))
|
|
(set!
|
|
parse-tuple
|
|
(fn
|
|
()
|
|
(let
|
|
((first (parse-binary)))
|
|
(cond
|
|
((at-op? ",")
|
|
(let
|
|
((items (list first)))
|
|
(begin
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-op? ",")
|
|
(begin
|
|
(advance-tok!)
|
|
(append! items (parse-binary))
|
|
(loop)))))
|
|
(loop)
|
|
(cons :tuple items))))
|
|
(else first)))))
|
|
(define
|
|
parse-fun
|
|
(fn
|
|
()
|
|
(let
|
|
((params (list)))
|
|
(begin
|
|
(define
|
|
collect-params
|
|
(fn ()
|
|
(let ((nm (try-consume-param!)))
|
|
(when (not (= nm nil))
|
|
(begin (append! params nm) (collect-params))))))
|
|
(collect-params)
|
|
(when
|
|
(= (len params) 0)
|
|
(error "ocaml-parse: fun expects at least one parameter"))
|
|
(consume! "op" "->")
|
|
(let ((body (parse-expr))) (list :fun params body))))))
|
|
(define
|
|
parse-let
|
|
(fn ()
|
|
;; `let open M in body` — local open. Detect early so the
|
|
;; rest of the let-handler doesn't try to parse `open` as
|
|
;; an ident name.
|
|
(cond
|
|
((at-kw? "open")
|
|
(begin
|
|
(advance-tok!)
|
|
;; Read path as Ctor(.Ctor)* and build :field-chain AST.
|
|
(let ((path nil))
|
|
(begin
|
|
(when (= (ocaml-tok-type (peek-tok)) "ctor")
|
|
(begin
|
|
(set! path (list :con (ocaml-tok-value (peek-tok))))
|
|
(advance-tok!)))
|
|
(define more
|
|
(fn ()
|
|
(when (and (at-op? ".")
|
|
(= (ocaml-tok-type
|
|
(nth tokens (+ idx 1))) "ctor"))
|
|
(begin
|
|
(advance-tok!) ;; .
|
|
(let ((nm (ocaml-tok-value (peek-tok))))
|
|
(begin
|
|
(advance-tok!)
|
|
(set! path (list :field path nm))))
|
|
(more)))))
|
|
(more)
|
|
(consume! "keyword" "in")
|
|
(let ((body (parse-expr)))
|
|
(list :let-open path body))))))
|
|
(else
|
|
(let ((reccy false) (bindings (list)))
|
|
(begin
|
|
(when (at-kw? "rec")
|
|
(begin (advance-tok!) (set! reccy true)))
|
|
(define parse-one!
|
|
(fn ()
|
|
(let ((nm (ocaml-tok-value (consume! "ident" nil)))
|
|
(ps (list)))
|
|
(begin
|
|
(define collect-params
|
|
(fn ()
|
|
(let ((p (try-consume-param!)))
|
|
(when (not (= p nil))
|
|
(begin (append! ps p) (collect-params))))))
|
|
(collect-params)
|
|
;; Optional type annotation: skip `: TYPE` before `=`.
|
|
(when (at-op? ":")
|
|
(begin
|
|
(advance-tok!)
|
|
(define skip-tann
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-op? "=") nil)
|
|
(else (begin (advance-tok!) (skip-tann))))))
|
|
(skip-tann)))
|
|
(consume! "op" "=")
|
|
(let ((rhs (parse-expr)))
|
|
(append! bindings (list nm ps rhs)))))))
|
|
(parse-one!)
|
|
(define more
|
|
(fn ()
|
|
(when (at-kw? "and")
|
|
(begin (advance-tok!) (parse-one!) (more)))))
|
|
(more)
|
|
(consume! "keyword" "in")
|
|
(let ((body (parse-expr)))
|
|
(cond
|
|
((= (len bindings) 1)
|
|
(let ((b (first bindings)))
|
|
(if reccy
|
|
(list :let-rec (nth b 0) (nth b 1) (nth b 2) body)
|
|
(list :let (nth b 0) (nth b 1) (nth b 2) body))))
|
|
(else
|
|
(if reccy
|
|
(list :let-rec-mut bindings body)
|
|
(list :let-mut bindings body)))))))))))
|
|
(define
|
|
parse-if
|
|
(fn
|
|
()
|
|
(let
|
|
((cond-expr (parse-expr-no-seq)))
|
|
(begin
|
|
(consume! "keyword" "then")
|
|
(let
|
|
((then-expr (parse-expr-no-seq)))
|
|
(cond
|
|
((at-kw? "else")
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((else-expr (parse-expr-no-seq)))
|
|
(list :if cond-expr then-expr else-expr))))
|
|
(else (list :if cond-expr then-expr (list :unit)))))))))
|
|
(define
|
|
parse-match
|
|
(fn
|
|
()
|
|
(let
|
|
((scrut (parse-expr-no-seq)))
|
|
(begin
|
|
(consume! "keyword" "with")
|
|
(when (at-op? "|") (advance-tok!))
|
|
(let
|
|
((cases (list)))
|
|
(begin
|
|
(define
|
|
one
|
|
(fn
|
|
()
|
|
(let
|
|
((p (parse-pattern)) (guard nil))
|
|
(begin
|
|
(when (at-kw? "when")
|
|
(begin
|
|
(advance-tok!)
|
|
(set! guard (parse-expr-no-seq))))
|
|
(consume! "op" "->")
|
|
(let
|
|
((body (parse-expr)))
|
|
(cond
|
|
((= guard nil)
|
|
(append! cases (list :case p body)))
|
|
(else
|
|
(append! cases (list :case-when p guard body)))))))))
|
|
(one)
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-op? "|")
|
|
(begin (advance-tok!) (one) (loop)))))
|
|
(loop)
|
|
(cons :match (cons scrut (list cases)))))))))
|
|
(define parse-try
|
|
(fn ()
|
|
(let ((expr (parse-expr-no-seq)))
|
|
(begin
|
|
(consume! "keyword" "with")
|
|
(when (at-op? "|") (advance-tok!))
|
|
(let ((cases (list)))
|
|
(begin
|
|
(define one
|
|
(fn ()
|
|
(let ((p (parse-pattern)) (guard nil))
|
|
(begin
|
|
(when (at-kw? "when")
|
|
(begin (advance-tok!)
|
|
(set! guard (parse-expr-no-seq))))
|
|
(consume! "op" "->")
|
|
(let ((body (parse-expr)))
|
|
(cond
|
|
((= guard nil)
|
|
(append! cases (list :case p body)))
|
|
(else
|
|
(append! cases (list :case-when p guard body)))))))))
|
|
(one)
|
|
(define loop
|
|
(fn ()
|
|
(when (at-op? "|")
|
|
(begin (advance-tok!) (one) (loop)))))
|
|
(loop)
|
|
(list :try expr cases)))))))
|
|
(define parse-function
|
|
(fn ()
|
|
;; `function | pat [when GUARD] -> body | …`
|
|
(let ()
|
|
(begin
|
|
(when (at-op? "|") (advance-tok!))
|
|
(let ((cases (list)))
|
|
(begin
|
|
(define one
|
|
(fn ()
|
|
(let ((p (parse-pattern)) (guard nil))
|
|
(begin
|
|
(when (at-kw? "when")
|
|
(begin (advance-tok!)
|
|
(set! guard (parse-expr-no-seq))))
|
|
(consume! "op" "->")
|
|
(let ((body (parse-expr)))
|
|
(cond
|
|
((= guard nil)
|
|
(append! cases (list :case p body)))
|
|
(else
|
|
(append! cases (list :case-when p guard body)))))))))
|
|
(one)
|
|
(define loop
|
|
(fn ()
|
|
(when (at-op? "|")
|
|
(begin (advance-tok!) (one) (loop)))))
|
|
(loop)
|
|
(list :function cases)))))))
|
|
(define parse-for
|
|
(fn ()
|
|
(let ((name (ocaml-tok-value (consume! "ident" nil))))
|
|
(begin
|
|
(consume! "op" "=")
|
|
(let ((lo (parse-expr-no-seq)))
|
|
(let ((dir
|
|
(cond
|
|
((at-kw? "to") (begin (advance-tok!) :ascend))
|
|
((at-kw? "downto") (begin (advance-tok!) :descend))
|
|
(else (error "ocaml-parse: expected to/downto in for")))))
|
|
(let ((hi (parse-expr-no-seq)))
|
|
(begin
|
|
(consume! "keyword" "do")
|
|
(let ((body (parse-expr)))
|
|
(begin
|
|
(consume! "keyword" "done")
|
|
(list :for name lo hi dir body)))))))))))
|
|
(define parse-while
|
|
(fn ()
|
|
(let ((cond-expr (parse-expr-no-seq)))
|
|
(begin
|
|
(consume! "keyword" "do")
|
|
(let ((body (parse-expr)))
|
|
(begin
|
|
(consume! "keyword" "done")
|
|
(list :while cond-expr body)))))))
|
|
(set!
|
|
parse-expr-no-seq
|
|
(fn
|
|
()
|
|
(cond
|
|
((at-kw? "fun") (begin (advance-tok!) (parse-fun)))
|
|
((at-kw? "let") (begin (advance-tok!) (parse-let)))
|
|
((at-kw? "if") (begin (advance-tok!) (parse-if)))
|
|
((at-kw? "match") (begin (advance-tok!) (parse-match)))
|
|
((at-kw? "function") (begin (advance-tok!) (parse-function)))
|
|
((at-kw? "for") (begin (advance-tok!) (parse-for)))
|
|
((at-kw? "while") (begin (advance-tok!) (parse-while)))
|
|
((at-kw? "try") (begin (advance-tok!) (parse-try)))
|
|
(else (parse-tuple)))))
|
|
(set!
|
|
parse-expr
|
|
(fn
|
|
()
|
|
(let
|
|
((lhs (parse-expr-no-seq)))
|
|
(cond
|
|
((at-op? ";")
|
|
(let
|
|
((items (list lhs)))
|
|
(begin
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-op? ";")
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((at-kw? "end") nil)
|
|
((at-op? ")") nil)
|
|
((at-op? "|") nil)
|
|
((at-kw? "in") nil)
|
|
((at-kw? "then") nil)
|
|
((at-kw? "else") nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
(else
|
|
(begin
|
|
(append! items (parse-expr-no-seq))
|
|
(loop))))))))
|
|
(loop)
|
|
(cons :seq items))))
|
|
(else lhs)))))
|
|
(let
|
|
((result (parse-expr)))
|
|
(begin
|
|
(when
|
|
(not (= (ocaml-tok-type (peek-tok)) "eof"))
|
|
(error
|
|
(str
|
|
"ocaml-parse: trailing tokens at idx "
|
|
idx
|
|
" — got "
|
|
(ocaml-tok-type (peek-tok))
|
|
" "
|
|
(ocaml-tok-value (peek-tok)))))
|
|
result))))))
|
|
|
|
(define
|
|
ocaml-parse-program
|
|
(fn
|
|
(src)
|
|
(let
|
|
((tokens (ocaml-tokenize src))
|
|
(idx 0)
|
|
(tok-len 0)
|
|
(decls (list)))
|
|
(begin
|
|
(set! tok-len (len tokens))
|
|
(define peek-tok (fn () (nth tokens idx)))
|
|
(define advance-tok! (fn () (set! idx (+ idx 1))))
|
|
(define
|
|
check-tok?
|
|
(fn
|
|
(type value)
|
|
(let
|
|
((t (peek-tok)))
|
|
(and
|
|
(= (ocaml-tok-type t) type)
|
|
(or (= value nil) (= (ocaml-tok-value t) value))))))
|
|
(define
|
|
consume!
|
|
(fn
|
|
(type value)
|
|
(if
|
|
(check-tok? type value)
|
|
(let ((t (peek-tok))) (begin (advance-tok!) t))
|
|
(error
|
|
(str
|
|
"ocaml-parse-program: expected "
|
|
type
|
|
" "
|
|
value
|
|
" got "
|
|
(ocaml-tok-type (peek-tok))
|
|
" "
|
|
(ocaml-tok-value (peek-tok)))))))
|
|
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
|
|
(define at-op? (fn (op) (check-tok? "op" op)))
|
|
(define
|
|
skip-double-semi!
|
|
(fn
|
|
()
|
|
(when (at-op? ";;") (begin (advance-tok!) (skip-double-semi!)))))
|
|
(define
|
|
cur-pos
|
|
(fn
|
|
()
|
|
(let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos)))))
|
|
;; Two flavors of boundary skipping:
|
|
;;
|
|
;; * `skip-to-decl-boundary!` — used by parse-decl-expr. Stops
|
|
;; at the start of the next top-level decl: ;;, let, module,
|
|
;; open, include, and, type, exception, or eof.
|
|
;;
|
|
;; * `skip-let-rhs-boundary!` — used inside parse-decl-let after
|
|
;; the `=`. Treats `let` as the opener of a nested let..in
|
|
;; block (NOT a decl boundary), so `let f x = let y = 0 in y`
|
|
;; parses correctly. Boundary tokens (depth 0): ;;, module,
|
|
;; open, include, and, type, exception, or eof.
|
|
;; Lookahead: starting just past a `let` at the cursor, scan
|
|
;; for a matching `in` before the next decl boundary. Returns
|
|
;; true iff such an `in` exists — meaning the let is nested,
|
|
;; not a new decl.
|
|
(define has-matching-in?
|
|
(fn ()
|
|
(let ((p (+ idx 1)) (d 1) (result false) (done false))
|
|
(begin
|
|
(define scan
|
|
(fn ()
|
|
(when (not done)
|
|
(cond
|
|
((>= p tok-len) (set! done true))
|
|
(else
|
|
(let ((t (nth tokens p)))
|
|
(let ((tt (ocaml-tok-type t)) (tv (ocaml-tok-value t)))
|
|
(cond
|
|
((= tt "eof") (set! done true))
|
|
((and (= tt "op") (= tv ";;")) (set! done true))
|
|
((and (= tt "keyword") (= tv "module")) (set! done true))
|
|
((and (= tt "keyword") (= tv "type")) (set! done true))
|
|
((and (= tt "keyword") (= tv "exception")) (set! done true))
|
|
((and (= tt "keyword") (= tv "open")) (set! done true))
|
|
((and (= tt "keyword") (= tv "include")) (set! done true))
|
|
((and (= tt "keyword") (= tv "let"))
|
|
(begin (set! d (+ d 1)) (set! p (+ p 1)) (scan)))
|
|
((and (= tt "keyword") (= tv "in"))
|
|
(cond
|
|
((= d 1) (begin (set! result true) (set! done true)))
|
|
(else
|
|
(begin (set! d (- d 1)) (set! p (+ p 1)) (scan)))))
|
|
(else (begin (set! p (+ p 1)) (scan)))))))))))
|
|
(scan)
|
|
result))))
|
|
|
|
;; Same as skip-to-boundary but treats inner `let` as the start
|
|
;; of a nested let..in (open depth) IF a matching `in` exists
|
|
;; before any decl boundary; otherwise stops.
|
|
(define
|
|
skip-let-rhs-boundary!
|
|
(fn ()
|
|
(let ((depth 0))
|
|
(begin
|
|
(define step
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((and (= depth 0) (at-op? ";;")) nil)
|
|
((and (= depth 0) (at-kw? "module")) nil)
|
|
((and (= depth 0) (at-kw? "open")) nil)
|
|
((and (= depth 0) (at-kw? "include")) nil)
|
|
((and (= depth 0) (at-kw? "and")) nil)
|
|
((and (= depth 0) (at-kw? "type")) nil)
|
|
((and (= depth 0) (at-kw? "exception")) nil)
|
|
((and (= depth 0) (at-kw? "let"))
|
|
(cond
|
|
((has-matching-in?)
|
|
(begin (set! depth (+ depth 1)) (advance-tok!) (step)))
|
|
(else nil)))
|
|
((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct")
|
|
(at-kw? "sig") (at-kw? "for") (at-kw? "while"))
|
|
(begin (set! depth (+ depth 1)) (advance-tok!) (step)))
|
|
((or (at-kw? "in") (at-kw? "end") (at-kw? "done"))
|
|
(begin
|
|
(when (> depth 0) (set! depth (- depth 1)))
|
|
(advance-tok!) (step)))
|
|
(else (begin (advance-tok!) (step))))))
|
|
(step)))))
|
|
|
|
(define
|
|
skip-to-boundary!
|
|
(fn ()
|
|
(let ((depth 0))
|
|
(begin
|
|
(define step
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((and (= depth 0) (at-op? ";;")) nil)
|
|
((and (= depth 0) (at-kw? "let")) nil)
|
|
((and (= depth 0) (at-kw? "module")) nil)
|
|
((and (= depth 0) (at-kw? "open")) nil)
|
|
((and (= depth 0) (at-kw? "include")) nil)
|
|
((and (= depth 0) (at-kw? "and")) nil)
|
|
((and (= depth 0) (at-kw? "type")) nil)
|
|
((and (= depth 0) (at-kw? "exception")) nil)
|
|
((or (at-kw? "let") (at-kw? "begin") (at-kw? "struct")
|
|
(at-kw? "sig") (at-kw? "for") (at-kw? "while"))
|
|
(begin (set! depth (+ depth 1)) (advance-tok!) (step)))
|
|
((or (at-kw? "in") (at-kw? "end") (at-kw? "done"))
|
|
(begin
|
|
(when (> depth 0) (set! depth (- depth 1)))
|
|
(advance-tok!) (step)))
|
|
(else (begin (advance-tok!) (step))))))
|
|
(step)))))
|
|
(define
|
|
parse-decl-let
|
|
(fn ()
|
|
(advance-tok!)
|
|
(let ((reccy false) (bindings (list)))
|
|
(begin
|
|
(when (at-kw? "rec") (begin (advance-tok!) (set! reccy true)))
|
|
(define parse-one!
|
|
(fn ()
|
|
(let ((nm (cond
|
|
((and (at-op? "(")
|
|
(< (+ idx 1) tok-len)
|
|
(let ((t1 (nth tokens (+ idx 1))))
|
|
(and (= (ocaml-tok-type t1) "op")
|
|
(= (ocaml-tok-value t1) ")"))))
|
|
(begin (advance-tok!) (advance-tok!)
|
|
(str "__unit_" idx)))
|
|
(else
|
|
(ocaml-tok-value (consume! "ident" nil)))))
|
|
(ps (list)))
|
|
(begin
|
|
(define collect-params
|
|
(fn ()
|
|
(cond
|
|
((check-tok? "ident" nil)
|
|
(begin
|
|
(append! ps (ocaml-tok-value (peek-tok)))
|
|
(advance-tok!)
|
|
(collect-params)))
|
|
((and (at-op? "(")
|
|
(< (+ idx 1) tok-len)
|
|
(let ((t1 (nth tokens (+ idx 1))))
|
|
(and (= (ocaml-tok-type t1) "op")
|
|
(= (ocaml-tok-value t1) ")"))))
|
|
(begin
|
|
(advance-tok!) (advance-tok!)
|
|
(append! ps (str "__unit_" idx))
|
|
(collect-params)))
|
|
(else nil))))
|
|
(collect-params)
|
|
;; Optional type annotation: skip `: TYPE` before `=`.
|
|
(when (at-op? ":")
|
|
(begin
|
|
(advance-tok!)
|
|
(define skip-tann
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-op? "=") nil)
|
|
(else (begin (advance-tok!) (skip-tann))))))
|
|
(skip-tann)))
|
|
(consume! "op" "=")
|
|
(let ((expr-start (cur-pos)))
|
|
(begin
|
|
(skip-let-rhs-boundary!)
|
|
(let ((expr-src (slice src expr-start (cur-pos))))
|
|
(let ((expr (ocaml-parse expr-src)))
|
|
(append! bindings (list nm ps expr))))))))))
|
|
(parse-one!)
|
|
(define more
|
|
(fn ()
|
|
(when (at-kw? "and")
|
|
(begin (advance-tok!) (parse-one!) (more)))))
|
|
(more)
|
|
(cond
|
|
((= (len bindings) 1)
|
|
(let ((b (first bindings)))
|
|
(if reccy
|
|
(list :def-rec (nth b 0) (nth b 1) (nth b 2))
|
|
(list :def (nth b 0) (nth b 1) (nth b 2)))))
|
|
(else
|
|
(if reccy
|
|
(list :def-rec-mut bindings)
|
|
(list :def-mut bindings))))))))
|
|
(define
|
|
parse-decl-expr
|
|
(fn
|
|
()
|
|
(let
|
|
((expr-start (cur-pos)))
|
|
(begin
|
|
(skip-to-boundary!)
|
|
(let
|
|
((expr-src (slice src expr-start (cur-pos))))
|
|
(let ((expr (ocaml-parse expr-src))) (list :expr expr)))))))
|
|
;; module M = struct DECLS end
|
|
;; Parsed by sub-tokenising the body source between `struct` and
|
|
;; the matching `end`. Nested modules / sigs increment depth.
|
|
;; exception NAME [of TYPE [* TYPE]*]
|
|
(define
|
|
parse-decl-exception
|
|
(fn ()
|
|
(advance-tok!) ;; consume 'exception'
|
|
(let ((name (ocaml-tok-value (consume! "ctor" nil)))
|
|
(arg-srcs (list)))
|
|
(begin
|
|
(when (at-kw? "of")
|
|
(begin
|
|
(advance-tok!)
|
|
(let ((arg-start (cur-pos)))
|
|
(begin
|
|
(define skip-type
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-op? ";;") nil)
|
|
((at-kw? "let") nil)
|
|
((at-kw? "type") nil)
|
|
((at-kw? "and") nil)
|
|
((at-kw? "module") nil)
|
|
((at-kw? "exception") nil)
|
|
(else (begin (advance-tok!) (skip-type))))))
|
|
(skip-type)
|
|
(append! arg-srcs (slice src arg-start (cur-pos)))))))
|
|
(cons :exception-def (cons name arg-srcs))))))
|
|
|
|
;; type [PARAMS] NAME = | Ctor [of T1 [* T2]*] | …
|
|
;;
|
|
;; PARAMS is `'a` or `('a, 'b)` (single or paren-tuple of tyvars).
|
|
;; We parse the structure and emit `(:type-def NAME PARAMS CTORS)`
|
|
;; where each CTOR is `(NAME ARG-TYPES)` (ARG-TYPES list of source
|
|
;; strings — types are treated opaquely at runtime).
|
|
(define
|
|
parse-decl-type
|
|
(fn ()
|
|
(advance-tok!) ;; consume 'type'
|
|
(let ((tparams (list)))
|
|
(begin
|
|
;; Optional type-vars before the type name.
|
|
(cond
|
|
((= (ocaml-tok-type (peek-tok)) "tyvar")
|
|
(begin
|
|
(append! tparams (ocaml-tok-value (peek-tok)))
|
|
(advance-tok!)))
|
|
((at-op? "(")
|
|
(begin
|
|
(advance-tok!)
|
|
(define more
|
|
(fn ()
|
|
(when (= (ocaml-tok-type (peek-tok)) "tyvar")
|
|
(begin
|
|
(append! tparams (ocaml-tok-value (peek-tok)))
|
|
(advance-tok!)
|
|
(when (at-op? ",")
|
|
(begin (advance-tok!) (more)))))))
|
|
(more)
|
|
(consume! "op" ")"))))
|
|
(let ((name (ocaml-tok-value (consume! "ident" nil))))
|
|
(begin
|
|
(consume! "op" "=")
|
|
(cond
|
|
;; Record type: type NAME = { f1 [: T1]; f2 [: T2]; ... }
|
|
((at-op? "{")
|
|
(begin
|
|
(advance-tok!)
|
|
(let ((fields (list)))
|
|
(begin
|
|
(define field-one
|
|
(fn ()
|
|
(let ((mut false))
|
|
(begin
|
|
(when (at-kw? "mutable")
|
|
(begin (advance-tok!) (set! mut true)))
|
|
(let ((fname (ocaml-tok-value (consume! "ident" nil))))
|
|
(begin
|
|
(when (at-op? ":")
|
|
(begin
|
|
(advance-tok!)
|
|
(define skip-fty
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-op? ";") nil)
|
|
((at-op? "}") nil)
|
|
(else (begin (advance-tok!) (skip-fty))))))
|
|
(skip-fty)))
|
|
(append! fields
|
|
(if mut
|
|
(list :mutable fname)
|
|
(list fname)))))))))
|
|
(field-one)
|
|
(define field-more
|
|
(fn ()
|
|
(when (at-op? ";")
|
|
(begin (advance-tok!)
|
|
(when (not (at-op? "}"))
|
|
(begin (field-one) (field-more)))))))
|
|
(field-more)
|
|
(consume! "op" "}")
|
|
(list :type-def-record name tparams fields)))))
|
|
;; Type alias: type t = int / type t = 'a list / etc.
|
|
;; Detected when next token is NOT `|` and NOT a ctor.
|
|
((and (not (at-op? "|"))
|
|
(not (= (ocaml-tok-type (peek-tok)) "ctor")))
|
|
(begin
|
|
;; Skip the alias source up to the next boundary.
|
|
(define skip-alias
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-op? ";;") nil)
|
|
((at-kw? "let") nil)
|
|
((at-kw? "type") nil)
|
|
((at-kw? "and") nil)
|
|
((at-kw? "module") nil)
|
|
((at-kw? "exception") nil)
|
|
((at-kw? "open") nil)
|
|
((at-kw? "include") nil)
|
|
((at-kw? "end") nil)
|
|
(else (begin (advance-tok!) (skip-alias))))))
|
|
(skip-alias)
|
|
(list :type-alias name tparams)))
|
|
(else
|
|
(begin
|
|
(when (at-op? "|") (advance-tok!))
|
|
;; Sum type: Ctor [of TYPE [* TYPE]*] (| Ctor …)*
|
|
(let ((ctors (list)))
|
|
(begin
|
|
(define one
|
|
(fn ()
|
|
(let ((cname (ocaml-tok-value (consume! "ctor" nil)))
|
|
(arg-srcs (list)))
|
|
(begin
|
|
(when (at-kw? "of")
|
|
(begin
|
|
(advance-tok!)
|
|
(let ((arg-start (cur-pos)))
|
|
(begin
|
|
(define skip-type
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-op? "|") nil)
|
|
((at-op? ";;") nil)
|
|
((at-kw? "let") nil)
|
|
((at-kw? "type") nil)
|
|
((at-kw? "and") nil)
|
|
((at-kw? "module") nil)
|
|
(else (begin (advance-tok!) (skip-type))))))
|
|
(skip-type)
|
|
(append! arg-srcs (slice src arg-start (cur-pos)))))))
|
|
(append! ctors (cons cname arg-srcs))))))
|
|
(one)
|
|
(define more
|
|
(fn ()
|
|
(when (at-op? "|")
|
|
(begin (advance-tok!) (one) (more)))))
|
|
(more)
|
|
(list :type-def name tparams ctors))))))))))))
|
|
|
|
;; open M / include M — collect a path Ctor(.SubCtor)* and emit
|
|
;; (:open PATH) or (:include PATH).
|
|
(define
|
|
parse-decl-open
|
|
(fn (include?)
|
|
(advance-tok!)
|
|
(let ((path-start (cur-pos)))
|
|
(begin
|
|
;; Walk until end of the path. A path is Ctor (. Ctor)*.
|
|
(define skip-path
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "ctor")
|
|
(begin (advance-tok!) (skip-path)))
|
|
((at-op? ".") (begin (advance-tok!) (skip-path)))
|
|
(else nil))))
|
|
(skip-path)
|
|
(let ((path-src (slice src path-start (cur-pos))))
|
|
(let ((path-expr (ocaml-parse path-src)))
|
|
(if include?
|
|
(list :include path-expr)
|
|
(list :open path-expr))))))))
|
|
;; Parse a `struct DECLS end` body and return the decls list.
|
|
(define
|
|
parse-struct-body
|
|
(fn ()
|
|
(consume! "keyword" "struct")
|
|
(let ((body-start (cur-pos)) (depth 1))
|
|
(begin
|
|
(define skip
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-kw? "struct")
|
|
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
|
|
((at-kw? "begin")
|
|
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
|
|
((at-kw? "sig")
|
|
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
|
|
((at-kw? "end")
|
|
(cond
|
|
((= depth 1) nil)
|
|
(else
|
|
(begin (set! depth (- depth 1)) (advance-tok!) (skip)))))
|
|
(else (begin (advance-tok!) (skip))))))
|
|
(skip)
|
|
(let ((body-end (cur-pos)))
|
|
(begin
|
|
(consume! "keyword" "end")
|
|
(let ((body-src (slice src body-start body-end)))
|
|
(let ((body-prog (ocaml-parse-program body-src)))
|
|
(rest body-prog)))))))))
|
|
|
|
;; Skip an optional `: Sig` constraint (parens-balanced; we
|
|
;; ignore signatures in this iteration).
|
|
(define
|
|
skip-optional-sig
|
|
(fn ()
|
|
(when (at-op? ":")
|
|
(begin
|
|
(advance-tok!)
|
|
(let ((depth 0))
|
|
(begin
|
|
(define skip
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((and (= depth 0) (at-op? ")")) nil)
|
|
((and (= depth 0) (at-op? "=")) nil)
|
|
((at-op? "(")
|
|
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
|
|
((at-kw? "sig")
|
|
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
|
|
((at-op? ")")
|
|
(begin (set! depth (- depth 1)) (advance-tok!) (skip)))
|
|
((at-kw? "end")
|
|
(begin (set! depth (- depth 1)) (advance-tok!) (skip)))
|
|
(else (begin (advance-tok!) (skip))))))
|
|
(skip)))))))
|
|
|
|
;; module type S = sig ... end
|
|
;; Parsed-and-discarded (signatures are type-level only). Returns
|
|
;; a (:module-type-def NAME) marker for the eval loop to ignore.
|
|
(define
|
|
parse-decl-module-type
|
|
(fn ()
|
|
(advance-tok!) ;; "type"
|
|
(let ((name (ocaml-tok-value (consume! "ctor" nil))))
|
|
(begin
|
|
(consume! "op" "=")
|
|
(cond
|
|
((at-kw? "sig")
|
|
(begin
|
|
(advance-tok!)
|
|
(let ((depth 1))
|
|
(begin
|
|
(define skip
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((or (at-kw? "sig") (at-kw? "struct") (at-kw? "begin"))
|
|
(begin (set! depth (+ depth 1)) (advance-tok!) (skip)))
|
|
((at-kw? "end")
|
|
(cond
|
|
((= depth 1) nil)
|
|
(else
|
|
(begin (set! depth (- depth 1)) (advance-tok!) (skip)))))
|
|
(else (begin (advance-tok!) (skip))))))
|
|
(skip)
|
|
(consume! "keyword" "end")))))
|
|
(else
|
|
;; module type S = AnotherSig — skip-to-boundary.
|
|
(skip-to-boundary!)))
|
|
(list :module-type-def name)))))
|
|
|
|
(define
|
|
parse-decl-module
|
|
(fn ()
|
|
(advance-tok!)
|
|
(cond
|
|
((at-kw? "type") (parse-decl-module-type))
|
|
(else (parse-decl-module-rest)))))
|
|
|
|
(define
|
|
parse-decl-module-rest
|
|
(fn ()
|
|
(let ((name (ocaml-tok-value (consume! "ctor" nil)))
|
|
(params (list)))
|
|
(begin
|
|
;; Functor parameters: `(P)` or `(P : Sig)`, repeated.
|
|
(define collect-params
|
|
(fn ()
|
|
(when (at-op? "(")
|
|
(begin
|
|
(advance-tok!)
|
|
(when (= (ocaml-tok-type (peek-tok)) "ctor")
|
|
(begin
|
|
(append! params (ocaml-tok-value (peek-tok)))
|
|
(advance-tok!)))
|
|
(skip-optional-sig)
|
|
(consume! "op" ")")
|
|
(collect-params)))))
|
|
(collect-params)
|
|
(skip-optional-sig)
|
|
(consume! "op" "=")
|
|
(cond
|
|
;; Body is `struct DECLS end` — possibly a functor body.
|
|
((at-kw? "struct")
|
|
(let ((decls (parse-struct-body)))
|
|
(cond
|
|
((= (len params) 0) (list :module-def name decls))
|
|
(else (list :functor-def name params decls)))))
|
|
;; Body is a path possibly applied: `M`, `M.Sub`, `F(A)`, `F(A)(B)`.
|
|
(else
|
|
(let ((body-start (cur-pos)))
|
|
(begin
|
|
(define skip-path-app
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "ctor")
|
|
(begin (advance-tok!) (skip-path-app)))
|
|
((at-op? ".")
|
|
(begin (advance-tok!) (skip-path-app)))
|
|
((at-op? "(")
|
|
;; Paren-balanced argument list.
|
|
(let ((d 1))
|
|
(begin
|
|
(advance-tok!)
|
|
(define skip-args
|
|
(fn ()
|
|
(cond
|
|
((>= idx tok-len) nil)
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-op? "(")
|
|
(begin (set! d (+ d 1)) (advance-tok!) (skip-args)))
|
|
((at-op? ")")
|
|
(cond
|
|
((= d 1) (begin (advance-tok!) nil))
|
|
(else (begin (set! d (- d 1)) (advance-tok!) (skip-args)))))
|
|
(else (begin (advance-tok!) (skip-args))))))
|
|
(skip-args)
|
|
(skip-path-app))))
|
|
(else nil))))
|
|
(skip-path-app)
|
|
(let ((body-src (slice src body-start (cur-pos))))
|
|
(list :module-alias name body-src))))))))))
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(begin
|
|
(skip-double-semi!)
|
|
(when
|
|
(< idx tok-len)
|
|
(cond
|
|
((= (ocaml-tok-type (peek-tok)) "eof") nil)
|
|
((at-kw? "let")
|
|
(cond
|
|
;; `let r = expr in body` at the top level is an
|
|
;; expression-let, not a decl. Detect by scanning
|
|
;; for a matching `in` at this depth — has-matching-in?
|
|
;; walks the same boundaries as the decl scanner.
|
|
((has-matching-in?)
|
|
(let ((expr-start (cur-pos)))
|
|
(begin
|
|
(skip-let-rhs-boundary!)
|
|
(let ((expr-src (slice src expr-start (cur-pos))))
|
|
(let ((expr (ocaml-parse expr-src)))
|
|
(begin
|
|
(append! decls (list :expr expr))
|
|
(loop)))))))
|
|
(else
|
|
(begin (append! decls (parse-decl-let)) (loop)))))
|
|
((at-kw? "module")
|
|
(begin (append! decls (parse-decl-module)) (loop)))
|
|
((at-kw? "open")
|
|
(begin (append! decls (parse-decl-open false)) (loop)))
|
|
((at-kw? "include")
|
|
(begin (append! decls (parse-decl-open true)) (loop)))
|
|
((at-kw? "type")
|
|
(begin (append! decls (parse-decl-type)) (loop)))
|
|
((at-kw? "exception")
|
|
(begin (append! decls (parse-decl-exception)) (loop)))
|
|
(else (begin (append! decls (parse-decl-expr)) (loop))))))))
|
|
(loop)
|
|
(cons :program decls)))))
|