;; lib/ocaml/parser.sx — OCaml expression parser. ;; ;; Input: token list from (ocaml-tokenize src). ;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head; ;; keywords serialize to their string name so `(list :var "x")` is the ;; same value as `(list "var" "x")` at runtime. ;; ;; Expression scope: ;; atoms int/float/string/char/bool, unit (), var, con, list literal ;; application left-associative, f x y z ;; prefix -E unary minus, not E ;; infix 29 ops via lib/guest/pratt.sx ;; tuple a, b, c (lower than infix, higher than let/if) ;; parens (e) ;; if if c then t else e (else optional → unit) ;; fun fun x y -> body ;; let let x = e in body (no rec, function shorthand) ;; let rec f x = e in body ;; match match e with [|] p -> body | p -> body | ... ;; sequence e1 ; e2 → (:seq e1 e2 …) (lowest-precedence binary) ;; ;; Pattern scope: ;; _ (wildcard), int/string/char/bool literals, ident (var binding), ;; ctor (no args), ctor pat, (), parens, tuple (pat,pat,…), ;; list literal [pat;pat;…], cons p1 :: p2. ;; ;; AST shapes: ;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit) ;; (:var NAME) (:con NAME) ;; (:app FN ARG) ;; (:op OP LHS RHS) ;; (:neg E) (:not E) ;; (:tuple ITEMS) (:list ITEMS) ;; (:seq EXPRS) ;; (:if C T E) ;; (:fun PARAMS BODY) ;; (:let NAME PARAMS EXPR BODY) (:let-rec NAME PARAMS EXPR BODY) ;; (:match SCRUTINEE CLAUSES) CLAUSES = ((:case PAT BODY) ...) ;; ;; (:pwild) (:pvar N) (:plit LIT) ;; (:pcon NAME ARG-PATS) — ARG-PATS empty for nullary ;; (:ptuple PATS) (:plist PATS) (:pcons HEAD TAIL) (define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) (define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value)))) (define ocaml-op-table (list (list ":=" 1 :right) (list "||" 2 :right) (list "or" 2 :right) (list "&&" 3 :right) (list "&" 3 :right) (list "=" 4 :left) (list "<" 4 :left) (list ">" 4 :left) (list "<=" 4 :left) (list ">=" 4 :left) (list "<>" 4 :left) (list "==" 4 :left) (list "!=" 4 :left) (list "|>" 4 :left) (list "@" 5 :right) (list "^" 5 :right) (list "::" 6 :right) (list "+" 7 :left) (list "-" 7 :left) (list "*" 8 :left) (list "/" 8 :left) (list "%" 8 :left) (list "mod" 8 :left) (list "land" 8 :left) (list "lor" 8 :left) (list "lxor" 8 :left) (list "**" 9 :right) (list "lsl" 9 :right) (list "lsr" 9 :right) (list "asr" 9 :right))) (define ocaml-binop-prec (fn (op) (let ((entry (pratt-op-lookup ocaml-op-table op))) (if (= entry nil) 0 (pratt-op-prec entry))))) (define ocaml-binop-right? (fn (op) (let ((entry (pratt-op-lookup ocaml-op-table op))) (and (not (= entry nil)) (= (pratt-op-assoc entry) :right))))) (define ocaml-tok-is-binop? (fn (tok) (let ((tt (ocaml-tok-type tok)) (tv (ocaml-tok-value tok))) (cond ((= tt "op") (not (= (ocaml-binop-prec tv) 0))) ((= tt "keyword") (not (= (ocaml-binop-prec tv) 0))) (else false))))) (define ocaml-parse (fn (src) (let ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0)) (begin (set! tok-len (len tokens)) (define peek-tok (fn () (nth tokens idx))) (define advance-tok! (fn () (set! idx (+ idx 1)))) (define check-tok? (fn (type value) (let ((t (peek-tok))) (and (= (ocaml-tok-type t) type) (or (= value nil) (= (ocaml-tok-value t) value)))))) (define consume! (fn (type value) (if (check-tok? type value) (let ((t (peek-tok))) (begin (advance-tok!) t)) (error (str "ocaml-parse: expected " type " " value " got " (ocaml-tok-type (peek-tok)) " " (ocaml-tok-value (peek-tok))))))) (define at-kw? (fn (kw) (check-tok? "keyword" kw))) (define at-op? (fn (op) (check-tok? "op" op))) (define parse-pattern (fn () nil)) (define parse-pattern-cons (fn () nil)) (define parse-pattern-app (fn () nil)) (define parse-pattern-atom (fn () nil)) (define at-pattern-atom? (fn () (let ((tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") true) ((= tt "string") true) ((= tt "char") true) ((= tt "ident") true) ((= tt "ctor") true) ((and (= tt "keyword") (or (= tv "true") (= tv "false"))) true) ((and (= tt "op") (or (= tv "(") (= tv "["))) true) (else false))))) (set! parse-pattern-atom (fn () (let ((tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") (begin (advance-tok!) (if (= (round tv) tv) (list :plit (list :int tv)) (list :plit (list :float tv))))) ((= tt "string") (begin (advance-tok!) (list :plit (list :string tv)))) ((= tt "char") (begin (advance-tok!) (list :plit (list :char tv)))) ((and (= tt "keyword") (= tv "true")) (begin (advance-tok!) (list :plit (list :bool true)))) ((and (= tt "keyword") (= tv "false")) (begin (advance-tok!) (list :plit (list :bool false)))) ((and (= tt "ident") (= tv "_")) (begin (advance-tok!) (list :pwild))) ((= tt "ident") (begin (advance-tok!) (list :pvar tv))) ((= tt "ctor") (begin (advance-tok!) (list :pcon tv))) ((and (= tt "op") (= tv "(")) (begin (advance-tok!) (cond ((at-op? ")") (begin (advance-tok!) (list :plit (list :unit)))) (else (let ((first (parse-pattern))) (cond ((at-op? ",") (let ((items (list first))) (begin (define loop (fn () (when (at-op? ",") (begin (advance-tok!) (append! items (parse-pattern)) (loop))))) (loop) (consume! "op" ")") (cons :ptuple items)))) (else (begin (consume! "op" ")") first)))))))) ((and (= tt "op") (= tv "[")) (begin (advance-tok!) (cond ((at-op? "]") (begin (advance-tok!) (list :plist))) (else (let ((items (list))) (begin (append! items (parse-pattern)) (define loop (fn () (when (at-op? ";") (begin (advance-tok!) (when (not (at-op? "]")) (begin (append! items (parse-pattern)) (loop))))))) (loop) (consume! "op" "]") (cons :plist items))))))) (else (error (str "ocaml-parse: unexpected pattern token " tt " " tv " at idx " idx))))))) (set! parse-pattern-app (fn () (let ((head (parse-pattern-atom))) (cond ((and (= (nth head 0) :pcon) (at-pattern-atom?)) (let ((arg (parse-pattern-atom))) (let ((args (cond ((= (nth arg 0) :ptuple) (rest arg)) (else (list arg))))) (concat (list :pcon (nth head 1)) args)))) (else head))))) (set! parse-pattern-cons (fn () (let ((lhs (parse-pattern-app))) (cond ((at-op? "::") (begin (advance-tok!) (list :pcons lhs (parse-pattern-cons)))) (else lhs))))) (set! parse-pattern (fn () (parse-pattern-cons))) (define parse-expr (fn () nil)) (define parse-expr-no-seq (fn () nil)) (define parse-tuple (fn () nil)) (define parse-binop-rhs (fn (lhs min-prec) lhs)) (define parse-prefix (fn () nil)) (define parse-app (fn () nil)) (define parse-atom (fn () nil)) (set! parse-atom (fn () (let ((t (peek-tok)) (tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") (begin (advance-tok!) (if (= (round tv) tv) (list :int tv) (list :float tv)))) ((= tt "string") (begin (advance-tok!) (list :string tv))) ((= tt "char") (begin (advance-tok!) (list :char tv))) ((and (= tt "keyword") (= tv "true")) (begin (advance-tok!) (list :bool true))) ((and (= tt "keyword") (= tv "false")) (begin (advance-tok!) (list :bool false))) ((= tt "ident") (begin (advance-tok!) (list :var tv))) ((= tt "ctor") (begin (advance-tok!) (list :con tv))) ((and (= tt "op") (= tv "(")) (begin (advance-tok!) (cond ((at-op? ")") (begin (advance-tok!) (list :unit))) (else (let ((e (parse-expr))) (begin (consume! "op" ")") e)))))) ((and (= tt "op") (= tv "[")) (begin (advance-tok!) (cond ((at-op? "]") (begin (advance-tok!) (list :list))) (else (let ((items (list))) (begin (append! items (parse-expr-no-seq)) (define loop (fn () (when (at-op? ";") (begin (advance-tok!) (when (not (at-op? "]")) (begin (append! items (parse-expr-no-seq)) (loop))))))) (loop) (consume! "op" "]") (cons :list items))))))) ((at-kw? "begin") (begin (advance-tok!) (let ((e (parse-expr))) (begin (consume! "keyword" "end") e)))) (else (error (str "ocaml-parse: unexpected token " tt " " tv " at idx " idx))))))) (define at-app-start? (fn () (let ((tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") true) ((= tt "string") true) ((= tt "char") true) ((= tt "ident") true) ((= tt "ctor") true) ((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin"))) true) ((and (= tt "op") (or (= tv "(") (= tv "["))) true) (else false))))) (set! parse-app (fn () (let ((head (parse-atom))) (begin (define loop (fn () (when (at-app-start?) (let ((arg (parse-atom))) (begin (set! head (list :app head arg)) (loop)))))) (loop) head)))) (set! parse-prefix (fn () (cond ((at-op? "-") (begin (advance-tok!) (list :neg (parse-prefix)))) ((at-op? "!") (begin (advance-tok!) (list :deref (parse-prefix)))) ((at-kw? "not") (begin (advance-tok!) (list :not (parse-prefix)))) (else (parse-app))))) (set! parse-binop-rhs (fn (lhs min-prec) (let ((tok (peek-tok))) (cond ((not (ocaml-tok-is-binop? tok)) lhs) (else (let ((op (ocaml-tok-value tok)) (prec (ocaml-binop-prec (ocaml-tok-value tok)))) (cond ((< prec min-prec) lhs) (else (begin (advance-tok!) (let ((rhs (parse-prefix)) (next-min (if (ocaml-binop-right? op) prec (+ prec 1)))) (begin (set! rhs (parse-binop-rhs rhs next-min)) (parse-binop-rhs (list :op op lhs rhs) min-prec)))))))))))) (define parse-binary (fn () (let ((lhs (parse-prefix))) (parse-binop-rhs lhs 1)))) (set! parse-tuple (fn () (let ((first (parse-binary))) (cond ((at-op? ",") (let ((items (list first))) (begin (define loop (fn () (when (at-op? ",") (begin (advance-tok!) (append! items (parse-binary)) (loop))))) (loop) (cons :tuple items)))) (else first))))) (define parse-fun (fn () (let ((params (list))) (begin (define collect-params (fn () (when (check-tok? "ident" nil) (begin (append! params (ocaml-tok-value (peek-tok))) (advance-tok!) (collect-params))))) (collect-params) (when (= (len params) 0) (error "ocaml-parse: fun expects at least one parameter")) (consume! "op" "->") (let ((body (parse-expr))) (list :fun params body)))))) (define parse-let (fn () (let ((reccy false)) (begin (when (at-kw? "rec") (begin (advance-tok!) (set! reccy true))) (let ((name (ocaml-tok-value (consume! "ident" nil))) (params (list))) (begin (define collect-params (fn () (when (check-tok? "ident" nil) (begin (append! params (ocaml-tok-value (peek-tok))) (advance-tok!) (collect-params))))) (collect-params) (consume! "op" "=") (let ((rhs (parse-expr))) (begin (consume! "keyword" "in") (let ((body (parse-expr))) (if reccy (list :let-rec name params rhs body) (list :let name params rhs body))))))))))) (define parse-if (fn () (let ((cond-expr (parse-expr-no-seq))) (begin (consume! "keyword" "then") (let ((then-expr (parse-expr-no-seq))) (cond ((at-kw? "else") (begin (advance-tok!) (let ((else-expr (parse-expr-no-seq))) (list :if cond-expr then-expr else-expr)))) (else (list :if cond-expr then-expr (list :unit))))))))) (define parse-match (fn () (let ((scrut (parse-expr-no-seq))) (begin (consume! "keyword" "with") (when (at-op? "|") (advance-tok!)) (let ((cases (list))) (begin (define one (fn () (let ((p (parse-pattern))) (begin (consume! "op" "->") (let ((body (parse-expr))) (append! cases (list :case p body))))))) (one) (define loop (fn () (when (at-op? "|") (begin (advance-tok!) (one) (loop))))) (loop) (cons :match (cons scrut (list cases))))))))) (define parse-for (fn () (let ((name (ocaml-tok-value (consume! "ident" nil)))) (begin (consume! "op" "=") (let ((lo (parse-expr-no-seq))) (let ((dir (cond ((at-kw? "to") (begin (advance-tok!) :ascend)) ((at-kw? "downto") (begin (advance-tok!) :descend)) (else (error "ocaml-parse: expected to/downto in for"))))) (let ((hi (parse-expr-no-seq))) (begin (consume! "keyword" "do") (let ((body (parse-expr))) (begin (consume! "keyword" "done") (list :for name lo hi dir body))))))))))) (define parse-while (fn () (let ((cond-expr (parse-expr-no-seq))) (begin (consume! "keyword" "do") (let ((body (parse-expr))) (begin (consume! "keyword" "done") (list :while cond-expr body))))))) (set! parse-expr-no-seq (fn () (cond ((at-kw? "fun") (begin (advance-tok!) (parse-fun))) ((at-kw? "let") (begin (advance-tok!) (parse-let))) ((at-kw? "if") (begin (advance-tok!) (parse-if))) ((at-kw? "match") (begin (advance-tok!) (parse-match))) ((at-kw? "for") (begin (advance-tok!) (parse-for))) ((at-kw? "while") (begin (advance-tok!) (parse-while))) (else (parse-tuple))))) (set! parse-expr (fn () (let ((lhs (parse-expr-no-seq))) (cond ((at-op? ";") (let ((items (list lhs))) (begin (define loop (fn () (when (at-op? ";") (begin (advance-tok!) (cond ((at-kw? "end") nil) ((at-op? ")") nil) ((at-op? "|") nil) ((at-kw? "in") nil) ((at-kw? "then") nil) ((at-kw? "else") nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) (else (begin (append! items (parse-expr-no-seq)) (loop)))))))) (loop) (cons :seq items)))) (else lhs))))) (let ((result (parse-expr))) (begin (when (not (= (ocaml-tok-type (peek-tok)) "eof")) (error (str "ocaml-parse: trailing tokens at idx " idx " — got " (ocaml-tok-type (peek-tok)) " " (ocaml-tok-value (peek-tok))))) result)))))) (define ocaml-parse-program (fn (src) (let ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0) (decls (list))) (begin (set! tok-len (len tokens)) (define peek-tok (fn () (nth tokens idx))) (define advance-tok! (fn () (set! idx (+ idx 1)))) (define check-tok? (fn (type value) (let ((t (peek-tok))) (and (= (ocaml-tok-type t) type) (or (= value nil) (= (ocaml-tok-value t) value)))))) (define consume! (fn (type value) (if (check-tok? type value) (let ((t (peek-tok))) (begin (advance-tok!) t)) (error (str "ocaml-parse-program: expected " type " " value " got " (ocaml-tok-type (peek-tok)) " " (ocaml-tok-value (peek-tok))))))) (define at-kw? (fn (kw) (check-tok? "keyword" kw))) (define at-op? (fn (op) (check-tok? "op" op))) (define skip-double-semi! (fn () (when (at-op? ";;") (begin (advance-tok!) (skip-double-semi!))))) (define cur-pos (fn () (let ((t (peek-tok))) (if (= t nil) (len src) (get t :pos))))) (define skip-to-boundary! (fn () (cond ((>= idx tok-len) nil) ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-op? ";;") nil) ((at-kw? "let") nil) (else (begin (advance-tok!) (skip-to-boundary!)))))) (define parse-decl-let (fn () (advance-tok!) (let ((reccy false)) (begin (when (at-kw? "rec") (begin (advance-tok!) (set! reccy true))) (let ((name (ocaml-tok-value (consume! "ident" nil))) (params (list))) (begin (define collect-params (fn () (when (check-tok? "ident" nil) (begin (append! params (ocaml-tok-value (peek-tok))) (advance-tok!) (collect-params))))) (collect-params) (consume! "op" "=") (let ((expr-start (cur-pos))) (begin (skip-to-boundary!) (let ((expr-src (slice src expr-start (cur-pos)))) (let ((expr (ocaml-parse expr-src))) (if reccy (list :def-rec name params expr) (list :def name params expr)))))))))))) (define parse-decl-expr (fn () (let ((expr-start (cur-pos))) (begin (skip-to-boundary!) (let ((expr-src (slice src expr-start (cur-pos)))) (let ((expr (ocaml-parse expr-src))) (list :expr expr))))))) (define loop (fn () (begin (skip-double-semi!) (when (< idx tok-len) (cond ((= (ocaml-tok-type (peek-tok)) "eof") nil) ((at-kw? "let") (begin (append! decls (parse-decl-let)) (loop))) (else (begin (append! decls (parse-decl-expr)) (loop)))))))) (loop) (cons :program decls)))))