;; lib/ocaml/parser.sx — OCaml expression parser. ;; ;; Input: token list from (ocaml-tokenize src). ;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head; ;; keywords serialize to their string name so `(list :var "x")` is the ;; same value as `(list "var" "x")` at runtime. ;; ;; Scope (this iteration — expressions only): ;; atoms int/float/string/char, true/false, unit (), var, con, list literal ;; application left-associative, f x y z ;; prefix -E unary minus, not E ;; infix standard ops via lib/guest/pratt.sx table ;; tuple a, b, c (lower than infix, higher than let/if) ;; parens (e) ;; if if c then t else e (else optional → unit) ;; fun fun x y -> body ;; let let x = e in body (no rec) ;; let f x y = e in body (function shorthand) ;; let rec f x = e in body ;; ;; AST shapes: ;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit) ;; (:var NAME) (:con NAME) ;; (:app FN ARG) — binary, chain for multi-arg ;; (:op OP LHS RHS) — binary infix; OP is the source string ;; (:neg E) (:not E) ;; (:tuple ITEMS) ;; (:list ITEMS) ;; (:if C T E) ;; (:fun PARAMS BODY) — PARAMS list of strings (idents) ;; (:let NAME PARAMS EXPR BODY) ;; (:let-rec NAME PARAMS EXPR BODY) (define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) (define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value)))) ;; Standard OCaml binary operator table. ;; Higher precedence = tighter binding. ;; ASSOC is :left or :right. (define ocaml-op-table (list (list "||" 2 :right) (list "or" 2 :right) (list "&&" 3 :right) (list "&" 3 :right) (list "=" 4 :left) (list "<" 4 :left) (list ">" 4 :left) (list "<=" 4 :left) (list ">=" 4 :left) (list "<>" 4 :left) (list "==" 4 :left) (list "!=" 4 :left) (list "|>" 4 :left) (list "@" 5 :right) (list "^" 5 :right) (list "::" 6 :right) (list "+" 7 :left) (list "-" 7 :left) (list "*" 8 :left) (list "/" 8 :left) (list "%" 8 :left) (list "mod" 8 :left) (list "land" 8 :left) (list "lor" 8 :left) (list "lxor" 8 :left) (list "**" 9 :right) (list "lsl" 9 :right) (list "lsr" 9 :right) (list "asr" 9 :right))) (define ocaml-binop-prec (fn (op) (let ((entry (pratt-op-lookup ocaml-op-table op))) (if (= entry nil) 0 (pratt-op-prec entry))))) (define ocaml-binop-right? (fn (op) (let ((entry (pratt-op-lookup ocaml-op-table op))) (and (not (= entry nil)) (= (pratt-op-assoc entry) :right))))) ;; Some OCaml binops are spelled with keyword tokens (mod / land / lor / ;; lxor / lsl / lsr / asr / or). Recognise both shapes. (define ocaml-tok-is-binop? (fn (tok) (let ((tt (ocaml-tok-type tok)) (tv (ocaml-tok-value tok))) (cond ((= tt "op") (not (= (ocaml-binop-prec tv) 0))) ((= tt "keyword") (not (= (ocaml-binop-prec tv) 0))) (else false))))) (define ocaml-parse (fn (src) (let ((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0)) (begin (set! tok-len (len tokens)) (define peek-tok (fn () (nth tokens idx))) (define advance-tok! (fn () (set! idx (+ idx 1)))) (define check-tok? (fn (type value) (let ((t (peek-tok))) (and (= (ocaml-tok-type t) type) (or (= value nil) (= (ocaml-tok-value t) value)))))) (define consume! (fn (type value) (if (check-tok? type value) (let ((t (peek-tok))) (begin (advance-tok!) t)) (error (str "ocaml-parse: expected " type " " value " got " (ocaml-tok-type (peek-tok)) " " (ocaml-tok-value (peek-tok))))))) (define at-kw? (fn (kw) (check-tok? "keyword" kw))) (define at-op? (fn (op) (check-tok? "op" op))) (define parse-expr (fn () nil)) (define parse-tuple (fn () nil)) (define parse-binop-rhs (fn (lhs min-prec) lhs)) (define parse-prefix (fn () nil)) (define parse-app (fn () nil)) (define parse-atom (fn () nil)) (set! parse-atom (fn () (let ((t (peek-tok)) (tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") (begin (advance-tok!) (if (= (round tv) tv) (list :int tv) (list :float tv)))) ((= tt "string") (begin (advance-tok!) (list :string tv))) ((= tt "char") (begin (advance-tok!) (list :char tv))) ((and (= tt "keyword") (= tv "true")) (begin (advance-tok!) (list :bool true))) ((and (= tt "keyword") (= tv "false")) (begin (advance-tok!) (list :bool false))) ((= tt "ident") (begin (advance-tok!) (list :var tv))) ((= tt "ctor") (begin (advance-tok!) (list :con tv))) ((and (= tt "op") (= tv "(")) (begin (advance-tok!) (cond ((at-op? ")") (begin (advance-tok!) (list :unit))) (else (let ((e (parse-expr))) (begin (consume! "op" ")") e)))))) ((and (= tt "op") (= tv "[")) (begin (advance-tok!) (cond ((at-op? "]") (begin (advance-tok!) (list :list))) (else (let ((items (list))) (begin (append! items (parse-expr)) (define loop (fn () (when (at-op? ";") (begin (advance-tok!) (when (not (at-op? "]")) (begin (append! items (parse-expr)) (loop))))))) (loop) (consume! "op" "]") (cons :list items))))))) ((at-kw? "begin") (begin (advance-tok!) (let ((e (parse-expr))) (begin (consume! "keyword" "end") e)))) (else (error (str "ocaml-parse: unexpected token " tt " " tv " at idx " idx))))))) (define at-app-start? (fn () (let ((tt (ocaml-tok-type (peek-tok))) (tv (ocaml-tok-value (peek-tok)))) (cond ((= tt "number") true) ((= tt "string") true) ((= tt "char") true) ((= tt "ident") true) ((= tt "ctor") true) ((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin"))) true) ((and (= tt "op") (or (= tv "(") (= tv "["))) true) (else false))))) (set! parse-app (fn () (let ((head (parse-atom))) (begin (define loop (fn () (when (at-app-start?) (let ((arg (parse-atom))) (begin (set! head (list :app head arg)) (loop)))))) (loop) head)))) (set! parse-prefix (fn () (cond ((at-op? "-") (begin (advance-tok!) (list :neg (parse-prefix)))) ((at-kw? "not") (begin (advance-tok!) (list :not (parse-prefix)))) (else (parse-app))))) (set! parse-binop-rhs (fn (lhs min-prec) (let ((tok (peek-tok))) (cond ((not (ocaml-tok-is-binop? tok)) lhs) (else (let ((op (ocaml-tok-value tok)) (prec (ocaml-binop-prec (ocaml-tok-value tok)))) (cond ((< prec min-prec) lhs) (else (begin (advance-tok!) (let ((rhs (parse-prefix)) (next-min (if (ocaml-binop-right? op) prec (+ prec 1)))) (begin (set! rhs (parse-binop-rhs rhs next-min)) (parse-binop-rhs (list :op op lhs rhs) min-prec)))))))))))) (define parse-binary (fn () (let ((lhs (parse-prefix))) (parse-binop-rhs lhs 1)))) (set! parse-tuple (fn () (let ((first (parse-binary))) (cond ((at-op? ",") (let ((items (list first))) (begin (define loop (fn () (when (at-op? ",") (begin (advance-tok!) (append! items (parse-binary)) (loop))))) (loop) (cons :tuple items)))) (else first))))) (define parse-fun (fn () (let ((params (list))) (begin (define collect-params (fn () (when (check-tok? "ident" nil) (begin (append! params (ocaml-tok-value (peek-tok))) (advance-tok!) (collect-params))))) (collect-params) (when (= (len params) 0) (error "ocaml-parse: fun expects at least one parameter")) (consume! "op" "->") (let ((body (parse-expr))) (list :fun params body)))))) (define parse-let (fn () (let ((reccy false)) (begin (when (at-kw? "rec") (begin (advance-tok!) (set! reccy true))) (let ((name (ocaml-tok-value (consume! "ident" nil))) (params (list))) (begin (define collect-params (fn () (when (check-tok? "ident" nil) (begin (append! params (ocaml-tok-value (peek-tok))) (advance-tok!) (collect-params))))) (collect-params) (consume! "op" "=") (let ((rhs (parse-expr))) (begin (consume! "keyword" "in") (let ((body (parse-expr))) (if reccy (list :let-rec name params rhs body) (list :let name params rhs body))))))))))) (define parse-if (fn () (let ((cond-expr (parse-expr))) (begin (consume! "keyword" "then") (let ((then-expr (parse-expr))) (cond ((at-kw? "else") (begin (advance-tok!) (let ((else-expr (parse-expr))) (list :if cond-expr then-expr else-expr)))) (else (list :if cond-expr then-expr (list :unit))))))))) (set! parse-expr (fn () (cond ((at-kw? "fun") (begin (advance-tok!) (parse-fun))) ((at-kw? "let") (begin (advance-tok!) (parse-let))) ((at-kw? "if") (begin (advance-tok!) (parse-if))) (else (parse-tuple))))) (let ((result (parse-expr))) (begin (when (not (= (ocaml-tok-type (peek-tok)) "eof")) (error (str "ocaml-parse: trailing tokens at idx " idx " — got " (ocaml-tok-type (peek-tok)) " " (ocaml-tok-value (peek-tok))))) result))))))