Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 55s
Atoms (literals/var/con/unit/list), application (left-assoc), prefix - / not, 29-op precedence table via pratt-op-lookup (incl. keyword-spelled mod/land/ lor/lxor/lsl/lsr/asr), tuples, parens, if/then/else, fun, let, let rec with function shorthand. AST follows Haskell-on-SX (:int / :op / :fun / etc).
419 lines
14 KiB
Plaintext
419 lines
14 KiB
Plaintext
;; lib/ocaml/parser.sx — OCaml expression parser.
|
|
;;
|
|
;; Input: token list from (ocaml-tokenize src).
|
|
;; Output: an OCaml AST. Nodes are plain lists tagged by a keyword head;
|
|
;; keywords serialize to their string name so `(list :var "x")` is the
|
|
;; same value as `(list "var" "x")` at runtime.
|
|
;;
|
|
;; Scope (this iteration — expressions only):
|
|
;; atoms int/float/string/char, true/false, unit (), var, con, list literal
|
|
;; application left-associative, f x y z
|
|
;; prefix -E unary minus, not E
|
|
;; infix standard ops via lib/guest/pratt.sx table
|
|
;; tuple a, b, c (lower than infix, higher than let/if)
|
|
;; parens (e)
|
|
;; if if c then t else e (else optional → unit)
|
|
;; fun fun x y -> body
|
|
;; let let x = e in body (no rec)
|
|
;; let f x y = e in body (function shorthand)
|
|
;; let rec f x = e in body
|
|
;;
|
|
;; AST shapes:
|
|
;; (:int N) (:float N) (:string S) (:char C) (:bool B) (:unit)
|
|
;; (:var NAME) (:con NAME)
|
|
;; (:app FN ARG) — binary, chain for multi-arg
|
|
;; (:op OP LHS RHS) — binary infix; OP is the source string
|
|
;; (:neg E) (:not E)
|
|
;; (:tuple ITEMS)
|
|
;; (:list ITEMS)
|
|
;; (:if C T E)
|
|
;; (:fun PARAMS BODY) — PARAMS list of strings (idents)
|
|
;; (:let NAME PARAMS EXPR BODY)
|
|
;; (:let-rec NAME PARAMS EXPR BODY)
|
|
|
|
(define ocaml-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
|
|
|
|
(define ocaml-tok-value (fn (t) (if (= t nil) nil (get t :value))))
|
|
|
|
;; Standard OCaml binary operator table.
|
|
;; Higher precedence = tighter binding.
|
|
;; ASSOC is :left or :right.
|
|
(define
|
|
ocaml-op-table
|
|
(list
|
|
(list "||" 2 :right)
|
|
(list "or" 2 :right)
|
|
(list "&&" 3 :right)
|
|
(list "&" 3 :right)
|
|
(list "=" 4 :left)
|
|
(list "<" 4 :left)
|
|
(list ">" 4 :left)
|
|
(list "<=" 4 :left)
|
|
(list ">=" 4 :left)
|
|
(list "<>" 4 :left)
|
|
(list "==" 4 :left)
|
|
(list "!=" 4 :left)
|
|
(list "|>" 4 :left)
|
|
(list "@" 5 :right)
|
|
(list "^" 5 :right)
|
|
(list "::" 6 :right)
|
|
(list "+" 7 :left)
|
|
(list "-" 7 :left)
|
|
(list "*" 8 :left)
|
|
(list "/" 8 :left)
|
|
(list "%" 8 :left)
|
|
(list "mod" 8 :left)
|
|
(list "land" 8 :left)
|
|
(list "lor" 8 :left)
|
|
(list "lxor" 8 :left)
|
|
(list "**" 9 :right)
|
|
(list "lsl" 9 :right)
|
|
(list "lsr" 9 :right)
|
|
(list "asr" 9 :right)))
|
|
|
|
(define
|
|
ocaml-binop-prec
|
|
(fn
|
|
(op)
|
|
(let
|
|
((entry (pratt-op-lookup ocaml-op-table op)))
|
|
(if (= entry nil) 0 (pratt-op-prec entry)))))
|
|
|
|
(define
|
|
ocaml-binop-right?
|
|
(fn
|
|
(op)
|
|
(let
|
|
((entry (pratt-op-lookup ocaml-op-table op)))
|
|
(and (not (= entry nil)) (= (pratt-op-assoc entry) :right)))))
|
|
|
|
;; Some OCaml binops are spelled with keyword tokens (mod / land / lor /
|
|
;; lxor / lsl / lsr / asr / or). Recognise both shapes.
|
|
(define
|
|
ocaml-tok-is-binop?
|
|
(fn
|
|
(tok)
|
|
(let
|
|
((tt (ocaml-tok-type tok)) (tv (ocaml-tok-value tok)))
|
|
(cond
|
|
((= tt "op") (not (= (ocaml-binop-prec tv) 0)))
|
|
((= tt "keyword") (not (= (ocaml-binop-prec tv) 0)))
|
|
(else false)))))
|
|
|
|
(define
|
|
ocaml-parse
|
|
(fn
|
|
(src)
|
|
(let
|
|
((tokens (ocaml-tokenize src)) (idx 0) (tok-len 0))
|
|
(begin
|
|
(set! tok-len (len tokens))
|
|
(define peek-tok (fn () (nth tokens idx)))
|
|
(define advance-tok! (fn () (set! idx (+ idx 1))))
|
|
(define
|
|
check-tok?
|
|
(fn
|
|
(type value)
|
|
(let
|
|
((t (peek-tok)))
|
|
(and
|
|
(= (ocaml-tok-type t) type)
|
|
(or (= value nil) (= (ocaml-tok-value t) value))))))
|
|
(define
|
|
consume!
|
|
(fn
|
|
(type value)
|
|
(if
|
|
(check-tok? type value)
|
|
(let ((t (peek-tok))) (begin (advance-tok!) t))
|
|
(error
|
|
(str
|
|
"ocaml-parse: expected "
|
|
type
|
|
" "
|
|
value
|
|
" got "
|
|
(ocaml-tok-type (peek-tok))
|
|
" "
|
|
(ocaml-tok-value (peek-tok)))))))
|
|
(define at-kw? (fn (kw) (check-tok? "keyword" kw)))
|
|
(define at-op? (fn (op) (check-tok? "op" op)))
|
|
(define parse-expr (fn () nil))
|
|
(define parse-tuple (fn () nil))
|
|
(define parse-binop-rhs (fn (lhs min-prec) lhs))
|
|
(define parse-prefix (fn () nil))
|
|
(define parse-app (fn () nil))
|
|
(define parse-atom (fn () nil))
|
|
(set!
|
|
parse-atom
|
|
(fn
|
|
()
|
|
(let
|
|
((t (peek-tok))
|
|
(tt (ocaml-tok-type (peek-tok)))
|
|
(tv (ocaml-tok-value (peek-tok))))
|
|
(cond
|
|
((= tt "number")
|
|
(begin
|
|
(advance-tok!)
|
|
(if (= (round tv) tv) (list :int tv) (list :float tv))))
|
|
((= tt "string") (begin (advance-tok!) (list :string tv)))
|
|
((= tt "char") (begin (advance-tok!) (list :char tv)))
|
|
((and (= tt "keyword") (= tv "true"))
|
|
(begin (advance-tok!) (list :bool true)))
|
|
((and (= tt "keyword") (= tv "false"))
|
|
(begin (advance-tok!) (list :bool false)))
|
|
((= tt "ident") (begin (advance-tok!) (list :var tv)))
|
|
((= tt "ctor") (begin (advance-tok!) (list :con tv)))
|
|
((and (= tt "op") (= tv "("))
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((at-op? ")") (begin (advance-tok!) (list :unit)))
|
|
(else
|
|
(let
|
|
((e (parse-expr)))
|
|
(begin (consume! "op" ")") e))))))
|
|
((and (= tt "op") (= tv "["))
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((at-op? "]") (begin (advance-tok!) (list :list)))
|
|
(else
|
|
(let
|
|
((items (list)))
|
|
(begin
|
|
(append! items (parse-expr))
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-op? ";")
|
|
(begin
|
|
(advance-tok!)
|
|
(when
|
|
(not (at-op? "]"))
|
|
(begin
|
|
(append! items (parse-expr))
|
|
(loop)))))))
|
|
(loop)
|
|
(consume! "op" "]")
|
|
(cons :list items)))))))
|
|
((at-kw? "begin")
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((e (parse-expr)))
|
|
(begin (consume! "keyword" "end") e))))
|
|
(else
|
|
(error
|
|
(str
|
|
"ocaml-parse: unexpected token "
|
|
tt
|
|
" "
|
|
tv
|
|
" at idx "
|
|
idx)))))))
|
|
(define
|
|
at-app-start?
|
|
(fn
|
|
()
|
|
(let
|
|
((tt (ocaml-tok-type (peek-tok)))
|
|
(tv (ocaml-tok-value (peek-tok))))
|
|
(cond
|
|
((= tt "number") true)
|
|
((= tt "string") true)
|
|
((= tt "char") true)
|
|
((= tt "ident") true)
|
|
((= tt "ctor") true)
|
|
((and (= tt "keyword") (or (= tv "true") (= tv "false") (= tv "begin")))
|
|
true)
|
|
((and (= tt "op") (or (= tv "(") (= tv "["))) true)
|
|
(else false)))))
|
|
(set!
|
|
parse-app
|
|
(fn
|
|
()
|
|
(let
|
|
((head (parse-atom)))
|
|
(begin
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-app-start?)
|
|
(let
|
|
((arg (parse-atom)))
|
|
(begin (set! head (list :app head arg)) (loop))))))
|
|
(loop)
|
|
head))))
|
|
(set!
|
|
parse-prefix
|
|
(fn
|
|
()
|
|
(cond
|
|
((at-op? "-")
|
|
(begin (advance-tok!) (list :neg (parse-prefix))))
|
|
((at-kw? "not")
|
|
(begin (advance-tok!) (list :not (parse-prefix))))
|
|
(else (parse-app)))))
|
|
(set!
|
|
parse-binop-rhs
|
|
(fn
|
|
(lhs min-prec)
|
|
(let
|
|
((tok (peek-tok)))
|
|
(cond
|
|
((not (ocaml-tok-is-binop? tok)) lhs)
|
|
(else
|
|
(let
|
|
((op (ocaml-tok-value tok))
|
|
(prec (ocaml-binop-prec (ocaml-tok-value tok))))
|
|
(cond
|
|
((< prec min-prec) lhs)
|
|
(else
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((rhs (parse-prefix))
|
|
(next-min
|
|
(if
|
|
(ocaml-binop-right? op)
|
|
prec
|
|
(+ prec 1))))
|
|
(begin
|
|
(set! rhs (parse-binop-rhs rhs next-min))
|
|
(parse-binop-rhs (list :op op lhs rhs) min-prec))))))))))))
|
|
(define
|
|
parse-binary
|
|
(fn
|
|
()
|
|
(let ((lhs (parse-prefix))) (parse-binop-rhs lhs 1))))
|
|
(set!
|
|
parse-tuple
|
|
(fn
|
|
()
|
|
(let
|
|
((first (parse-binary)))
|
|
(cond
|
|
((at-op? ",")
|
|
(let
|
|
((items (list first)))
|
|
(begin
|
|
(define
|
|
loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-op? ",")
|
|
(begin
|
|
(advance-tok!)
|
|
(append! items (parse-binary))
|
|
(loop)))))
|
|
(loop)
|
|
(cons :tuple items))))
|
|
(else first)))))
|
|
(define
|
|
parse-fun
|
|
(fn
|
|
()
|
|
(let
|
|
((params (list)))
|
|
(begin
|
|
(define
|
|
collect-params
|
|
(fn
|
|
()
|
|
(when
|
|
(check-tok? "ident" nil)
|
|
(begin
|
|
(append! params (ocaml-tok-value (peek-tok)))
|
|
(advance-tok!)
|
|
(collect-params)))))
|
|
(collect-params)
|
|
(when
|
|
(= (len params) 0)
|
|
(error "ocaml-parse: fun expects at least one parameter"))
|
|
(consume! "op" "->")
|
|
(let ((body (parse-expr))) (list :fun params body))))))
|
|
(define
|
|
parse-let
|
|
(fn
|
|
()
|
|
(let
|
|
((reccy false))
|
|
(begin
|
|
(when
|
|
(at-kw? "rec")
|
|
(begin (advance-tok!) (set! reccy true)))
|
|
(let
|
|
((name (ocaml-tok-value (consume! "ident" nil)))
|
|
(params (list)))
|
|
(begin
|
|
(define
|
|
collect-params
|
|
(fn
|
|
()
|
|
(when
|
|
(check-tok? "ident" nil)
|
|
(begin
|
|
(append! params (ocaml-tok-value (peek-tok)))
|
|
(advance-tok!)
|
|
(collect-params)))))
|
|
(collect-params)
|
|
(consume! "op" "=")
|
|
(let
|
|
((rhs (parse-expr)))
|
|
(begin
|
|
(consume! "keyword" "in")
|
|
(let
|
|
((body (parse-expr)))
|
|
(if
|
|
reccy
|
|
(list :let-rec name params rhs body)
|
|
(list :let name params rhs body)))))))))))
|
|
(define
|
|
parse-if
|
|
(fn
|
|
()
|
|
(let
|
|
((cond-expr (parse-expr)))
|
|
(begin
|
|
(consume! "keyword" "then")
|
|
(let
|
|
((then-expr (parse-expr)))
|
|
(cond
|
|
((at-kw? "else")
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((else-expr (parse-expr)))
|
|
(list :if cond-expr then-expr else-expr))))
|
|
(else (list :if cond-expr then-expr (list :unit)))))))))
|
|
(set!
|
|
parse-expr
|
|
(fn
|
|
()
|
|
(cond
|
|
((at-kw? "fun") (begin (advance-tok!) (parse-fun)))
|
|
((at-kw? "let") (begin (advance-tok!) (parse-let)))
|
|
((at-kw? "if") (begin (advance-tok!) (parse-if)))
|
|
(else (parse-tuple)))))
|
|
(let
|
|
((result (parse-expr)))
|
|
(begin
|
|
(when
|
|
(not (= (ocaml-tok-type (peek-tok)) "eof"))
|
|
(error
|
|
(str
|
|
"ocaml-parse: trailing tokens at idx "
|
|
idx
|
|
" — got "
|
|
(ocaml-tok-type (peek-tok))
|
|
" "
|
|
(ocaml-tok-value (peek-tok)))))
|
|
result))))))
|