Files
rose-ash/lib/haskell/parser.sx
giles 58dbbc5d8b
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
haskell: full patterns — as/lazy/negative/infix + lambda & let pat LHS (+18 tests, 138/138)
2026-04-24 18:34:47 +00:00

925 lines
31 KiB
Plaintext

;; Haskell 98 expression parser.
;;
;; Input: the post-layout token list from (hk-layout (hk-tokenize src)).
;; Output: an AST. Nodes are plain lists tagged by a keyword head
;; (keywords evaluate to their string name, so `(list :var "x")` is
;; indistinguishable from `(list "var" "x")` at runtime — this lets
;; tests literally write `(list :var "x")` on both sides).
;;
;; Scope (this iteration — expressions only):
;; atoms int/float/string/char/var/con, parens, tuple, list, range
;; application left-associative, f x y z
;; prefix - unary negation on an lexp
;; infix ops precedence-climbing, full Haskell 98 default table
;; lambda \x y -> body
;; if if c then t else e
;; let let { x = e ; y = e } in body (uses layout braces)
;;
;; AST shapes:
;; (:int N)
;; (:float F)
;; (:string S)
;; (:char C)
;; (:var NAME)
;; (:con NAME)
;; (:app FN ARG) — binary, chain for multi-arg
;; (:op OP LHS RHS) — binary infix
;; (:neg E)
;; (:tuple ITEMS) — ITEMS is a list of AST nodes
;; (:list ITEMS) — enumerated list
;; (:range FROM TO) — [from..to]
;; (:range-step FROM NEXT TO) — [from,next..to]
;; (:if C T E)
;; (:lambda PARAMS BODY) — PARAMS is list of varids
;; (:let BINDS BODY) — BINDS is list of (:bind NAME EXPR)
;; ── Operator precedence table (Haskell 98 defaults) ──────────────
(define
hk-op-prec-table
(let
((t (dict)))
(dict-set! t "!!" {:prec 9 :assoc "left"})
(dict-set! t "." {:prec 9 :assoc "right"})
(dict-set! t "^" {:prec 8 :assoc "right"})
(dict-set! t "^^" {:prec 8 :assoc "right"})
(dict-set! t "**" {:prec 8 :assoc "right"})
(dict-set! t "*" {:prec 7 :assoc "left"})
(dict-set! t "/" {:prec 7 :assoc "left"})
(dict-set! t "+" {:prec 6 :assoc "left"})
(dict-set! t "-" {:prec 6 :assoc "left"})
(dict-set! t ":" {:prec 5 :assoc "right"})
(dict-set! t "++" {:prec 5 :assoc "right"})
(dict-set! t "==" {:prec 4 :assoc "non"})
(dict-set! t "/=" {:prec 4 :assoc "non"})
(dict-set! t "<" {:prec 4 :assoc "non"})
(dict-set! t "<=" {:prec 4 :assoc "non"})
(dict-set! t ">" {:prec 4 :assoc "non"})
(dict-set! t ">=" {:prec 4 :assoc "non"})
(dict-set! t "&&" {:prec 3 :assoc "right"})
(dict-set! t "||" {:prec 2 :assoc "right"})
(dict-set! t ">>" {:prec 1 :assoc "left"})
(dict-set! t ">>=" {:prec 1 :assoc "left"})
(dict-set! t "=<<" {:prec 1 :assoc "right"})
(dict-set! t "$" {:prec 0 :assoc "right"})
(dict-set! t "$!" {:prec 0 :assoc "right"})
t))
(define
hk-op-info
(fn
(op)
(if
(has-key? hk-op-prec-table op)
(get hk-op-prec-table op)
{:prec 9 :assoc "left"})))
;; ── Atom-start predicate ─────────────────────────────────────────
(define
hk-atom-start?
(fn
(tok)
(if
(nil? tok)
false
(let
((ty (get tok "type")))
(or
(= ty "integer")
(= ty "float")
(= ty "string")
(= ty "char")
(= ty "varid")
(= ty "conid")
(= ty "qvarid")
(= ty "qconid")
(= ty "lparen")
(= ty "lbracket"))))))
;; apat-start? — what can begin an atomic pattern
(define
hk-apat-start?
(fn
(tok)
(if
(nil? tok)
false
(let
((ty (get tok "type")) (val (get tok "value")))
(or
(and (= ty "reserved") (= val "_"))
(= ty "integer")
(= ty "float")
(= ty "string")
(= ty "char")
(= ty "varid")
(= ty "conid")
(= ty "qconid")
(= ty "lparen")
(= ty "lbracket")
(and (= ty "varsym") (= val "-"))
(and (= ty "reservedop") (= val "~")))))))
;; ── Main entry ───────────────────────────────────────────────────
(define
hk-parse-expr
(fn
(tokens)
(let
((toks tokens) (pos 0) (n (len tokens)))
(define hk-peek (fn () (if (< pos n) (nth toks pos) nil)))
(define
hk-peek-at
(fn
(offset)
(if (< (+ pos offset) n) (nth toks (+ pos offset)) nil)))
(define
hk-advance!
(fn () (let ((t (hk-peek))) (set! pos (+ pos 1)) t)))
(define
hk-peek-type
(fn () (let ((t (hk-peek))) (if (nil? t) "<eof>" (get t "type")))))
(define
hk-peek-value
(fn () (let ((t (hk-peek))) (if (nil? t) nil (get t "value")))))
(define
hk-match?
(fn
(ty v)
(let
((t (hk-peek)))
(and
(not (nil? t))
(= (get t "type") ty)
(or (nil? v) (= (get t "value") v))))))
(define
hk-err
(fn
(msg)
(raise
(str
"parse error: "
msg
" (at "
(hk-peek-type)
(if (nil? (hk-peek-value)) "" (str " " (hk-peek-value)))
")"))))
(define
hk-expect!
(fn
(ty v)
(if
(hk-match? ty v)
(hk-advance!)
(hk-err
(str "expected " ty (if (nil? v) "" (str " '" v "'")))))))
;; ── Atoms ────────────────────────────────────────────────
(define
hk-parse-aexp
(fn
()
(let
((t (hk-peek)))
(cond
((nil? t) (hk-err "unexpected end of input"))
((= (get t "type") "integer")
(do (hk-advance!) (list :int (get t "value"))))
((= (get t "type") "float")
(do (hk-advance!) (list :float (get t "value"))))
((= (get t "type") "string")
(do (hk-advance!) (list :string (get t "value"))))
((= (get t "type") "char")
(do (hk-advance!) (list :char (get t "value"))))
((= (get t "type") "varid")
(do (hk-advance!) (list :var (get t "value"))))
((= (get t "type") "conid")
(do (hk-advance!) (list :con (get t "value"))))
((= (get t "type") "qvarid")
(do (hk-advance!) (list :var (get t "value"))))
((= (get t "type") "qconid")
(do (hk-advance!) (list :con (get t "value"))))
((= (get t "type") "lparen") (hk-parse-parens))
((= (get t "type") "lbracket") (hk-parse-list-lit))
(:else (hk-err "unexpected token in expression"))))))
;; ── Parens / tuple / unit ────────────────────────────────
(define
hk-parse-parens
(fn
()
(hk-expect! "lparen" nil)
(cond
((hk-match? "rparen" nil)
(do (hk-advance!) (list :con "()")))
(:else
(let
((first-e (hk-parse-expr-inner))
(items (list))
(is-tuple false))
(append! items first-e)
(define
hk-tup-loop
(fn
()
(when
(hk-match? "comma" nil)
(do
(hk-advance!)
(set! is-tuple true)
(append! items (hk-parse-expr-inner))
(hk-tup-loop)))))
(hk-tup-loop)
(hk-expect! "rparen" nil)
(if is-tuple (list :tuple items) first-e))))))
;; ── List literal / range ─────────────────────────────────
(define
hk-parse-list-lit
(fn
()
(hk-expect! "lbracket" nil)
(cond
((hk-match? "rbracket" nil)
(do (hk-advance!) (list :list (list))))
(:else
(let
((first-e (hk-parse-expr-inner)))
(cond
((hk-match? "reservedop" "..")
(do
(hk-advance!)
(let
((end-e (hk-parse-expr-inner)))
(hk-expect! "rbracket" nil)
(list :range first-e end-e))))
((hk-match? "comma" nil)
(do
(hk-advance!)
(let
((second-e (hk-parse-expr-inner)))
(cond
((hk-match? "reservedop" "..")
(do
(hk-advance!)
(let
((end-e (hk-parse-expr-inner)))
(hk-expect! "rbracket" nil)
(list
:range-step
first-e
second-e
end-e))))
(:else
(let
((items (list)))
(append! items first-e)
(append! items second-e)
(define
hk-list-loop
(fn
()
(when
(hk-match? "comma" nil)
(do
(hk-advance!)
(append!
items
(hk-parse-expr-inner))
(hk-list-loop)))))
(hk-list-loop)
(hk-expect! "rbracket" nil)
(list :list items)))))))
(:else
(do
(hk-expect! "rbracket" nil)
(list :list (list first-e))))))))))
;; ── Application: left-assoc aexp chain ───────────────────
(define
hk-parse-fexp
(fn
()
(let
((fn-e (hk-parse-aexp)))
(define
hk-app-loop
(fn
()
(when
(hk-atom-start? (hk-peek))
(let
((arg (hk-parse-aexp)))
(set! fn-e (list :app fn-e arg))
(hk-app-loop)))))
(hk-app-loop)
fn-e)))
;; ── Lambda: \ apat1 apat2 ... apatn -> body ──────────────
(define
hk-parse-lambda
(fn
()
(hk-expect! "reservedop" "\\")
(let
((params (list)))
(when
(not (hk-apat-start? (hk-peek)))
(hk-err "lambda needs at least one pattern parameter"))
(define
hk-lam-loop
(fn
()
(when
(hk-apat-start? (hk-peek))
(do
(append! params (hk-parse-apat))
(hk-lam-loop)))))
(hk-lam-loop)
(hk-expect! "reservedop" "->")
(list :lambda params (hk-parse-expr-inner)))))
;; ── if-then-else ────────────────────────────────────────
(define
hk-parse-if
(fn
()
(hk-expect! "reserved" "if")
(let
((c (hk-parse-expr-inner)))
(hk-expect! "reserved" "then")
(let
((th (hk-parse-expr-inner)))
(hk-expect! "reserved" "else")
(let
((el (hk-parse-expr-inner)))
(list :if c th el))))))
;; ── Let expression ──────────────────────────────────────
(define
hk-parse-let
(fn
()
(hk-expect! "reserved" "let")
(let ((explicit (hk-match? "lbrace" nil)))
(if
explicit
(hk-advance!)
(hk-expect! "vlbrace" nil))
(let
((binds (list)))
(when
(not
(if
explicit
(hk-match? "rbrace" nil)
(hk-match? "vrbrace" nil)))
(do
(append! binds (hk-parse-bind))
(define
hk-let-loop
(fn
()
(when
(or
(hk-match? "semi" nil)
(hk-match? "vsemi" nil))
(do
(hk-advance!)
(when
(not
(if
explicit
(hk-match? "rbrace" nil)
(hk-match? "vrbrace" nil)))
(append! binds (hk-parse-bind)))
(hk-let-loop)))))
(hk-let-loop)))
(if
explicit
(hk-expect! "rbrace" nil)
(hk-expect! "vrbrace" nil))
(hk-expect! "reserved" "in")
(list :let binds (hk-parse-expr-inner))))))
;; Binding LHS is a pattern. Simple `x = e` parses as
;; (:bind (:p-var "x") e); pattern bindings like
;; `(x, y) = pair` parse with a p-tuple LHS.
(define
hk-parse-bind
(fn
()
(let
((pat (hk-parse-pat)))
(hk-expect! "reservedop" "=")
(list :bind pat (hk-parse-expr-inner)))))
;; ── Patterns ─────────────────────────────────────────────
(define
hk-parse-apat
(fn
()
(let
((t (hk-peek)))
(cond
((nil? t) (hk-err "unexpected end of input in pattern"))
((and
(= (get t "type") "reserved")
(= (get t "value") "_"))
(do (hk-advance!) (list :p-wild)))
((and
(= (get t "type") "reservedop")
(= (get t "value") "~"))
(do (hk-advance!) (list :p-lazy (hk-parse-apat))))
((and
(= (get t "type") "varsym")
(= (get t "value") "-"))
(do
(hk-advance!)
(let
((n (hk-peek)))
(cond
((nil? n)
(hk-err "expected numeric literal after '-'"))
((= (get n "type") "integer")
(do
(hk-advance!)
(list :p-int (- 0 (get n "value")))))
((= (get n "type") "float")
(do
(hk-advance!)
(list :p-float (- 0 (get n "value")))))
(:else
(hk-err
"only numeric literals may follow '-' in a pattern"))))))
((= (get t "type") "integer")
(do (hk-advance!) (list :p-int (get t "value"))))
((= (get t "type") "float")
(do (hk-advance!) (list :p-float (get t "value"))))
((= (get t "type") "string")
(do (hk-advance!) (list :p-string (get t "value"))))
((= (get t "type") "char")
(do (hk-advance!) (list :p-char (get t "value"))))
((= (get t "type") "varid")
(let
((next-t (hk-peek-at 1)))
(cond
((and
(not (nil? next-t))
(= (get next-t "type") "reservedop")
(= (get next-t "value") "@"))
(do
(hk-advance!)
(hk-advance!)
(list :p-as (get t "value") (hk-parse-apat))))
(:else
(do (hk-advance!) (list :p-var (get t "value")))))))
((= (get t "type") "conid")
(do
(hk-advance!)
(list :p-con (get t "value") (list))))
((= (get t "type") "qconid")
(do
(hk-advance!)
(list :p-con (get t "value") (list))))
((= (get t "type") "lparen") (hk-parse-paren-pat))
((= (get t "type") "lbracket") (hk-parse-list-pat))
(:else (hk-err "unexpected token in pattern"))))))
(define
hk-parse-paren-pat
(fn
()
(hk-expect! "lparen" nil)
(cond
((hk-match? "rparen" nil)
(do (hk-advance!) (list :p-con "()" (list))))
(:else
(let
((first-p (hk-parse-pat))
(items (list))
(is-tup false))
(append! items first-p)
(define
hk-ppt-loop
(fn
()
(when
(hk-match? "comma" nil)
(do
(hk-advance!)
(set! is-tup true)
(append! items (hk-parse-pat))
(hk-ppt-loop)))))
(hk-ppt-loop)
(hk-expect! "rparen" nil)
(if is-tup (list :p-tuple items) first-p))))))
(define
hk-parse-list-pat
(fn
()
(hk-expect! "lbracket" nil)
(cond
((hk-match? "rbracket" nil)
(do (hk-advance!) (list :p-list (list))))
(:else
(let
((items (list)))
(append! items (hk-parse-pat))
(define
hk-plt-loop
(fn
()
(when
(hk-match? "comma" nil)
(do
(hk-advance!)
(append! items (hk-parse-pat))
(hk-plt-loop)))))
(hk-plt-loop)
(hk-expect! "rbracket" nil)
(list :p-list items))))))
(define
hk-parse-pat-lhs
(fn
()
(let
((t (hk-peek)))
(cond
((and
(not (nil? t))
(or
(= (get t "type") "conid")
(= (get t "type") "qconid")))
(let
((name (get (hk-advance!) "value")) (args (list)))
(define
hk-pca-loop
(fn
()
(when
(hk-apat-start? (hk-peek))
(do
(append! args (hk-parse-apat))
(hk-pca-loop)))))
(hk-pca-loop)
(list :p-con name args)))
(:else (hk-parse-apat))))))
;; Infix constructor patterns: `x : xs`, `a `Cons` b`, etc.
;; Right-associative, single precedence band.
(define
hk-parse-pat
(fn
()
(let
((left (hk-parse-pat-lhs)))
(cond
((or
(= (hk-peek-type) "consym")
(and
(= (hk-peek-type) "reservedop")
(= (hk-peek-value) ":")))
(let
((op (get (hk-advance!) "value")))
(let
((right (hk-parse-pat)))
(list :p-con op (list left right)))))
(:else left)))))
;; ── case ─ of { pat -> expr ; ... } ─────────────────────
(define
hk-parse-alt
(fn
()
(let
((pat (hk-parse-pat)))
(hk-expect! "reservedop" "->")
(list :alt pat (hk-parse-expr-inner)))))
(define
hk-parse-case
(fn
()
(hk-expect! "reserved" "case")
(let
((scrut (hk-parse-expr-inner)))
(hk-expect! "reserved" "of")
(let
((explicit (hk-match? "lbrace" nil)))
(if
explicit
(hk-advance!)
(hk-expect! "vlbrace" nil))
(let
((alts (list)))
(when
(not
(if
explicit
(hk-match? "rbrace" nil)
(hk-match? "vrbrace" nil)))
(do
(append! alts (hk-parse-alt))
(define
hk-case-loop
(fn
()
(when
(or
(hk-match? "semi" nil)
(hk-match? "vsemi" nil))
(do
(hk-advance!)
(when
(not
(if
explicit
(hk-match? "rbrace" nil)
(hk-match? "vrbrace" nil)))
(append! alts (hk-parse-alt)))
(hk-case-loop)))))
(hk-case-loop)))
(if
explicit
(hk-expect! "rbrace" nil)
(hk-expect! "vrbrace" nil))
(list :case scrut alts))))))
;; ── do { stmt ; stmt ; ... } ────────────────────────────
;; Scan ahead (respecting paren/bracket/brace depth) for a `<-`
;; before the next `;` / `}` — distinguishes `pat <- e` from a
;; bare expression statement.
(define
hk-do-stmt-is-bind?
(fn
()
(let
((j pos) (depth 0) (found false) (done false))
(define
hk-scan-loop
(fn
()
(when
(and (not done) (< j n))
(let
((t (nth toks j)) (ty nil))
(set! ty (get t "type"))
(cond
((and
(= depth 0)
(or
(= ty "semi")
(= ty "vsemi")
(= ty "rbrace")
(= ty "vrbrace")))
(set! done true))
((and
(= depth 0)
(= ty "reservedop")
(= (get t "value") "<-"))
(do (set! found true) (set! done true)))
((or
(= ty "lparen")
(= ty "lbracket")
(= ty "lbrace")
(= ty "vlbrace"))
(set! depth (+ depth 1)))
((or
(= ty "rparen")
(= ty "rbracket"))
(set! depth (- depth 1)))
(:else nil))
(set! j (+ j 1))
(hk-scan-loop)))))
(hk-scan-loop)
found)))
(define
hk-parse-do-let
(fn
()
(hk-expect! "reserved" "let")
(let ((explicit (hk-match? "lbrace" nil)))
(if
explicit
(hk-advance!)
(hk-expect! "vlbrace" nil))
(let
((binds (list)))
(when
(not
(if
explicit
(hk-match? "rbrace" nil)
(hk-match? "vrbrace" nil)))
(do
(append! binds (hk-parse-bind))
(define
hk-dlet-loop
(fn
()
(when
(or
(hk-match? "semi" nil)
(hk-match? "vsemi" nil))
(do
(hk-advance!)
(when
(not
(if
explicit
(hk-match? "rbrace" nil)
(hk-match? "vrbrace" nil)))
(append! binds (hk-parse-bind)))
(hk-dlet-loop)))))
(hk-dlet-loop)))
(if
explicit
(hk-expect! "rbrace" nil)
(hk-expect! "vrbrace" nil))
(list :do-let binds)))))
(define
hk-parse-do-stmt
(fn
()
(cond
((hk-match? "reserved" "let") (hk-parse-do-let))
((hk-do-stmt-is-bind?)
(let
((pat (hk-parse-pat)))
(hk-expect! "reservedop" "<-")
(list :do-bind pat (hk-parse-expr-inner))))
(:else (list :do-expr (hk-parse-expr-inner))))))
(define
hk-parse-do
(fn
()
(hk-expect! "reserved" "do")
(let
((explicit (hk-match? "lbrace" nil)))
(if
explicit
(hk-advance!)
(hk-expect! "vlbrace" nil))
(let
((stmts (list)))
(when
(not
(if
explicit
(hk-match? "rbrace" nil)
(hk-match? "vrbrace" nil)))
(do
(append! stmts (hk-parse-do-stmt))
(define
hk-do-loop
(fn
()
(when
(or
(hk-match? "semi" nil)
(hk-match? "vsemi" nil))
(do
(hk-advance!)
(when
(not
(if
explicit
(hk-match? "rbrace" nil)
(hk-match? "vrbrace" nil)))
(append! stmts (hk-parse-do-stmt)))
(hk-do-loop)))))
(hk-do-loop)))
(if
explicit
(hk-expect! "rbrace" nil)
(hk-expect! "vrbrace" nil))
(list :do stmts)))))
;; ── lexp: lambda | if | let | case | do | fexp ──────────
(define
hk-parse-lexp
(fn
()
(cond
((hk-match? "reservedop" "\\") (hk-parse-lambda))
((hk-match? "reserved" "if") (hk-parse-if))
((hk-match? "reserved" "let") (hk-parse-let))
((hk-match? "reserved" "case") (hk-parse-case))
((hk-match? "reserved" "do") (hk-parse-do))
(:else (hk-parse-fexp)))))
;; ── Prefix: unary - ─────────────────────────────────────
(define
hk-parse-prefix
(fn
()
(cond
((and (hk-match? "varsym" "-"))
(do (hk-advance!) (list :neg (hk-parse-lexp))))
(:else (hk-parse-lexp)))))
;; ── Infix: precedence climbing ──────────────────────────
(define
hk-is-infix-op?
(fn
(tok)
(if
(nil? tok)
false
(or
(= (get tok "type") "varsym")
(= (get tok "type") "consym")
(and
(= (get tok "type") "reservedop")
(= (get tok "value") ":"))
(= (get tok "type") "backtick")))))
(define
hk-consume-op!
(fn
()
(let
((t (hk-peek)))
(cond
((= (get t "type") "backtick")
(do
(hk-advance!)
(let
((v (hk-expect! "varid" nil)))
(hk-expect! "backtick" nil)
(get v "value"))))
(:else (do (hk-advance!) (get t "value")))))))
(define
hk-parse-infix
(fn
(min-prec)
(let
((left (hk-parse-prefix)))
(define
hk-inf-loop
(fn
()
(when
(hk-is-infix-op? (hk-peek))
(let
((op-tok (hk-peek)))
(let
((op-name
(if
(= (get op-tok "type") "backtick")
(get (hk-peek-at 1) "value")
(get op-tok "value"))))
(let
((info (hk-op-info op-name)))
(when
(>= (get info "prec") min-prec)
(do
(hk-consume-op!)
(let
((next-min
(cond
((= (get info "assoc") "left")
(+ (get info "prec") 1))
((= (get info "assoc") "right")
(get info "prec"))
(:else (+ (get info "prec") 1)))))
(let
((right (hk-parse-infix next-min)))
(set!
left
(list :op op-name left right))
(hk-inf-loop)))))))))))
(hk-inf-loop)
left)))
(define hk-parse-expr-inner (fn () (hk-parse-infix 0)))
;; ── Top-level: strip leading/trailing module-level braces ─
(let
((start-brace
(or
(hk-match? "vlbrace" nil)
(hk-match? "lbrace" nil))))
(when start-brace (hk-advance!))
(let
((result (hk-parse-expr-inner)))
(when start-brace
(when
(or
(hk-match? "vrbrace" nil)
(hk-match? "rbrace" nil))
(hk-advance!)))
result)))))
;; ── Convenience: tokenize + layout + parse ───────────────────────
(define
hk-parse
(fn (src) (hk-parse-expr (hk-layout (hk-tokenize src)))))