From 869b0b552d8744a65347717360e678b3bd542125 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 24 Apr 2026 19:06:38 +0000 Subject: [PATCH] haskell: top-level decls (fn-clause, type-sig, data, type, newtype, fixity) + type parser (+24 tests, 162/162) --- lib/haskell/parser.sx | 451 +++++++++++++++++++++++++++++- lib/haskell/tests/parser-decls.sx | 273 ++++++++++++++++++ plans/haskell-on-sx.md | 27 +- 3 files changed, 747 insertions(+), 4 deletions(-) create mode 100644 lib/haskell/tests/parser-decls.sx diff --git a/lib/haskell/parser.sx b/lib/haskell/parser.sx index 07db0617..1b442e2d 100644 --- a/lib/haskell/parser.sx +++ b/lib/haskell/parser.sx @@ -119,11 +119,28 @@ (and (= ty "varsym") (= val "-")) (and (= ty "reservedop") (= val "~"))))))) +;; ── Atype-start predicate (types) ─────────────────────────────── +(define + hk-atype-start? + (fn + (tok) + (if + (nil? tok) + false + (let + ((ty (get tok "type"))) + (or + (= ty "conid") + (= ty "qconid") + (= ty "varid") + (= ty "lparen") + (= ty "lbracket")))))) + ;; ── Main entry ─────────────────────────────────────────────────── (define - hk-parse-expr + hk-parser (fn - (tokens) + (tokens mode) (let ((toks tokens) (pos 0) (n (len tokens))) @@ -901,6 +918,423 @@ (define hk-parse-expr-inner (fn () (hk-parse-infix 0))) + ;; ── Types ──────────────────────────────────────────────── + ;; AST: (:t-var N) | (:t-con N) | (:t-app F A) + ;; (:t-fun A B) | (:t-tuple ITEMS) | (:t-list T) + (define + hk-parse-paren-type + (fn + () + (hk-expect! "lparen" nil) + (cond + ((hk-match? "rparen" nil) + (do (hk-advance!) (list :t-con "()"))) + (:else + (let + ((first-t (hk-parse-type)) + (items (list)) + (is-tup false)) + (append! items first-t) + (define + hk-pt-loop + (fn + () + (when + (hk-match? "comma" nil) + (do + (hk-advance!) + (set! is-tup true) + (append! items (hk-parse-type)) + (hk-pt-loop))))) + (hk-pt-loop) + (hk-expect! "rparen" nil) + (if is-tup (list :t-tuple items) first-t)))))) + + (define + hk-parse-list-type + (fn + () + (hk-expect! "lbracket" nil) + (cond + ((hk-match? "rbracket" nil) + (do (hk-advance!) (list :t-con "[]"))) + (:else + (let + ((inner (hk-parse-type))) + (hk-expect! "rbracket" nil) + (list :t-list inner)))))) + + (define + hk-parse-atype + (fn + () + (let + ((t (hk-peek))) + (cond + ((nil? t) (hk-err "unexpected end of input in type")) + ((= (get t "type") "conid") + (do (hk-advance!) (list :t-con (get t "value")))) + ((= (get t "type") "qconid") + (do (hk-advance!) (list :t-con (get t "value")))) + ((= (get t "type") "varid") + (do (hk-advance!) (list :t-var (get t "value")))) + ((= (get t "type") "lparen") (hk-parse-paren-type)) + ((= (get t "type") "lbracket") (hk-parse-list-type)) + (:else (hk-err "unexpected token in type")))))) + + (define + hk-parse-btype + (fn + () + (let + ((head (hk-parse-atype))) + (define + hk-bt-loop + (fn + () + (when + (hk-atype-start? (hk-peek)) + (do + (set! head (list :t-app head (hk-parse-atype))) + (hk-bt-loop))))) + (hk-bt-loop) + head))) + + (define + hk-parse-type + (fn + () + (let + ((left (hk-parse-btype))) + (cond + ((hk-match? "reservedop" "->") + (do (hk-advance!) (list :t-fun left (hk-parse-type)))) + (:else left))))) + + ;; ── Top-level declarations ────────────────────────────── + ;; AST: + ;; (:fun-clause NAME APATS BODY) + ;; (:pat-bind PAT BODY) + ;; (:type-sig NAMES TYPE) + ;; (:data NAME TVARS CONS) — CONS is list of :con-def + ;; (:con-def CNAME FIELDS) — FIELDS is list of types + ;; (:type-syn NAME TVARS TYPE) + ;; (:newtype NAME TVARS CNAME FIELD) + ;; (:fixity ASSOC PREC OPS) — ASSOC ∈ "l" | "r" | "n" + ;; (:program DECLS) + + ;; Scan ahead for a top-level `::` (respecting paren/bracket + ;; depth) before the next statement terminator. Used to tell a + ;; type signature apart from a function clause. + (define + hk-has-top-dcolon? + (fn + () + (let + ((j pos) (depth 0) (found false) (done false)) + (define + hk-dcol-loop + (fn + () + (when + (and (not done) (< j n)) + (let + ((t (nth toks j)) (ty (get t "type"))) + (cond + ((and + (= depth 0) + (or + (= ty "vsemi") + (= ty "semi") + (= ty "rbrace") + (= ty "vrbrace"))) + (set! done true)) + ((and + (= depth 0) + (= ty "reservedop") + (= (get t "value") "::")) + (do (set! found true) (set! done true))) + ((or + (= ty "lparen") + (= ty "lbracket") + (= ty "lbrace") + (= ty "vlbrace")) + (set! depth (+ depth 1))) + ((or + (= ty "rparen") + (= ty "rbracket")) + (set! depth (- depth 1))) + (:else nil)) + (set! j (+ j 1)) + (hk-dcol-loop))))) + (hk-dcol-loop) + found))) + + (define + hk-parse-type-sig + (fn + () + (let + ((names (list))) + (when + (not (hk-match? "varid" nil)) + (hk-err "type signature must start with a variable")) + (append! names (get (hk-advance!) "value")) + (define + hk-sig-loop + (fn + () + (when + (hk-match? "comma" nil) + (do + (hk-advance!) + (when + (not (hk-match? "varid" nil)) + (hk-err "expected name after ','")) + (append! names (get (hk-advance!) "value")) + (hk-sig-loop))))) + (hk-sig-loop) + (hk-expect! "reservedop" "::") + (list :type-sig names (hk-parse-type))))) + + (define + hk-parse-fun-clause + (fn + () + (let + ((t (hk-peek))) + (cond + ((and + (not (nil? t)) + (= (get t "type") "varid")) + (let + ((name (get (hk-advance!) "value")) + (pats (list))) + (define + hk-fc-loop + (fn + () + (when + (hk-apat-start? (hk-peek)) + (do + (append! pats (hk-parse-apat)) + (hk-fc-loop))))) + (hk-fc-loop) + (hk-expect! "reservedop" "=") + (list :fun-clause name pats (hk-parse-expr-inner)))) + (:else + (let + ((pat (hk-parse-pat))) + (hk-expect! "reservedop" "=") + (list :pat-bind pat (hk-parse-expr-inner)))))))) + + (define + hk-parse-con-def + (fn + () + (when + (not (hk-match? "conid" nil)) + (hk-err "expected constructor name")) + (let + ((name (get (hk-advance!) "value")) (fields (list))) + (define + hk-cd-loop + (fn + () + (when + (hk-atype-start? (hk-peek)) + (do + (append! fields (hk-parse-atype)) + (hk-cd-loop))))) + (hk-cd-loop) + (list :con-def name fields)))) + + (define + hk-parse-tvars + (fn + () + (let ((vs (list))) + (define + hk-tv-loop + (fn + () + (when + (hk-match? "varid" nil) + (do + (append! vs (get (hk-advance!) "value")) + (hk-tv-loop))))) + (hk-tv-loop) + vs))) + + (define + hk-parse-data + (fn + () + (hk-expect! "reserved" "data") + (when + (not (hk-match? "conid" nil)) + (hk-err "data declaration needs a type name")) + (let + ((name (get (hk-advance!) "value")) + (tvars (hk-parse-tvars)) + (cons-list (list))) + (when + (hk-match? "reservedop" "=") + (do + (hk-advance!) + (append! cons-list (hk-parse-con-def)) + (define + hk-dc-loop + (fn + () + (when + (hk-match? "reservedop" "|") + (do + (hk-advance!) + (append! cons-list (hk-parse-con-def)) + (hk-dc-loop))))) + (hk-dc-loop))) + (list :data name tvars cons-list)))) + + (define + hk-parse-type-syn + (fn + () + (hk-expect! "reserved" "type") + (when + (not (hk-match? "conid" nil)) + (hk-err "type synonym needs a name")) + (let + ((name (get (hk-advance!) "value")) + (tvars (hk-parse-tvars))) + (hk-expect! "reservedop" "=") + (list :type-syn name tvars (hk-parse-type))))) + + (define + hk-parse-newtype + (fn + () + (hk-expect! "reserved" "newtype") + (when + (not (hk-match? "conid" nil)) + (hk-err "newtype needs a type name")) + (let + ((name (get (hk-advance!) "value")) + (tvars (hk-parse-tvars))) + (hk-expect! "reservedop" "=") + (when + (not (hk-match? "conid" nil)) + (hk-err "newtype needs a constructor name")) + (let + ((cname (get (hk-advance!) "value"))) + (when + (not (hk-atype-start? (hk-peek))) + (hk-err "newtype constructor needs one field")) + (list :newtype name tvars cname (hk-parse-atype)))))) + + (define + hk-parse-op + (fn + () + (cond + ((hk-match? "varsym" nil) + (get (hk-advance!) "value")) + ((hk-match? "consym" nil) + (get (hk-advance!) "value")) + ((and + (hk-match? "reservedop" nil) + (= (hk-peek-value) ":")) + (do (hk-advance!) ":")) + ((hk-match? "backtick" nil) + (do + (hk-advance!) + (let + ((v (hk-expect! "varid" nil))) + (hk-expect! "backtick" nil) + (get v "value")))) + (:else (hk-err "expected operator name in fixity decl"))))) + + (define + hk-parse-fixity + (fn + () + (let ((assoc "n")) + (cond + ((hk-match? "reserved" "infixl") (set! assoc "l")) + ((hk-match? "reserved" "infixr") (set! assoc "r")) + ((hk-match? "reserved" "infix") (set! assoc "n")) + (:else (hk-err "expected fixity keyword"))) + (hk-advance!) + (let ((prec 9)) + (when + (hk-match? "integer" nil) + (set! prec (get (hk-advance!) "value"))) + (let ((ops (list))) + (append! ops (hk-parse-op)) + (define + hk-fx-loop + (fn + () + (when + (hk-match? "comma" nil) + (do + (hk-advance!) + (append! ops (hk-parse-op)) + (hk-fx-loop))))) + (hk-fx-loop) + (list :fixity assoc prec ops)))))) + + (define + hk-parse-decl + (fn + () + (cond + ((hk-match? "reserved" "data") (hk-parse-data)) + ((hk-match? "reserved" "type") (hk-parse-type-syn)) + ((hk-match? "reserved" "newtype") (hk-parse-newtype)) + ((or + (hk-match? "reserved" "infix") + (hk-match? "reserved" "infixl") + (hk-match? "reserved" "infixr")) + (hk-parse-fixity)) + ((hk-has-top-dcolon?) (hk-parse-type-sig)) + (:else (hk-parse-fun-clause))))) + + (define + hk-parse-program + (fn + () + (let ((decls (list))) + (define + hk-prog-at-end? + (fn + () + (or + (nil? (hk-peek)) + (= (hk-peek-type) "eof") + (hk-match? "vrbrace" nil) + (hk-match? "rbrace" nil)))) + (when + (not (hk-prog-at-end?)) + (do + (append! decls (hk-parse-decl)) + (define + hk-prog-loop + (fn + () + (when + (or + (hk-match? "vsemi" nil) + (hk-match? "semi" nil)) + (do + (hk-advance!) + (when + (not (hk-prog-at-end?)) + (append! decls (hk-parse-decl))) + (hk-prog-loop))))) + (hk-prog-loop))) + (list :program decls)))) + ;; ── Top-level: strip leading/trailing module-level braces ─ (let ((start-brace @@ -909,7 +1343,11 @@ (hk-match? "lbrace" nil)))) (when start-brace (hk-advance!)) (let - ((result (hk-parse-expr-inner))) + ((result + (cond + ((= mode :expr) (hk-parse-expr-inner)) + ((= mode :module) (hk-parse-program)) + (:else (hk-err "unknown parser mode"))))) (when start-brace (when (or @@ -918,7 +1356,14 @@ (hk-advance!))) result))))) +(define hk-parse-expr (fn (tokens) (hk-parser tokens :expr))) +(define hk-parse-module (fn (tokens) (hk-parser tokens :module))) + ;; ── Convenience: tokenize + layout + parse ─────────────────────── (define hk-parse (fn (src) (hk-parse-expr (hk-layout (hk-tokenize src))))) + +(define + hk-parse-top + (fn (src) (hk-parse-module (hk-layout (hk-tokenize src))))) diff --git a/lib/haskell/tests/parser-decls.sx b/lib/haskell/tests/parser-decls.sx new file mode 100644 index 00000000..30aeff6a --- /dev/null +++ b/lib/haskell/tests/parser-decls.sx @@ -0,0 +1,273 @@ +;; Top-level declarations: function clauses, type signatures, data, +;; type, newtype, fixity. Driven by hk-parse-top which produces +;; a (:program DECLS) node. + +(define + hk-prog + (fn + (&rest decls) + (list :program decls))) + +;; ── Function clauses & pattern bindings ── +(hk-test + "simple fun-clause" + (hk-parse-top "f x = x + 1") + (hk-prog + (list + :fun-clause + "f" + (list (list :p-var "x")) + (list :op "+" (list :var "x") (list :int 1))))) + +(hk-test + "nullary decl" + (hk-parse-top "answer = 42") + (hk-prog + (list :fun-clause "answer" (list) (list :int 42)))) + +(hk-test + "multi-clause fn (separate defs for each pattern)" + (hk-parse-top "fact 0 = 1\nfact n = n") + (hk-prog + (list :fun-clause "fact" (list (list :p-int 0)) (list :int 1)) + (list + :fun-clause + "fact" + (list (list :p-var "n")) + (list :var "n")))) + +(hk-test + "constructor pattern in fn args" + (hk-parse-top "fromJust (Just x) = x") + (hk-prog + (list + :fun-clause + "fromJust" + (list (list :p-con "Just" (list (list :p-var "x")))) + (list :var "x")))) + +(hk-test + "pattern binding at top level" + (hk-parse-top "(a, b) = pair") + (hk-prog + (list + :pat-bind + (list + :p-tuple + (list (list :p-var "a") (list :p-var "b"))) + (list :var "pair")))) + +;; ── Type signatures ── +(hk-test + "single-name sig" + (hk-parse-top "f :: Int -> Int") + (hk-prog + (list + :type-sig + (list "f") + (list :t-fun (list :t-con "Int") (list :t-con "Int"))))) + +(hk-test + "multi-name sig" + (hk-parse-top "f, g, h :: Int -> Bool") + (hk-prog + (list + :type-sig + (list "f" "g" "h") + (list :t-fun (list :t-con "Int") (list :t-con "Bool"))))) + +(hk-test + "sig with type application" + (hk-parse-top "f :: Maybe a -> a") + (hk-prog + (list + :type-sig + (list "f") + (list + :t-fun + (list :t-app (list :t-con "Maybe") (list :t-var "a")) + (list :t-var "a"))))) + +(hk-test + "sig with list type" + (hk-parse-top "len :: [a] -> Int") + (hk-prog + (list + :type-sig + (list "len") + (list + :t-fun + (list :t-list (list :t-var "a")) + (list :t-con "Int"))))) + +(hk-test + "sig with tuple and right-assoc ->" + (hk-parse-top "pair :: a -> b -> (a, b)") + (hk-prog + (list + :type-sig + (list "pair") + (list + :t-fun + (list :t-var "a") + (list + :t-fun + (list :t-var "b") + (list + :t-tuple + (list (list :t-var "a") (list :t-var "b")))))))) + +(hk-test + "sig + implementation together" + (hk-parse-top "id :: a -> a\nid x = x") + (hk-prog + (list + :type-sig + (list "id") + (list :t-fun (list :t-var "a") (list :t-var "a"))) + (list + :fun-clause + "id" + (list (list :p-var "x")) + (list :var "x")))) + +;; ── data declarations ── +(hk-test + "data Maybe" + (hk-parse-top "data Maybe a = Nothing | Just a") + (hk-prog + (list + :data + "Maybe" + (list "a") + (list + (list :con-def "Nothing" (list)) + (list :con-def "Just" (list (list :t-var "a"))))))) + +(hk-test + "data Either" + (hk-parse-top "data Either a b = Left a | Right b") + (hk-prog + (list + :data + "Either" + (list "a" "b") + (list + (list :con-def "Left" (list (list :t-var "a"))) + (list :con-def "Right" (list (list :t-var "b"))))))) + +(hk-test + "data with no type parameters" + (hk-parse-top "data Bool = True | False") + (hk-prog + (list + :data + "Bool" + (list) + (list + (list :con-def "True" (list)) + (list :con-def "False" (list)))))) + +(hk-test + "recursive data type" + (hk-parse-top "data Tree a = Leaf | Node (Tree a) a (Tree a)") + (hk-prog + (list + :data + "Tree" + (list "a") + (list + (list :con-def "Leaf" (list)) + (list + :con-def + "Node" + (list + (list :t-app (list :t-con "Tree") (list :t-var "a")) + (list :t-var "a") + (list :t-app (list :t-con "Tree") (list :t-var "a")))))))) + +;; ── type synonyms ── +(hk-test + "simple type synonym" + (hk-parse-top "type Name = String") + (hk-prog + (list :type-syn "Name" (list) (list :t-con "String")))) + +(hk-test + "parameterised type synonym" + (hk-parse-top "type Pair a = (a, a)") + (hk-prog + (list + :type-syn + "Pair" + (list "a") + (list + :t-tuple + (list (list :t-var "a") (list :t-var "a")))))) + +;; ── newtype ── +(hk-test + "newtype" + (hk-parse-top "newtype Age = Age Int") + (hk-prog (list :newtype "Age" (list) "Age" (list :t-con "Int")))) + +(hk-test + "parameterised newtype" + (hk-parse-top "newtype Wrap a = Wrap a") + (hk-prog + (list :newtype "Wrap" (list "a") "Wrap" (list :t-var "a")))) + +;; ── fixity declarations ── +(hk-test + "infixl with precedence" + (hk-parse-top "infixl 5 +:, -:") + (hk-prog (list :fixity "l" 5 (list "+:" "-:")))) + +(hk-test + "infixr" + (hk-parse-top "infixr 9 .") + (hk-prog (list :fixity "r" 9 (list ".")))) + +(hk-test + "infix (non-assoc) default prec" + (hk-parse-top "infix ==") + (hk-prog (list :fixity "n" 9 (list "==")))) + +(hk-test + "fixity with backtick operator name" + (hk-parse-top "infixl 7 `div`") + (hk-prog (list :fixity "l" 7 (list "div")))) + +;; ── Several decls combined ── +(hk-test + "mixed: data + sig + fn + type" + (hk-parse-top "data Maybe a = Nothing | Just a\ntype Entry = Maybe Int\nf :: Entry -> Int\nf (Just x) = x\nf Nothing = 0") + (hk-prog + (list + :data + "Maybe" + (list "a") + (list + (list :con-def "Nothing" (list)) + (list :con-def "Just" (list (list :t-var "a"))))) + (list + :type-syn + "Entry" + (list) + (list :t-app (list :t-con "Maybe") (list :t-con "Int"))) + (list + :type-sig + (list "f") + (list :t-fun (list :t-con "Entry") (list :t-con "Int"))) + (list + :fun-clause + "f" + (list (list :p-con "Just" (list (list :p-var "x")))) + (list :var "x")) + (list + :fun-clause + "f" + (list (list :p-con "Nothing" (list))) + (list :int 0)))) + +{:fails hk-test-fails :pass hk-test-pass :fail hk-test-fail} diff --git a/plans/haskell-on-sx.md b/plans/haskell-on-sx.md index 6a3b92db..528a286e 100644 --- a/plans/haskell-on-sx.md +++ b/plans/haskell-on-sx.md @@ -60,7 +60,7 @@ Key mappings: - [x] Expressions: atoms, parens, tuples, lists, ranges, application, infix with full Haskell-98 precedence table, unary `-`, backtick operators, lambdas, `if`, `let` - [x] `case … of` and `do`-notation expressions (plus minimal patterns needed for arms/binds: var, wildcard, literal, 0-arity and applied constructor, tuple, list) - [x] Patterns — full: `as` patterns, nested, negative literal, `~` lazy, infix constructor (`:` / consym), extend lambdas/let with non-var patterns - - [ ] Top-level decls: function clauses, type signatures, `data`, `type`, `newtype`, fixity decls + - [x] Top-level decls: function clauses (simple — no guards/where yet), pattern bindings, multi-name type signatures, `data` with type vars and recursive constructors, `type` synonyms, `newtype`, fixity (`infix`/`infixl`/`infixr` with optional precedence, comma-separated ops, backtick names). Types: vars / constructors / application / `->` (right-assoc) / tuples / lists. `hk-parse-top` entry. - [ ] `where` clauses + guards - [ ] Module header + imports (stub) - [ ] List comprehensions + operator sections @@ -114,6 +114,31 @@ Key mappings: _Newest first._ +- **2026-04-24** — Phase 1: top-level decls. Refactored `hk-parse-expr` into a + `hk-parser tokens mode` with `:expr` / `:module` dispatch so the big lexical + state is shared (peek/advance/pat/expr helpers all reachable); added public + wrappers `hk-parse-expr`, `hk-parse-module`, and source-level entry + `hk-parse-top`. New type parser (`hk-parse-type` / `hk-parse-btype` / + `hk-parse-atype`): type variables (`:t-var`), type constructors (`:t-con`), + type application (`:t-app`, left-assoc), right-associative function arrow + (`:t-fun`), unit/tuples (`:t-tuple`), and lists (`:t-list`). New decl parser + (`hk-parse-decl` / `hk-parse-program`) producing a `(:program DECLS)` shell: + - `:type-sig NAMES TYPE` — comma-separated multi-name support + - `:fun-clause NAME APATS BODY` — patterns for args, body via existing expr + - `:pat-bind PAT BODY` — top-level pattern bindings like `(a, b) = pair` + - `:data NAME TVARS CONS` with `:con-def CNAME FIELDS` for nullary and + multi-arg constructors, including recursive references + - `:type-syn NAME TVARS TYPE`, `:newtype NAME TVARS CNAME FIELD` + - `:fixity ASSOC PREC OPS` — assoc one of `"l"`/`"r"`/`"n"`, default prec 9, + comma-separated operator names, including backtick-quoted varids. + Sig vs fun-clause disambiguated by a paren-balanced top-level scan for + `::` before the next `;`/`}` (`hk-has-top-dcolon?`). 24 new tests in + `lib/haskell/tests/parser-decls.sx` cover all decl forms, signatures with + application / tuples / lists / right-assoc arrows, nullary and recursive + data types, multi-clause functions, and a mixed program with data + type- + synonym + signature + two function clauses. Not yet: guards, where + clauses, module header, imports, deriving, contexts, GADTs. 162/162 green. + - **2026-04-24** — Phase 1: full patterns. Added `as` patterns (`name@apat` → `(:p-as NAME PAT)`), lazy patterns (`~apat` → `(:p-lazy PAT)`), negative literal patterns (`-N` / `-F` resolving