diff --git a/lib/haskell/parser.sx b/lib/haskell/parser.sx index fbbcb31f..a4160ded 100644 --- a/lib/haskell/parser.sx +++ b/lib/haskell/parser.sx @@ -220,7 +220,44 @@ ((= (get t "type") "lbracket") (hk-parse-list-lit)) (:else (hk-err "unexpected token in expression")))))) - ;; ── Parens / tuple / unit ──────────────────────────────── + ;; Returns {:name N :len L} if the current token begins an + ;; infix operator (varsym / consym / reservedop ":" / backtick), + ;; else nil. `len` is the number of tokens the operator occupies. + (define + hk-section-op-info + (fn + () + (let ((t (hk-peek))) + (cond + ((nil? t) nil) + ((= (get t "type") "varsym") + {:name (get t "value") :len 1}) + ((= (get t "type") "consym") + {:name (get t "value") :len 1}) + ((and + (= (get t "type") "reservedop") + (= (get t "value") ":")) + {:name ":" :len 1}) + ((= (get t "type") "backtick") + (let ((varid-t (hk-peek-at 1))) + (cond + ((and + (not (nil? varid-t)) + (= (get varid-t "type") "varid")) + {:name (get varid-t "value") :len 3}) + (:else nil)))) + (:else nil))))) + + ;; ── Parens / tuple / unit / operator sections ─────────── + ;; Forms recognised inside parens: + ;; () → unit : (:con "()") + ;; (op) → operator reference : (:var OP) + ;; (op e) → right section : (:sect-right OP E) (op ≠ "-") + ;; (e) → plain parens : unwrapped E + ;; (e1, … , en) → tuple : (:tuple ITEMS) + ;; (e op) → left section : (:sect-left OP E) + ;; `-` is excluded from right sections because `-e` always means + ;; `negate e`; `(-)` is still a valid operator reference. (define hk-parse-parens (fn @@ -230,27 +267,197 @@ ((hk-match? "rparen" nil) (do (hk-advance!) (list :con "()"))) (:else - (let - ((first-e (hk-parse-expr-inner)) - (items (list)) - (is-tuple false)) - (append! items first-e) - (define - hk-tup-loop - (fn - () - (when - (hk-match? "comma" nil) - (do - (hk-advance!) - (set! is-tuple true) - (append! items (hk-parse-expr-inner)) - (hk-tup-loop))))) - (hk-tup-loop) - (hk-expect! "rparen" nil) - (if is-tuple (list :tuple items) first-e)))))) + (let ((op-info (hk-section-op-info))) + (cond + ;; Operator reference / right section + ((and + (not (nil? op-info)) + (let + ((after + (hk-peek-at (get op-info "len")))) + (or + (and + (not (nil? after)) + (= (get after "type") "rparen")) + (not (= (get op-info "name") "-"))))) + (let + ((op-name (get op-info "name")) + (op-len (get op-info "len")) + (after + (hk-peek-at (get op-info "len")))) + (hk-consume-op!) + (cond + ((and + (not (nil? after)) + (= (get after "type") "rparen")) + (do (hk-advance!) (list :var op-name))) + (:else + (let ((expr-e (hk-parse-expr-inner))) + (hk-expect! "rparen" nil) + (list :sect-right op-name expr-e)))))) + (:else + (let + ((first-e (hk-parse-expr-inner)) + (items (list)) + (is-tuple false)) + (append! items first-e) + (define + hk-tup-loop + (fn + () + (when + (hk-match? "comma" nil) + (do + (hk-advance!) + (set! is-tuple true) + (append! items (hk-parse-expr-inner)) + (hk-tup-loop))))) + (hk-tup-loop) + (cond + ((hk-match? "rparen" nil) + (do + (hk-advance!) + (if + is-tuple + (list :tuple items) + first-e))) + (:else + (let + ((op-info2 (hk-section-op-info))) + (cond + ((and + (not (nil? op-info2)) + (not is-tuple) + (let + ((after2 + (hk-peek-at + (get op-info2 "len")))) + (and + (not (nil? after2)) + (= (get after2 "type") "rparen")))) + (let + ((op-name (get op-info2 "name"))) + (hk-consume-op!) + (hk-advance!) + (list :sect-left op-name first-e))) + (:else + (hk-err + "expected ')' after expression")))))))))))))) - ;; ── List literal / range ───────────────────────────────── + ;; ── List comprehension qualifiers ────────────────────── + ;; (:list-comp E QUALS) where each qualifier is one of: + ;; (:q-gen PAT E) — `pat <- expr` + ;; (:q-guard E) — bare boolean expression + ;; (:q-let DECLS) — `let decls` + (define + hk-comp-qual-is-gen? + (fn + () + (let + ((j pos) (depth 0) (found false) (done false)) + (define + hk-qsc-loop + (fn + () + (when + (and (not done) (< j n)) + (let + ((t (nth toks j)) (ty (get t "type"))) + (cond + ((and + (= depth 0) + (or + (= ty "comma") + (= ty "rbracket"))) + (set! done true)) + ((and + (= depth 0) + (= ty "reservedop") + (= (get t "value") "<-")) + (do (set! found true) (set! done true))) + ((or + (= ty "lparen") + (= ty "lbracket") + (= ty "lbrace") + (= ty "vlbrace")) + (set! depth (+ depth 1))) + ((or + (= ty "rparen") + (= ty "rbrace") + (= ty "vrbrace")) + (set! depth (- depth 1))) + (:else nil)) + (set! j (+ j 1)) + (hk-qsc-loop))))) + (hk-qsc-loop) + found))) + + (define + hk-parse-comp-let + (fn + () + (hk-expect! "reserved" "let") + (let ((explicit (hk-match? "lbrace" nil))) + (if + explicit + (hk-advance!) + (hk-expect! "vlbrace" nil)) + (let + ((binds (list))) + (when + (not + (if + explicit + (hk-match? "rbrace" nil) + (hk-match? "vrbrace" nil))) + (do + (append! binds (hk-parse-bind)) + (define + hk-cl-loop + (fn + () + (when + (or + (hk-match? "semi" nil) + (hk-match? "vsemi" nil)) + (do + (hk-advance!) + (when + (not + (if + explicit + (hk-match? "rbrace" nil) + (hk-match? "vrbrace" nil))) + (append! binds (hk-parse-bind))) + (hk-cl-loop))))) + (hk-cl-loop))) + (cond + (explicit (hk-expect! "rbrace" nil)) + ((hk-match? "vrbrace" nil) (hk-advance!)) + ;; In a single-line comprehension, `]` or `,` + ;; terminates the qualifier before layout's implicit + ;; vrbrace arrives — leave them for the outer parser. + ((or + (hk-match? "rbracket" nil) + (hk-match? "comma" nil)) + nil) + (:else + (hk-err "expected end of let block in comprehension"))) + (list :q-let binds))))) + + (define + hk-parse-qual + (fn + () + (cond + ((hk-match? "reserved" "let") (hk-parse-comp-let)) + ((hk-comp-qual-is-gen?) + (let ((pat (hk-parse-pat))) + (hk-expect! "reservedop" "<-") + (list :q-gen pat (hk-parse-expr-inner)))) + (:else (list :q-guard (hk-parse-expr-inner)))))) + + ;; ── List literal / range / comprehension ─────────────── (define hk-parse-list-lit (fn @@ -270,6 +477,24 @@ ((end-e (hk-parse-expr-inner))) (hk-expect! "rbracket" nil) (list :range first-e end-e)))) + ((hk-match? "reservedop" "|") + (do + (hk-advance!) + (let ((quals (list))) + (append! quals (hk-parse-qual)) + (define + hk-lc-loop + (fn + () + (when + (hk-match? "comma" nil) + (do + (hk-advance!) + (append! quals (hk-parse-qual)) + (hk-lc-loop))))) + (hk-lc-loop) + (hk-expect! "rbracket" nil) + (list :list-comp first-e quals)))) ((hk-match? "comma" nil) (do (hk-advance!) @@ -1011,31 +1236,44 @@ (let ((op-tok (hk-peek))) (let - ((op-name + ((op-len + (if + (= (get op-tok "type") "backtick") + 3 + 1)) + (op-name (if (= (get op-tok "type") "backtick") (get (hk-peek-at 1) "value") (get op-tok "value")))) (let - ((info (hk-op-info op-name))) - (when - (>= (get info "prec") min-prec) - (do - (hk-consume-op!) - (let - ((next-min - (cond - ((= (get info "assoc") "left") - (+ (get info "prec") 1)) - ((= (get info "assoc") "right") - (get info "prec")) - (:else (+ (get info "prec") 1))))) + ((after-op (hk-peek-at op-len)) + (info (hk-op-info op-name))) + (cond + ;; Bail on `op )` — let the paren parser claim + ;; it as a left section (e op). + ((and + (not (nil? after-op)) + (= (get after-op "type") "rparen")) + nil) + ((>= (get info "prec") min-prec) + (do + (hk-consume-op!) (let - ((right (hk-parse-infix next-min))) - (set! - left - (list :op op-name left right)) - (hk-inf-loop))))))))))) + ((next-min + (cond + ((= (get info "assoc") "left") + (+ (get info "prec") 1)) + ((= (get info "assoc") "right") + (get info "prec")) + (:else (+ (get info "prec") 1))))) + (let + ((right (hk-parse-infix next-min))) + (set! + left + (list :op op-name left right)) + (hk-inf-loop))))) + (:else nil)))))))) (hk-inf-loop) left))) diff --git a/lib/haskell/tests/parser-sect-comp.sx b/lib/haskell/tests/parser-sect-comp.sx new file mode 100644 index 00000000..90cafeab --- /dev/null +++ b/lib/haskell/tests/parser-sect-comp.sx @@ -0,0 +1,191 @@ +;; Operator sections and list comprehensions. + +;; ── Operator references (unchanged expr shape) ── +(hk-test + "op as value (+)" + (hk-parse "(+)") + (list :var "+")) + +(hk-test + "op as value (-)" + (hk-parse "(-)") + (list :var "-")) + +(hk-test + "op as value (:)" + (hk-parse "(:)") + (list :var ":")) + +(hk-test + "backtick op as value" + (hk-parse "(`div`)") + (list :var "div")) + +;; ── Right sections (op expr) ── +(hk-test + "right section (+ 5)" + (hk-parse "(+ 5)") + (list :sect-right "+" (list :int 5))) + +(hk-test + "right section (* x)" + (hk-parse "(* x)") + (list :sect-right "*" (list :var "x"))) + +(hk-test + "right section with backtick op" + (hk-parse "(`div` 2)") + (list :sect-right "div" (list :int 2))) + +;; `-` is unary in expr position — (- 5) is negation, not a right section +(hk-test + "(- 5) is negation, not a section" + (hk-parse "(- 5)") + (list :neg (list :int 5))) + +;; ── Left sections (expr op) ── +(hk-test + "left section (5 +)" + (hk-parse "(5 +)") + (list :sect-left "+" (list :int 5))) + +(hk-test + "left section with backtick" + (hk-parse "(x `mod`)") + (list :sect-left "mod" (list :var "x"))) + +(hk-test + "left section with cons (x :)" + (hk-parse "(x :)") + (list :sect-left ":" (list :var "x"))) + +;; ── Mixed / nesting ── +(hk-test + "map (+ 1) xs" + (hk-parse "map (+ 1) xs") + (list + :app + (list + :app + (list :var "map") + (list :sect-right "+" (list :int 1))) + (list :var "xs"))) + +(hk-test + "filter (< 0) xs" + (hk-parse "filter (< 0) xs") + (list + :app + (list + :app + (list :var "filter") + (list :sect-right "<" (list :int 0))) + (list :var "xs"))) + +;; ── Plain parens and tuples still work ── +(hk-test + "plain parens unwrap" + (hk-parse "(1 + 2)") + (list :op "+" (list :int 1) (list :int 2))) + +(hk-test + "tuple still parses" + (hk-parse "(a, b, c)") + (list + :tuple + (list (list :var "a") (list :var "b") (list :var "c")))) + +;; ── List comprehensions ── +(hk-test + "simple list comprehension" + (hk-parse "[x | x <- xs]") + (list + :list-comp + (list :var "x") + (list + (list :q-gen (list :p-var "x") (list :var "xs"))))) + +(hk-test + "comprehension with filter" + (hk-parse "[x * 2 | x <- xs, x > 0]") + (list + :list-comp + (list :op "*" (list :var "x") (list :int 2)) + (list + (list :q-gen (list :p-var "x") (list :var "xs")) + (list + :q-guard + (list :op ">" (list :var "x") (list :int 0)))))) + +(hk-test + "comprehension with let" + (hk-parse "[y | x <- xs, let y = x + 1]") + (list + :list-comp + (list :var "y") + (list + (list :q-gen (list :p-var "x") (list :var "xs")) + (list + :q-let + (list + (list + :bind + (list :p-var "y") + (list :op "+" (list :var "x") (list :int 1)))))))) + +(hk-test + "nested generators" + (hk-parse "[(x, y) | x <- xs, y <- ys]") + (list + :list-comp + (list :tuple (list (list :var "x") (list :var "y"))) + (list + (list :q-gen (list :p-var "x") (list :var "xs")) + (list :q-gen (list :p-var "y") (list :var "ys"))))) + +(hk-test + "comprehension with constructor pattern" + (hk-parse "[v | Just v <- xs]") + (list + :list-comp + (list :var "v") + (list + (list + :q-gen + (list :p-con "Just" (list (list :p-var "v"))) + (list :var "xs"))))) + +(hk-test + "comprehension with tuple pattern" + (hk-parse "[x + y | (x, y) <- pairs]") + (list + :list-comp + (list :op "+" (list :var "x") (list :var "y")) + (list + (list + :q-gen + (list + :p-tuple + (list (list :p-var "x") (list :p-var "y"))) + (list :var "pairs"))))) + +(hk-test + "combination: generator, let, guard" + (hk-parse "[z | x <- xs, let z = x * 2, z > 10]") + (list + :list-comp + (list :var "z") + (list + (list :q-gen (list :p-var "x") (list :var "xs")) + (list + :q-let + (list + (list + :bind + (list :p-var "z") + (list :op "*" (list :var "x") (list :int 2))))) + (list + :q-guard + (list :op ">" (list :var "z") (list :int 10)))))) + +{:fails hk-test-fails :pass hk-test-pass :fail hk-test-fail} diff --git a/plans/haskell-on-sx.md b/plans/haskell-on-sx.md index 9f611647..63f88c06 100644 --- a/plans/haskell-on-sx.md +++ b/plans/haskell-on-sx.md @@ -63,8 +63,8 @@ Key mappings: - [x] Top-level decls: function clauses (simple — no guards/where yet), pattern bindings, multi-name type signatures, `data` with type vars and recursive constructors, `type` synonyms, `newtype`, fixity (`infix`/`infixl`/`infixr` with optional precedence, comma-separated ops, backtick names). Types: vars / constructors / application / `->` (right-assoc) / tuples / lists. `hk-parse-top` entry. - [x] `where` clauses + guards (on fun-clauses, case alts, and let/do-let bindings — with the let funclause shorthand `let f x = …` now supported) - [x] Module header + imports — `module NAME [exports] where …`, qualified/as/hiding/explicit imports, operator exports, `module Foo` exports, dotted names, headerless-with-imports - - [ ] List comprehensions + operator sections -- [ ] AST design modelled on GHC's HsSyn at a surface level + - [x] List comprehensions + operator sections — `(op)` / `(op e)` / `(e op)` (excluding `-` from right sections), `[e | q1, q2, …]` with `q-gen` / `q-guard` / `q-let` qualifiers +- [x] AST design modelled on GHC's HsSyn at a surface level — keyword-tagged lists cover modules/imports/decls/types/patterns/expressions; see parser.sx docstrings for the full node catalogue - [x] Unit tests in `lib/haskell/tests/parse.sx` (43 tokenizer tests, all green) ### Phase 2 — desugar + eager-ish eval + ADTs (untyped) @@ -114,6 +114,38 @@ Key mappings: _Newest first._ +- **2026-04-24** — Phase 1 parser is now complete. This iteration adds + operator sections and list comprehensions, the two remaining + aexp-level forms, plus ticks the “AST design” item (the keyword- + tagged list shape has accumulated a full HsSyn-level surface). + Changes: + - `hk-parse-infix` now bails on `op )` without consuming the op, so + the paren parser can claim it as a left section. + - `hk-parse-parens` rewritten to recognise five new forms: + `()` (unit), `(op)` → `(:var OP)`, `(op e)` → `(:sect-right OP E)` + (excluded for `-` so that `(- 5)` stays `(:neg 5)`), `(e op)` → + `(:sect-left OP E)`, plus regular parens and tuples. Works for + varsym, consym, reservedop `:`, and backtick-quoted varids. + - `hk-section-op-info` inspects the current token and returns a + `{:name :len}` dict, so the same logic handles 1-token ops and + 3-token backtick ops uniformly. + - `hk-parse-list-lit` now recognises a `|` after the first element + and dispatches to `hk-parse-qual` per qualifier (comma-separated), + producing `(:list-comp EXPR QUALS)`. Qualifiers are: + `(:q-gen PAT EXPR)` when a paren-balanced lookahead + (`hk-comp-qual-is-gen?`) finds `<-` before the next `,`/`]`, + `(:q-let BINDS)` for `let …`, and `(:q-guard EXPR)` otherwise. + - `hk-parse-comp-let` accepts `]` or `,` as an implicit block close + (single-line comprehensions never see layout's vrbrace before the + qualifier terminator arrives); explicit `{ }` still closes + strictly. + 22 new tests in `lib/haskell/tests/parser-sect-comp.sx` cover + op-references (inc. `(-)`, `(:)`, backtick), right sections (inc. + backtick), left sections, the `(- 5)` → `:neg` corner, plain parens + and tuples, six comprehension shapes (simple, filter, let, + nested-generators, constructor pattern bind, tuple pattern bind, + and a three-qualifier mix). 211/211 green. + - **2026-04-24** — Phase 1: module header + imports. Added `hk-parse-module-header`, `hk-parse-import`, plus shared helpers for import/export entity lists (`hk-parse-ent`, `hk-parse-ent-member`,