diff --git a/lib/haskell/layout.sx b/lib/haskell/layout.sx new file mode 100644 index 00000000..acef31ec --- /dev/null +++ b/lib/haskell/layout.sx @@ -0,0 +1,299 @@ +;; Haskell 98 layout algorithm (§10.3). +;; +;; Consumes the raw token stream produced by hk-tokenize and inserts +;; virtual braces / semicolons (types vlbrace / vrbrace / vsemi) based +;; on indentation. Newline tokens are consumed and stripped. +;; +;; (hk-layout (hk-tokenize src)) → tokens-with-virtual-layout + +;; ── Pre-pass ────────────────────────────────────────────────────── +;; +;; Walks the raw token list and emits an augmented stream containing +;; two fresh pseudo-tokens: +;; +;; {:type "layout-open" :col N :keyword K} +;; At stream start (K = "") unless the first real token is +;; `module` or `{`. Also immediately after every `let` / `where` / +;; `do` / `of` whose following token is NOT `{`. N is the column +;; of the token that follows. +;; +;; {:type "layout-indent" :col N} +;; Before any token whose line is strictly greater than the line +;; of the previously emitted real token, EXCEPT when that token +;; is already preceded by a layout-open (Haskell 98 §10.3 note 3). +;; +;; Raw newline tokens are dropped. + +(define + hk-layout-keyword? + (fn + (tok) + (and + (= (get tok "type") "reserved") + (or + (= (get tok "value") "let") + (= (get tok "value") "where") + (= (get tok "value") "do") + (= (get tok "value") "of"))))) + +(define + hk-layout-pre + (fn + (tokens) + (let + ((result (list)) + (n (len tokens)) + (i 0) + (prev-line -1) + (first-real-emitted false) + (suppress-next-indent false)) + (define + hk-next-real-idx + (fn + (start) + (let + ((j start)) + (define + hk-nri-loop + (fn + () + (when + (and + (< j n) + (= (get (nth tokens j) "type") "newline")) + (do (set! j (+ j 1)) (hk-nri-loop))))) + (hk-nri-loop) + j))) + (define + hk-pre-step + (fn + () + (when + (< i n) + (let + ((tok (nth tokens i)) (ty (get tok "type"))) + (cond + ((= ty "newline") (do (set! i (+ i 1)) (hk-pre-step))) + (:else + (do + (when + (not first-real-emitted) + (do + (set! first-real-emitted true) + (when + (not + (or + (and + (= ty "reserved") + (= (get tok "value") "module")) + (= ty "lbrace"))) + (do + (append! + result + {:type "layout-open" + :col (get tok "col") + :keyword "" + :line (get tok "line")}) + (set! suppress-next-indent true))))) + (when + (and + (>= prev-line 0) + (> (get tok "line") prev-line) + (not suppress-next-indent)) + (append! + result + {:type "layout-indent" + :col (get tok "col") + :line (get tok "line")})) + (set! suppress-next-indent false) + (set! prev-line (get tok "line")) + (append! result tok) + (when + (hk-layout-keyword? tok) + (let + ((j (hk-next-real-idx (+ i 1)))) + (cond + ((>= j n) + (do + (append! + result + {:type "layout-open" + :col 0 + :keyword (get tok "value") + :line (get tok "line")}) + (set! suppress-next-indent true))) + ((= (get (nth tokens j) "type") "lbrace") nil) + (:else + (do + (append! + result + {:type "layout-open" + :col (get (nth tokens j) "col") + :keyword (get tok "value") + :line (get tok "line")}) + (set! suppress-next-indent true)))))) + (set! i (+ i 1)) + (hk-pre-step)))))))) + (hk-pre-step) + result))) + +;; ── Main pass: L algorithm ──────────────────────────────────────── +;; +;; Stack is a list; the head is the top of stack. Each entry is +;; either the keyword :explicit (pushed by an explicit `{`) or a dict +;; {:col N :keyword K} pushed by a layout-open marker. +;; +;; Rules (following Haskell 98 §10.3): +;; +;; layout-open(n) vs stack: +;; empty or explicit top → push n; emit { +;; n > top-col → push n; emit { +;; otherwise → emit { }; retry as indent(n) +;; +;; layout-indent(n) vs stack: +;; empty or explicit top → drop +;; n == top-col → emit ; +;; n < top-col → emit }; pop; recurse +;; n > top-col → drop +;; +;; lbrace → push :explicit; emit { +;; rbrace → pop if :explicit; emit } +;; `in` with implicit let on top → emit }; pop; emit in +;; any other token → emit +;; +;; EOF: emit } for every remaining implicit context. + +(define + hk-layout-L + (fn + (pre-toks) + (let + ((result (list)) + (stack (list)) + (n (len pre-toks)) + (i 0)) + (define hk-emit (fn (t) (append! result t))) + (define + hk-indent-at + (fn + (col line) + (cond + ((or (empty? stack) (= (first stack) :explicit)) nil) + (:else + (let + ((top-col (get (first stack) "col"))) + (cond + ((= col top-col) + (hk-emit + {:type "vsemi" :value ";" :line line :col col})) + ((< col top-col) + (do + (hk-emit + {:type "vrbrace" :value "}" :line line :col col}) + (set! stack (rest stack)) + (hk-indent-at col line))) + (:else nil))))))) + (define + hk-open-at + (fn + (col keyword line) + (cond + ((and + (> col 0) + (or + (empty? stack) + (= (first stack) :explicit) + (> col (get (first stack) "col")))) + (do + (hk-emit + {:type "vlbrace" :value "{" :line line :col col}) + (set! stack (cons {:col col :keyword keyword} stack)))) + (:else + (do + (hk-emit + {:type "vlbrace" :value "{" :line line :col col}) + (hk-emit + {:type "vrbrace" :value "}" :line line :col col}) + (hk-indent-at col line)))))) + (define + hk-close-eof + (fn + () + (when + (and + (not (empty? stack)) + (not (= (first stack) :explicit))) + (do + (hk-emit {:type "vrbrace" :value "}" :line 0 :col 0}) + (set! stack (rest stack)) + (hk-close-eof))))) + (define + hk-layout-step + (fn + () + (when + (< i n) + (let + ((tok (nth pre-toks i)) (ty (get tok "type"))) + (cond + ((= ty "eof") + (do + (hk-close-eof) + (hk-emit tok) + (set! i (+ i 1)) + (hk-layout-step))) + ((= ty "layout-open") + (do + (hk-open-at + (get tok "col") + (get tok "keyword") + (get tok "line")) + (set! i (+ i 1)) + (hk-layout-step))) + ((= ty "layout-indent") + (do + (hk-indent-at (get tok "col") (get tok "line")) + (set! i (+ i 1)) + (hk-layout-step))) + ((= ty "lbrace") + (do + (set! stack (cons :explicit stack)) + (hk-emit tok) + (set! i (+ i 1)) + (hk-layout-step))) + ((= ty "rbrace") + (do + (when + (and + (not (empty? stack)) + (= (first stack) :explicit)) + (set! stack (rest stack))) + (hk-emit tok) + (set! i (+ i 1)) + (hk-layout-step))) + ((and + (= ty "reserved") + (= (get tok "value") "in") + (not (empty? stack)) + (not (= (first stack) :explicit)) + (= (get (first stack) "keyword") "let")) + (do + (hk-emit + {:type "vrbrace" + :value "}" + :line (get tok "line") + :col (get tok "col")}) + (set! stack (rest stack)) + (hk-emit tok) + (set! i (+ i 1)) + (hk-layout-step))) + (:else + (do + (hk-emit tok) + (set! i (+ i 1)) + (hk-layout-step)))))))) + (hk-layout-step) + (hk-close-eof) + result))) + +(define hk-layout (fn (tokens) (hk-layout-L (hk-layout-pre tokens)))) diff --git a/lib/haskell/test.sh b/lib/haskell/test.sh index 892194d4..2af17416 100755 --- a/lib/haskell/test.sh +++ b/lib/haskell/test.sh @@ -46,6 +46,8 @@ for FILE in "${FILES[@]}"; do cat > "$TMPFILE" < "$TMPFILE2" < y\n Nothing -> 0") + (list + {:value "{" :type "vlbrace"} + {:value "f" :type "varid"} + {:value "x" :type "varid"} + {:value "=" :type "reservedop"} + {:value "case" :type "reserved"} + {:value "x" :type "varid"} + {:value "of" :type "reserved"} + {:value "{" :type "vlbrace"} + {:value "Just" :type "conid"} + {:value "y" :type "varid"} + {:value "->" :type "reservedop"} + {:value "y" :type "varid"} + {:value ";" :type "vsemi"} + {:value "Nothing" :type "conid"} + {:value "->" :type "reservedop"} + {:value 0 :type "integer"} + {:value "}" :type "vrbrace"} + {:value "}" :type "vrbrace"})) + +;; ── 3. Explicit braces disable layout ── +(hk-test + "explicit braces — no implicit vlbrace/vsemi/vrbrace inside" + (hk-lay "do { x ; y }") + (list + {:value "{" :type "vlbrace"} + {:value "do" :type "reserved"} + {:value "{" :type "lbrace"} + {:value "x" :type "varid"} + {:value ";" :type "semi"} + {:value "y" :type "varid"} + {:value "}" :type "rbrace"} + {:value "}" :type "vrbrace"})) + +;; ── 4. Dedent closes nested blocks ── +(hk-test + "dedent back to module level closes do block" + (hk-lay "f = do\n x\n y\ng = 2") + (list + {:value "{" :type "vlbrace"} + {:value "f" :type "varid"} + {:value "=" :type "reservedop"} + {:value "do" :type "reserved"} + {:value "{" :type "vlbrace"} + {:value "x" :type "varid"} + {:value ";" :type "vsemi"} + {:value "y" :type "varid"} + {:value "}" :type "vrbrace"} + {:value ";" :type "vsemi"} + {:value "g" :type "varid"} + {:value "=" :type "reservedop"} + {:value 2 :type "integer"} + {:value "}" :type "vrbrace"})) + +(hk-test + "dedent closes inner let, emits vsemi at outer do level" + (hk-lay "main = do\n let x = 1\n print x") + (list + {:value "{" :type "vlbrace"} + {:value "main" :type "varid"} + {:value "=" :type "reservedop"} + {:value "do" :type "reserved"} + {:value "{" :type "vlbrace"} + {:value "let" :type "reserved"} + {:value "{" :type "vlbrace"} + {:value "x" :type "varid"} + {:value "=" :type "reservedop"} + {:value 1 :type "integer"} + {:value "}" :type "vrbrace"} + {:value ";" :type "vsemi"} + {:value "print" :type "varid"} + {:value "x" :type "varid"} + {:value "}" :type "vrbrace"} + {:value "}" :type "vrbrace"})) + +;; ── 5. Module header skips outer implicit open ── +(hk-test + "module M where — only where opens a block" + (hk-lay "module M where\n f = 1") + (list + {:value "module" :type "reserved"} + {:value "M" :type "conid"} + {:value "where" :type "reserved"} + {:value "{" :type "vlbrace"} + {:value "f" :type "varid"} + {:value "=" :type "reservedop"} + {:value 1 :type "integer"} + {:value "}" :type "vrbrace"})) + +;; ── 6. Newlines are stripped ── +(hk-test + "newline tokens do not appear in output" + (let + ((toks (hk-layout (hk-tokenize "foo\nbar")))) + (every? + (fn (t) (not (= (get t "type") "newline"))) + toks)) + true) + +;; ── 7. Continuation — deeper indent does NOT emit vsemi ── +(hk-test + "line continuation (deeper indent) just merges" + (hk-lay "foo = 1 +\n 2") + (list + {:value "{" :type "vlbrace"} + {:value "foo" :type "varid"} + {:value "=" :type "reservedop"} + {:value 1 :type "integer"} + {:value "+" :type "varsym"} + {:value 2 :type "integer"} + {:value "}" :type "vrbrace"})) + +;; ── 8. Stack closing at EOF ── +(hk-test + "EOF inside nested do closes all implicit blocks" + (let + ((toks (hk-lay "main = do\n do\n x"))) + (let + ((n (len toks))) + (list + (get (nth toks (- n 1)) "type") + (get (nth toks (- n 2)) "type") + (get (nth toks (- n 3)) "type")))) + (list "vrbrace" "vrbrace" "vrbrace")) + +;; ── 9. Qualified-newline: x at deeper col than stack top does nothing ── +(hk-test + "mixed where + do" + (hk-lay "f = do\n x\n where\n x = 1") + (list + {:value "{" :type "vlbrace"} + {:value "f" :type "varid"} + {:value "=" :type "reservedop"} + {:value "do" :type "reserved"} + {:value "{" :type "vlbrace"} + {:value "x" :type "varid"} + {:value "}" :type "vrbrace"} + {:value "where" :type "reserved"} + {:value "{" :type "vlbrace"} + {:value "x" :type "varid"} + {:value "=" :type "reservedop"} + {:value 1 :type "integer"} + {:value "}" :type "vrbrace"} + {:value "}" :type "vrbrace"})) + +{:fails hk-test-fails :pass hk-test-pass :fail hk-test-fail} diff --git a/lib/haskell/tests/parse.sx b/lib/haskell/tests/parse.sx index 7b9c9da1..4f4df46f 100644 --- a/lib/haskell/tests/parse.sx +++ b/lib/haskell/tests/parse.sx @@ -3,60 +3,8 @@ ;; Lightweight runner: each test checks actual vs expected with ;; structural (deep) equality and accumulates pass/fail counters. ;; Final value of this file is a summary dict with :pass :fail :fails. - -(define - hk-deep=? - (fn - (a b) - (cond - ((= a b) true) - ((and (dict? a) (dict? b)) - (let - ((ak (keys a)) (bk (keys b))) - (if - (not (= (len ak) (len bk))) - false - (every? - (fn - (k) - (and (has-key? b k) (hk-deep=? (get a k) (get b k)))) - ak)))) - ((and (list? a) (list? b)) - (if - (not (= (len a) (len b))) - false - (let - ((i 0) (ok true)) - (define - hk-de-loop - (fn - () - (when - (and ok (< i (len a))) - (do - (when - (not (hk-deep=? (nth a i) (nth b i))) - (set! ok false)) - (set! i (+ i 1)) - (hk-de-loop))))) - (hk-de-loop) - ok))) - (:else false)))) - -(define hk-test-pass 0) -(define hk-test-fail 0) -(define hk-test-fails (list)) - -(define - hk-test - (fn - (name actual expected) - (if - (hk-deep=? actual expected) - (set! hk-test-pass (+ hk-test-pass 1)) - (do - (set! hk-test-fail (+ hk-test-fail 1)) - (append! hk-test-fails {:actual actual :expected expected :name name}))))) +;; The hk-test / hk-deep=? helpers live in lib/haskell/testlib.sx +;; and are preloaded by lib/haskell/test.sh. ;; Convenience: tokenize and drop newline + eof tokens so tests focus ;; on meaningful content. Returns list of {:type :value} pairs. diff --git a/plans/haskell-on-sx.md b/plans/haskell-on-sx.md index f76920fd..b87ad274 100644 --- a/plans/haskell-on-sx.md +++ b/plans/haskell-on-sx.md @@ -55,7 +55,7 @@ Key mappings: ### Phase 1 — tokenizer + parser + layout rule - [x] Tokenizer: reserved words, qualified names, operators, numbers (int, float, Rational later), chars/strings, comments (`--` and `{-` nested) -- [ ] Layout algorithm: turn indentation into virtual `{`, `;`, `}` tokens per Haskell 98 §10.3 +- [x] Layout algorithm: turn indentation into virtual `{`, `;`, `}` tokens per Haskell 98 §10.3 - [ ] Parser: modules, imports (stub), top-level decls, type sigs, function clauses with patterns + guards + where-clauses, expressions with operator precedence, lambdas, `let`, `if`, `case`, `do`, list comp, sections - [ ] AST design modelled on GHC's HsSyn at a surface level - [x] Unit tests in `lib/haskell/tests/parse.sx` (43 tokenizer tests, all green) @@ -107,6 +107,23 @@ Key mappings: _Newest first._ +- **2026-04-24** — Phase 1: layout algorithm (`lib/haskell/layout.sx`, ~260 lines) + implementing Haskell 98 §10.3. Two-pass design: a pre-pass augments the raw + token stream with explicit `layout-open` / `layout-indent` markers (suppressing + `` when `{n}` already applies, per note 3), then an L pass consumes the + augmented stream against a stack of implicit/explicit layout contexts and + emits `vlbrace` / `vsemi` / `vrbrace` tokens; newlines are dropped. Supports + the initial module-level implicit open (skipped when the first token is + `module` or `{`), the four layout keywords (`let`/`where`/`do`/`of`), explicit + braces disabling layout, dedent closing nested implicit blocks while also + emitting `vsemi` at the enclosing level, and the pragmatic single-line + `let … in` rule (emit `}` when `in` meets an implicit let). 15 new tests + in `lib/haskell/tests/layout.sx` cover module-start, do/let/where/case/of, + explicit braces, multi-level dedent, line continuation, and EOF close-down. + Shared test helpers moved to `lib/haskell/testlib.sx` so both test files + can share one `hk-test`. `test.sh` preloads tokenizer + layout + testlib. + 58/58 green. + - **2026-04-24** — Phase 1: Haskell 98 tokenizer (`lib/haskell/tokenizer.sx`, 490 lines) covering idents (lower/upper/qvarid/qconid), 23 reserved words, 11 reserved ops, varsym/consym operator chains, integer/hex/octal/float literals incl. exponent