haskell: layout rule per §10.3 (+15 tests, 58/58)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled

This commit is contained in:
2026-04-24 17:05:35 +00:00
parent 30d76537d1
commit c07ff90f6b
6 changed files with 626 additions and 55 deletions

299
lib/haskell/layout.sx Normal file
View File

@@ -0,0 +1,299 @@
;; Haskell 98 layout algorithm (§10.3).
;;
;; Consumes the raw token stream produced by hk-tokenize and inserts
;; virtual braces / semicolons (types vlbrace / vrbrace / vsemi) based
;; on indentation. Newline tokens are consumed and stripped.
;;
;; (hk-layout (hk-tokenize src)) → tokens-with-virtual-layout
;; ── Pre-pass ──────────────────────────────────────────────────────
;;
;; Walks the raw token list and emits an augmented stream containing
;; two fresh pseudo-tokens:
;;
;; {:type "layout-open" :col N :keyword K}
;; At stream start (K = "<module>") unless the first real token is
;; `module` or `{`. Also immediately after every `let` / `where` /
;; `do` / `of` whose following token is NOT `{`. N is the column
;; of the token that follows.
;;
;; {:type "layout-indent" :col N}
;; Before any token whose line is strictly greater than the line
;; of the previously emitted real token, EXCEPT when that token
;; is already preceded by a layout-open (Haskell 98 §10.3 note 3).
;;
;; Raw newline tokens are dropped.
(define
hk-layout-keyword?
(fn
(tok)
(and
(= (get tok "type") "reserved")
(or
(= (get tok "value") "let")
(= (get tok "value") "where")
(= (get tok "value") "do")
(= (get tok "value") "of")))))
(define
hk-layout-pre
(fn
(tokens)
(let
((result (list))
(n (len tokens))
(i 0)
(prev-line -1)
(first-real-emitted false)
(suppress-next-indent false))
(define
hk-next-real-idx
(fn
(start)
(let
((j start))
(define
hk-nri-loop
(fn
()
(when
(and
(< j n)
(= (get (nth tokens j) "type") "newline"))
(do (set! j (+ j 1)) (hk-nri-loop)))))
(hk-nri-loop)
j)))
(define
hk-pre-step
(fn
()
(when
(< i n)
(let
((tok (nth tokens i)) (ty (get tok "type")))
(cond
((= ty "newline") (do (set! i (+ i 1)) (hk-pre-step)))
(:else
(do
(when
(not first-real-emitted)
(do
(set! first-real-emitted true)
(when
(not
(or
(and
(= ty "reserved")
(= (get tok "value") "module"))
(= ty "lbrace")))
(do
(append!
result
{:type "layout-open"
:col (get tok "col")
:keyword "<module>"
:line (get tok "line")})
(set! suppress-next-indent true)))))
(when
(and
(>= prev-line 0)
(> (get tok "line") prev-line)
(not suppress-next-indent))
(append!
result
{:type "layout-indent"
:col (get tok "col")
:line (get tok "line")}))
(set! suppress-next-indent false)
(set! prev-line (get tok "line"))
(append! result tok)
(when
(hk-layout-keyword? tok)
(let
((j (hk-next-real-idx (+ i 1))))
(cond
((>= j n)
(do
(append!
result
{:type "layout-open"
:col 0
:keyword (get tok "value")
:line (get tok "line")})
(set! suppress-next-indent true)))
((= (get (nth tokens j) "type") "lbrace") nil)
(:else
(do
(append!
result
{:type "layout-open"
:col (get (nth tokens j) "col")
:keyword (get tok "value")
:line (get tok "line")})
(set! suppress-next-indent true))))))
(set! i (+ i 1))
(hk-pre-step))))))))
(hk-pre-step)
result)))
;; ── Main pass: L algorithm ────────────────────────────────────────
;;
;; Stack is a list; the head is the top of stack. Each entry is
;; either the keyword :explicit (pushed by an explicit `{`) or a dict
;; {:col N :keyword K} pushed by a layout-open marker.
;;
;; Rules (following Haskell 98 §10.3):
;;
;; layout-open(n) vs stack:
;; empty or explicit top → push n; emit {
;; n > top-col → push n; emit {
;; otherwise → emit { }; retry as indent(n)
;;
;; layout-indent(n) vs stack:
;; empty or explicit top → drop
;; n == top-col → emit ;
;; n < top-col → emit }; pop; recurse
;; n > top-col → drop
;;
;; lbrace → push :explicit; emit {
;; rbrace → pop if :explicit; emit }
;; `in` with implicit let on top → emit }; pop; emit in
;; any other token → emit
;;
;; EOF: emit } for every remaining implicit context.
(define
hk-layout-L
(fn
(pre-toks)
(let
((result (list))
(stack (list))
(n (len pre-toks))
(i 0))
(define hk-emit (fn (t) (append! result t)))
(define
hk-indent-at
(fn
(col line)
(cond
((or (empty? stack) (= (first stack) :explicit)) nil)
(:else
(let
((top-col (get (first stack) "col")))
(cond
((= col top-col)
(hk-emit
{:type "vsemi" :value ";" :line line :col col}))
((< col top-col)
(do
(hk-emit
{:type "vrbrace" :value "}" :line line :col col})
(set! stack (rest stack))
(hk-indent-at col line)))
(:else nil)))))))
(define
hk-open-at
(fn
(col keyword line)
(cond
((and
(> col 0)
(or
(empty? stack)
(= (first stack) :explicit)
(> col (get (first stack) "col"))))
(do
(hk-emit
{:type "vlbrace" :value "{" :line line :col col})
(set! stack (cons {:col col :keyword keyword} stack))))
(:else
(do
(hk-emit
{:type "vlbrace" :value "{" :line line :col col})
(hk-emit
{:type "vrbrace" :value "}" :line line :col col})
(hk-indent-at col line))))))
(define
hk-close-eof
(fn
()
(when
(and
(not (empty? stack))
(not (= (first stack) :explicit)))
(do
(hk-emit {:type "vrbrace" :value "}" :line 0 :col 0})
(set! stack (rest stack))
(hk-close-eof)))))
(define
hk-layout-step
(fn
()
(when
(< i n)
(let
((tok (nth pre-toks i)) (ty (get tok "type")))
(cond
((= ty "eof")
(do
(hk-close-eof)
(hk-emit tok)
(set! i (+ i 1))
(hk-layout-step)))
((= ty "layout-open")
(do
(hk-open-at
(get tok "col")
(get tok "keyword")
(get tok "line"))
(set! i (+ i 1))
(hk-layout-step)))
((= ty "layout-indent")
(do
(hk-indent-at (get tok "col") (get tok "line"))
(set! i (+ i 1))
(hk-layout-step)))
((= ty "lbrace")
(do
(set! stack (cons :explicit stack))
(hk-emit tok)
(set! i (+ i 1))
(hk-layout-step)))
((= ty "rbrace")
(do
(when
(and
(not (empty? stack))
(= (first stack) :explicit))
(set! stack (rest stack)))
(hk-emit tok)
(set! i (+ i 1))
(hk-layout-step)))
((and
(= ty "reserved")
(= (get tok "value") "in")
(not (empty? stack))
(not (= (first stack) :explicit))
(= (get (first stack) "keyword") "let"))
(do
(hk-emit
{:type "vrbrace"
:value "}"
:line (get tok "line")
:col (get tok "col")})
(set! stack (rest stack))
(hk-emit tok)
(set! i (+ i 1))
(hk-layout-step)))
(:else
(do
(hk-emit tok)
(set! i (+ i 1))
(hk-layout-step))))))))
(hk-layout-step)
(hk-close-eof)
result)))
(define hk-layout (fn (tokens) (hk-layout-L (hk-layout-pre tokens))))

View File

@@ -46,6 +46,8 @@ for FILE in "${FILES[@]}"; do
cat > "$TMPFILE" <<EPOCHS
(epoch 1)
(load "lib/haskell/tokenizer.sx")
(load "lib/haskell/layout.sx")
(load "lib/haskell/testlib.sx")
(epoch 2)
(load "$FILE")
(epoch 3)
@@ -81,6 +83,8 @@ EPOCHS
cat > "$TMPFILE2" <<EPOCHS
(epoch 1)
(load "lib/haskell/tokenizer.sx")
(load "lib/haskell/layout.sx")
(load "lib/haskell/testlib.sx")
(epoch 2)
(load "$FILE")
(epoch 3)

58
lib/haskell/testlib.sx Normal file
View File

@@ -0,0 +1,58 @@
;; Shared test harness for Haskell-on-SX tests.
;; Each test file expects hk-test / hk-deep=? / counters to already be bound.
(define
hk-deep=?
(fn
(a b)
(cond
((= a b) true)
((and (dict? a) (dict? b))
(let
((ak (keys a)) (bk (keys b)))
(if
(not (= (len ak) (len bk)))
false
(every?
(fn
(k)
(and (has-key? b k) (hk-deep=? (get a k) (get b k))))
ak))))
((and (list? a) (list? b))
(if
(not (= (len a) (len b)))
false
(let
((i 0) (ok true))
(define
hk-de-loop
(fn
()
(when
(and ok (< i (len a)))
(do
(when
(not (hk-deep=? (nth a i) (nth b i)))
(set! ok false))
(set! i (+ i 1))
(hk-de-loop)))))
(hk-de-loop)
ok)))
(:else false))))
(define hk-test-pass 0)
(define hk-test-fail 0)
(define hk-test-fails (list))
(define
hk-test
(fn
(name actual expected)
(if
(hk-deep=? actual expected)
(set! hk-test-pass (+ hk-test-pass 1))
(do
(set! hk-test-fail (+ hk-test-fail 1))
(append!
hk-test-fails
{:actual actual :expected expected :name name})))))

245
lib/haskell/tests/layout.sx Normal file
View File

@@ -0,0 +1,245 @@
;; Haskell layout-rule tests. hk-tokenizer + hk-layout produce a
;; virtual-brace-annotated stream; these tests cover the algorithm
;; from Haskell 98 §10.3 plus the pragmatic let/in single-line rule.
;; Convenience — tokenize, run layout, strip eof, keep :type/:value.
(define
hk-lay
(fn
(src)
(map
(fn (tok) {:value (get tok "value") :type (get tok "type")})
(filter
(fn (tok) (not (= (get tok "type") "eof")))
(hk-layout (hk-tokenize src))))))
;; ── 1. Basics ──
(hk-test
"empty input produces empty module { }"
(hk-lay "")
(list
{:value "{" :type "vlbrace"}
{:value "}" :type "vrbrace"}))
(hk-test
"single token → module open+close"
(hk-lay "foo")
(list
{:value "{" :type "vlbrace"}
{:value "foo" :type "varid"}
{:value "}" :type "vrbrace"}))
(hk-test
"two top-level decls get vsemi between"
(hk-lay "foo = 1\nbar = 2")
(list
{:value "{" :type "vlbrace"}
{:value "foo" :type "varid"}
{:value "=" :type "reservedop"}
{:value 1 :type "integer"}
{:value ";" :type "vsemi"}
{:value "bar" :type "varid"}
{:value "=" :type "reservedop"}
{:value 2 :type "integer"}
{:value "}" :type "vrbrace"}))
;; ── 2. Layout keywords — do / let / where / of ──
(hk-test
"do block with two stmts"
(hk-lay "f = do\n x\n y")
(list
{:value "{" :type "vlbrace"}
{:value "f" :type "varid"}
{:value "=" :type "reservedop"}
{:value "do" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "x" :type "varid"}
{:value ";" :type "vsemi"}
{:value "y" :type "varid"}
{:value "}" :type "vrbrace"}
{:value "}" :type "vrbrace"}))
(hk-test
"single-line let ... in"
(hk-lay "let x = 1 in x")
(list
{:value "{" :type "vlbrace"}
{:value "let" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "x" :type "varid"}
{:value "=" :type "reservedop"}
{:value 1 :type "integer"}
{:value "}" :type "vrbrace"}
{:value "in" :type "reserved"}
{:value "x" :type "varid"}
{:value "}" :type "vrbrace"}))
(hk-test
"where block with two bindings"
(hk-lay "f = g\n where\n g = 1\n h = 2")
(list
{:value "{" :type "vlbrace"}
{:value "f" :type "varid"}
{:value "=" :type "reservedop"}
{:value "g" :type "varid"}
{:value "where" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "g" :type "varid"}
{:value "=" :type "reservedop"}
{:value 1 :type "integer"}
{:value ";" :type "vsemi"}
{:value "h" :type "varid"}
{:value "=" :type "reservedop"}
{:value 2 :type "integer"}
{:value "}" :type "vrbrace"}
{:value "}" :type "vrbrace"}))
(hk-test
"case … of with arms"
(hk-lay "f x = case x of\n Just y -> y\n Nothing -> 0")
(list
{:value "{" :type "vlbrace"}
{:value "f" :type "varid"}
{:value "x" :type "varid"}
{:value "=" :type "reservedop"}
{:value "case" :type "reserved"}
{:value "x" :type "varid"}
{:value "of" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "Just" :type "conid"}
{:value "y" :type "varid"}
{:value "->" :type "reservedop"}
{:value "y" :type "varid"}
{:value ";" :type "vsemi"}
{:value "Nothing" :type "conid"}
{:value "->" :type "reservedop"}
{:value 0 :type "integer"}
{:value "}" :type "vrbrace"}
{:value "}" :type "vrbrace"}))
;; ── 3. Explicit braces disable layout ──
(hk-test
"explicit braces — no implicit vlbrace/vsemi/vrbrace inside"
(hk-lay "do { x ; y }")
(list
{:value "{" :type "vlbrace"}
{:value "do" :type "reserved"}
{:value "{" :type "lbrace"}
{:value "x" :type "varid"}
{:value ";" :type "semi"}
{:value "y" :type "varid"}
{:value "}" :type "rbrace"}
{:value "}" :type "vrbrace"}))
;; ── 4. Dedent closes nested blocks ──
(hk-test
"dedent back to module level closes do block"
(hk-lay "f = do\n x\n y\ng = 2")
(list
{:value "{" :type "vlbrace"}
{:value "f" :type "varid"}
{:value "=" :type "reservedop"}
{:value "do" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "x" :type "varid"}
{:value ";" :type "vsemi"}
{:value "y" :type "varid"}
{:value "}" :type "vrbrace"}
{:value ";" :type "vsemi"}
{:value "g" :type "varid"}
{:value "=" :type "reservedop"}
{:value 2 :type "integer"}
{:value "}" :type "vrbrace"}))
(hk-test
"dedent closes inner let, emits vsemi at outer do level"
(hk-lay "main = do\n let x = 1\n print x")
(list
{:value "{" :type "vlbrace"}
{:value "main" :type "varid"}
{:value "=" :type "reservedop"}
{:value "do" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "let" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "x" :type "varid"}
{:value "=" :type "reservedop"}
{:value 1 :type "integer"}
{:value "}" :type "vrbrace"}
{:value ";" :type "vsemi"}
{:value "print" :type "varid"}
{:value "x" :type "varid"}
{:value "}" :type "vrbrace"}
{:value "}" :type "vrbrace"}))
;; ── 5. Module header skips outer implicit open ──
(hk-test
"module M where — only where opens a block"
(hk-lay "module M where\n f = 1")
(list
{:value "module" :type "reserved"}
{:value "M" :type "conid"}
{:value "where" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "f" :type "varid"}
{:value "=" :type "reservedop"}
{:value 1 :type "integer"}
{:value "}" :type "vrbrace"}))
;; ── 6. Newlines are stripped ──
(hk-test
"newline tokens do not appear in output"
(let
((toks (hk-layout (hk-tokenize "foo\nbar"))))
(every?
(fn (t) (not (= (get t "type") "newline")))
toks))
true)
;; ── 7. Continuation — deeper indent does NOT emit vsemi ──
(hk-test
"line continuation (deeper indent) just merges"
(hk-lay "foo = 1 +\n 2")
(list
{:value "{" :type "vlbrace"}
{:value "foo" :type "varid"}
{:value "=" :type "reservedop"}
{:value 1 :type "integer"}
{:value "+" :type "varsym"}
{:value 2 :type "integer"}
{:value "}" :type "vrbrace"}))
;; ── 8. Stack closing at EOF ──
(hk-test
"EOF inside nested do closes all implicit blocks"
(let
((toks (hk-lay "main = do\n do\n x")))
(let
((n (len toks)))
(list
(get (nth toks (- n 1)) "type")
(get (nth toks (- n 2)) "type")
(get (nth toks (- n 3)) "type"))))
(list "vrbrace" "vrbrace" "vrbrace"))
;; ── 9. Qualified-newline: x at deeper col than stack top does nothing ──
(hk-test
"mixed where + do"
(hk-lay "f = do\n x\n where\n x = 1")
(list
{:value "{" :type "vlbrace"}
{:value "f" :type "varid"}
{:value "=" :type "reservedop"}
{:value "do" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "x" :type "varid"}
{:value "}" :type "vrbrace"}
{:value "where" :type "reserved"}
{:value "{" :type "vlbrace"}
{:value "x" :type "varid"}
{:value "=" :type "reservedop"}
{:value 1 :type "integer"}
{:value "}" :type "vrbrace"}
{:value "}" :type "vrbrace"}))
{:fails hk-test-fails :pass hk-test-pass :fail hk-test-fail}

View File

@@ -3,60 +3,8 @@
;; Lightweight runner: each test checks actual vs expected with
;; structural (deep) equality and accumulates pass/fail counters.
;; Final value of this file is a summary dict with :pass :fail :fails.
(define
hk-deep=?
(fn
(a b)
(cond
((= a b) true)
((and (dict? a) (dict? b))
(let
((ak (keys a)) (bk (keys b)))
(if
(not (= (len ak) (len bk)))
false
(every?
(fn
(k)
(and (has-key? b k) (hk-deep=? (get a k) (get b k))))
ak))))
((and (list? a) (list? b))
(if
(not (= (len a) (len b)))
false
(let
((i 0) (ok true))
(define
hk-de-loop
(fn
()
(when
(and ok (< i (len a)))
(do
(when
(not (hk-deep=? (nth a i) (nth b i)))
(set! ok false))
(set! i (+ i 1))
(hk-de-loop)))))
(hk-de-loop)
ok)))
(:else false))))
(define hk-test-pass 0)
(define hk-test-fail 0)
(define hk-test-fails (list))
(define
hk-test
(fn
(name actual expected)
(if
(hk-deep=? actual expected)
(set! hk-test-pass (+ hk-test-pass 1))
(do
(set! hk-test-fail (+ hk-test-fail 1))
(append! hk-test-fails {:actual actual :expected expected :name name})))))
;; The hk-test / hk-deep=? helpers live in lib/haskell/testlib.sx
;; and are preloaded by lib/haskell/test.sh.
;; Convenience: tokenize and drop newline + eof tokens so tests focus
;; on meaningful content. Returns list of {:type :value} pairs.

View File

@@ -55,7 +55,7 @@ Key mappings:
### Phase 1 — tokenizer + parser + layout rule
- [x] Tokenizer: reserved words, qualified names, operators, numbers (int, float, Rational later), chars/strings, comments (`--` and `{-` nested)
- [ ] Layout algorithm: turn indentation into virtual `{`, `;`, `}` tokens per Haskell 98 §10.3
- [x] Layout algorithm: turn indentation into virtual `{`, `;`, `}` tokens per Haskell 98 §10.3
- [ ] Parser: modules, imports (stub), top-level decls, type sigs, function clauses with patterns + guards + where-clauses, expressions with operator precedence, lambdas, `let`, `if`, `case`, `do`, list comp, sections
- [ ] AST design modelled on GHC's HsSyn at a surface level
- [x] Unit tests in `lib/haskell/tests/parse.sx` (43 tokenizer tests, all green)
@@ -107,6 +107,23 @@ Key mappings:
_Newest first._
- **2026-04-24** — Phase 1: layout algorithm (`lib/haskell/layout.sx`, ~260 lines)
implementing Haskell 98 §10.3. Two-pass design: a pre-pass augments the raw
token stream with explicit `layout-open` / `layout-indent` markers (suppressing
`<n>` when `{n}` already applies, per note 3), then an L pass consumes the
augmented stream against a stack of implicit/explicit layout contexts and
emits `vlbrace` / `vsemi` / `vrbrace` tokens; newlines are dropped. Supports
the initial module-level implicit open (skipped when the first token is
`module` or `{`), the four layout keywords (`let`/`where`/`do`/`of`), explicit
braces disabling layout, dedent closing nested implicit blocks while also
emitting `vsemi` at the enclosing level, and the pragmatic single-line
`let … in` rule (emit `}` when `in` meets an implicit let). 15 new tests
in `lib/haskell/tests/layout.sx` cover module-start, do/let/where/case/of,
explicit braces, multi-level dedent, line continuation, and EOF close-down.
Shared test helpers moved to `lib/haskell/testlib.sx` so both test files
can share one `hk-test`. `test.sh` preloads tokenizer + layout + testlib.
58/58 green.
- **2026-04-24** — Phase 1: Haskell 98 tokenizer (`lib/haskell/tokenizer.sx`, 490 lines)
covering idents (lower/upper/qvarid/qconid), 23 reserved words, 11 reserved ops,
varsym/consym operator chains, integer/hex/octal/float literals incl. exponent