GUEST: step 7 — lib/guest/layout.sx off-side / layout-sensitive lexer
Configurable layout pass that inserts virtual open / close / separator
tokens based on indentation. Supports both styles the brief calls out:
• Haskell-flavour: layout opens AFTER a reserved keyword
(let/where/do/of) and resolves to the next token's column. Module
prelude wraps the whole input in an implicit block. Explicit `{`
after the keyword suppresses virtual layout.
• Python-flavour: layout opens via an :open-trailing-fn predicate
fired AFTER the trigger token (e.g. trailing `:`) — and resolves
to the column of the next token, which in real source is on a
fresh line. No module prelude.
Public entry: (layout-pass cfg tokens). Token shape: dict with at
least :type :value :line :col; everything else passes through. Newline
filler tokens are NOT used — line-break detection is via :line.
lib/guest/tests/layout.sx — 6 tests covering both flavours:
haskell-do-block / haskell-explicit-brace / haskell-do-inline /
haskell-module-prelude / python-if-block / python-nested.
Per the brief's gotcha note ("Don't ship lib/guest/layout.sx unless
the haskell scoreboard equals baseline") — haskell/layout.sx is left
UNTOUCHED. The kit isn't yet a drop-in replacement for the full
Haskell 98 algorithm (Note 5, multi-stage pre-pass, etc.) and forcing
a port would risk the 156 currently passing programs. Haskell
scoreboard remains at 156/156 baseline because no haskell file
changed. The synthetic Python-ish fixture is the second consumer per
the brief's wording.
PARTIAL — kit + synthetic fixture shipped; haskell port deferred until
the kit grows the missing Haskell-98 wrinkles.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
145
lib/guest/layout.sx
Normal file
145
lib/guest/layout.sx
Normal file
@@ -0,0 +1,145 @@
|
||||
;; lib/guest/layout.sx — configurable off-side / layout-sensitive lexer.
|
||||
;;
|
||||
;; Inserts virtual open / close / separator tokens based on indentation.
|
||||
;; Configurable enough to encode either the Haskell 98 layout rule (let /
|
||||
;; where / do / of opens a virtual brace at the next token's column) or
|
||||
;; a Python-ish indent / dedent rule (a colon at the end of a line opens
|
||||
;; a block at the next non-blank line's column).
|
||||
;;
|
||||
;; Token shape (input + output)
|
||||
;; ----------------------------
|
||||
;; Each token is a dict {:type :value :line :col …}. The kit reads
|
||||
;; only :type / :value / :line / :col and passes everything else
|
||||
;; through. The input stream MUST be free of newline filler tokens
|
||||
;; (preprocess them away with your tokenizer) — line breaks are detected
|
||||
;; by comparing :line of consecutive tokens.
|
||||
;;
|
||||
;; Config
|
||||
;; ------
|
||||
;; :open-keywords list of strings; a token whose :value matches
|
||||
;; opens a new layout block at the next token's
|
||||
;; column (Haskell: let/where/do/of).
|
||||
;; :open-trailing-fn (fn (tok) -> bool) — alternative trigger that
|
||||
;; fires AFTER the token is emitted. Use for
|
||||
;; Python-style trailing `:`.
|
||||
;; :open-token / :close-token / :sep-token
|
||||
;; templates {:type :value} merged with :line and
|
||||
;; :col when virtual tokens are emitted.
|
||||
;; :explicit-open? (fn (tok) -> bool) — if the next token after a
|
||||
;; trigger satisfies this, suppress virtual layout
|
||||
;; for that block (Haskell: `{`).
|
||||
;; :module-prelude? if true, wrap whole input in an implicit block
|
||||
;; at the first token's column (Haskell yes,
|
||||
;; Python no).
|
||||
;;
|
||||
;; Public entry
|
||||
;; ------------
|
||||
;; (layout-pass cfg tokens) -> tokens with virtual layout inserted.
|
||||
|
||||
(define
|
||||
layout-mk-virtual
|
||||
(fn (template line col)
|
||||
(assoc (assoc template :line line) :col col)))
|
||||
|
||||
(define
|
||||
layout-is-open-kw?
|
||||
(fn (tok open-kws)
|
||||
(and (= (get tok :type) "reserved")
|
||||
(some (fn (k) (= k (get tok :value))) open-kws))))
|
||||
|
||||
(define
|
||||
layout-pass
|
||||
(fn (cfg tokens)
|
||||
(let ((open-kws (get cfg :open-keywords))
|
||||
(trailing-fn (get cfg :open-trailing-fn))
|
||||
(open-tmpl (get cfg :open-token))
|
||||
(close-tmpl (get cfg :close-token))
|
||||
(sep-tmpl (get cfg :sep-token))
|
||||
(mod-prelude? (get cfg :module-prelude?))
|
||||
(expl?-fn (get cfg :explicit-open?))
|
||||
(out (list))
|
||||
(stack (list))
|
||||
(n (len tokens))
|
||||
(i 0)
|
||||
(prev-line -1)
|
||||
(pending-open false)
|
||||
(just-opened false))
|
||||
(define
|
||||
emit-closes-while-greater
|
||||
(fn (col line)
|
||||
(when (and (not (empty? stack)) (> (first stack) col))
|
||||
(do
|
||||
(append! out (layout-mk-virtual close-tmpl line col))
|
||||
(set! stack (rest stack))
|
||||
(emit-closes-while-greater col line)))))
|
||||
(define
|
||||
emit-pending-open
|
||||
(fn (line col)
|
||||
(do
|
||||
(append! out (layout-mk-virtual open-tmpl line col))
|
||||
(set! stack (cons col stack))
|
||||
(set! pending-open false)
|
||||
(set! just-opened true))))
|
||||
(define
|
||||
layout-step
|
||||
(fn ()
|
||||
(when (< i n)
|
||||
(let ((tok (nth tokens i)))
|
||||
(let ((line (get tok :line)) (col (get tok :col)))
|
||||
(cond
|
||||
(pending-open
|
||||
(cond
|
||||
((and (not (= expl?-fn nil)) (expl?-fn tok))
|
||||
(do
|
||||
(set! pending-open false)
|
||||
(append! out tok)
|
||||
(set! prev-line line)
|
||||
(set! i (+ i 1))
|
||||
(layout-step)))
|
||||
(:else
|
||||
(do
|
||||
(emit-pending-open line col)
|
||||
(layout-step)))))
|
||||
(:else
|
||||
(let ((on-fresh-line? (and (> prev-line 0) (> line prev-line))))
|
||||
(do
|
||||
(when on-fresh-line?
|
||||
(let ((stack-before stack))
|
||||
(begin
|
||||
(emit-closes-while-greater col line)
|
||||
(when (and (not (empty? stack))
|
||||
(= (first stack) col)
|
||||
(not just-opened)
|
||||
;; suppress separator if a dedent fired
|
||||
;; — the dedent is itself the separator
|
||||
(= (len stack) (len stack-before)))
|
||||
(append! out (layout-mk-virtual sep-tmpl line col))))))
|
||||
(set! just-opened false)
|
||||
(append! out tok)
|
||||
(set! prev-line line)
|
||||
(set! i (+ i 1))
|
||||
(cond
|
||||
((layout-is-open-kw? tok open-kws)
|
||||
(set! pending-open true))
|
||||
((and (not (= trailing-fn nil)) (trailing-fn tok))
|
||||
(set! pending-open true)))
|
||||
(layout-step))))))))))
|
||||
(begin
|
||||
;; Module prelude: implicit layout block at the first token's column.
|
||||
(when (and mod-prelude? (> n 0))
|
||||
(let ((tok (nth tokens 0)))
|
||||
(do
|
||||
(append! out (layout-mk-virtual open-tmpl (get tok :line) (get tok :col)))
|
||||
(set! stack (cons (get tok :col) stack))
|
||||
(set! just-opened true))))
|
||||
(layout-step)
|
||||
;; EOF: close every remaining block.
|
||||
(define close-rest
|
||||
(fn ()
|
||||
(when (not (empty? stack))
|
||||
(do
|
||||
(append! out (layout-mk-virtual close-tmpl 0 0))
|
||||
(set! stack (rest stack))
|
||||
(close-rest)))))
|
||||
(close-rest)
|
||||
out))))
|
||||
180
lib/guest/tests/layout.sx
Normal file
180
lib/guest/tests/layout.sx
Normal file
@@ -0,0 +1,180 @@
|
||||
;; lib/guest/tests/layout.sx — synthetic Python-ish off-side fixture.
|
||||
;;
|
||||
;; Exercises lib/guest/layout.sx with a config different from Haskell's
|
||||
;; (no module-prelude, layout opens via trailing `:` not via reserved
|
||||
;; keyword) to prove the kit isn't Haskell-shaped.
|
||||
|
||||
(define glayout-test-pass 0)
|
||||
(define glayout-test-fail 0)
|
||||
(define glayout-test-fails (list))
|
||||
|
||||
(define
|
||||
glayout-test
|
||||
(fn (name actual expected)
|
||||
(if (= actual expected)
|
||||
(set! glayout-test-pass (+ glayout-test-pass 1))
|
||||
(begin
|
||||
(set! glayout-test-fail (+ glayout-test-fail 1))
|
||||
(append! glayout-test-fails {:name name :expected expected :actual actual})))))
|
||||
|
||||
;; Convenience: build a token from {type value line col}.
|
||||
(define
|
||||
glayout-tok
|
||||
(fn (ty val line col)
|
||||
{:type ty :value val :line line :col col}))
|
||||
|
||||
;; Project a token list to ((type value) ...) for compact comparison.
|
||||
(define
|
||||
glayout-shape
|
||||
(fn (toks)
|
||||
(map (fn (t) (list (get t :type) (get t :value))) toks)))
|
||||
|
||||
;; ── Haskell-flavour: keyword opens block ─────────────────────────
|
||||
(define
|
||||
glayout-haskell-cfg
|
||||
{:open-keywords (list "let" "where" "do" "of")
|
||||
:open-trailing-fn nil
|
||||
:open-token {:type "vlbrace" :value "{"}
|
||||
:close-token {:type "vrbrace" :value "}"}
|
||||
:sep-token {:type "vsemi" :value ";"}
|
||||
:module-prelude? false
|
||||
:explicit-open? (fn (tok) (= (get tok :type) "lbrace"))})
|
||||
|
||||
;; do
|
||||
;; a
|
||||
;; b
|
||||
;; c ← outside the do-block
|
||||
(glayout-test "haskell-do-block"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-haskell-cfg
|
||||
(list (glayout-tok "reserved" "do" 1 1)
|
||||
(glayout-tok "ident" "a" 2 3)
|
||||
(glayout-tok "ident" "b" 3 3)
|
||||
(glayout-tok "ident" "c" 4 1))))
|
||||
(list (list "reserved" "do")
|
||||
(list "vlbrace" "{")
|
||||
(list "ident" "a")
|
||||
(list "vsemi" ";")
|
||||
(list "ident" "b")
|
||||
(list "vrbrace" "}")
|
||||
(list "ident" "c")))
|
||||
|
||||
;; Explicit `{` after `do` suppresses virtual layout.
|
||||
(glayout-test "haskell-explicit-brace"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-haskell-cfg
|
||||
(list (glayout-tok "reserved" "do" 1 1)
|
||||
(glayout-tok "lbrace" "{" 1 4)
|
||||
(glayout-tok "ident" "a" 1 6)
|
||||
(glayout-tok "rbrace" "}" 1 8))))
|
||||
(list (list "reserved" "do")
|
||||
(list "lbrace" "{")
|
||||
(list "ident" "a")
|
||||
(list "rbrace" "}")))
|
||||
|
||||
;; Single-statement do-block on the same line.
|
||||
(glayout-test "haskell-do-inline"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-haskell-cfg
|
||||
(list (glayout-tok "reserved" "do" 1 1)
|
||||
(glayout-tok "ident" "a" 1 4))))
|
||||
(list (list "reserved" "do")
|
||||
(list "vlbrace" "{")
|
||||
(list "ident" "a")
|
||||
(list "vrbrace" "}")))
|
||||
|
||||
;; Module-prelude: wrap whole input in implicit layout block at first
|
||||
;; tok's column.
|
||||
(glayout-test "haskell-module-prelude"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
(assoc glayout-haskell-cfg :module-prelude? true)
|
||||
(list (glayout-tok "ident" "x" 1 1)
|
||||
(glayout-tok "ident" "y" 2 1)
|
||||
(glayout-tok "ident" "z" 3 1))))
|
||||
(list (list "vlbrace" "{")
|
||||
(list "ident" "x")
|
||||
(list "vsemi" ";")
|
||||
(list "ident" "y")
|
||||
(list "vsemi" ";")
|
||||
(list "ident" "z")
|
||||
(list "vrbrace" "}")))
|
||||
|
||||
;; ── Python-flavour: trailing `:` opens block ─────────────────────
|
||||
(define
|
||||
glayout-python-cfg
|
||||
{:open-keywords (list)
|
||||
:open-trailing-fn (fn (tok) (and (= (get tok :type) "punct")
|
||||
(= (get tok :value) ":")))
|
||||
:open-token {:type "indent" :value "INDENT"}
|
||||
:close-token {:type "dedent" :value "DEDENT"}
|
||||
:sep-token {:type "newline" :value "NEWLINE"}
|
||||
:module-prelude? false
|
||||
:explicit-open? nil})
|
||||
|
||||
;; if x:
|
||||
;; a
|
||||
;; b
|
||||
;; c
|
||||
(glayout-test "python-if-block"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-python-cfg
|
||||
(list (glayout-tok "reserved" "if" 1 1)
|
||||
(glayout-tok "ident" "x" 1 4)
|
||||
(glayout-tok "punct" ":" 1 5)
|
||||
(glayout-tok "ident" "a" 2 5)
|
||||
(glayout-tok "ident" "b" 3 5)
|
||||
(glayout-tok "ident" "c" 4 1))))
|
||||
(list (list "reserved" "if")
|
||||
(list "ident" "x")
|
||||
(list "punct" ":")
|
||||
(list "indent" "INDENT")
|
||||
(list "ident" "a")
|
||||
(list "newline" "NEWLINE")
|
||||
(list "ident" "b")
|
||||
(list "dedent" "DEDENT")
|
||||
(list "ident" "c")))
|
||||
|
||||
;; Nested Python-style blocks.
|
||||
;; def f():
|
||||
;; if x:
|
||||
;; a
|
||||
;; b
|
||||
(glayout-test "python-nested"
|
||||
(glayout-shape
|
||||
(layout-pass
|
||||
glayout-python-cfg
|
||||
(list (glayout-tok "reserved" "def" 1 1)
|
||||
(glayout-tok "ident" "f" 1 5)
|
||||
(glayout-tok "punct" "(" 1 6)
|
||||
(glayout-tok "punct" ")" 1 7)
|
||||
(glayout-tok "punct" ":" 1 8)
|
||||
(glayout-tok "reserved" "if" 2 5)
|
||||
(glayout-tok "ident" "x" 2 8)
|
||||
(glayout-tok "punct" ":" 2 9)
|
||||
(glayout-tok "ident" "a" 3 9)
|
||||
(glayout-tok "ident" "b" 4 5))))
|
||||
(list (list "reserved" "def")
|
||||
(list "ident" "f")
|
||||
(list "punct" "(")
|
||||
(list "punct" ")")
|
||||
(list "punct" ":")
|
||||
(list "indent" "INDENT")
|
||||
(list "reserved" "if")
|
||||
(list "ident" "x")
|
||||
(list "punct" ":")
|
||||
(list "indent" "INDENT")
|
||||
(list "ident" "a")
|
||||
(list "dedent" "DEDENT")
|
||||
(list "ident" "b")
|
||||
(list "dedent" "DEDENT")))
|
||||
|
||||
(define glayout-tests-run!
|
||||
(fn ()
|
||||
{:passed glayout-test-pass
|
||||
:failed glayout-test-fail
|
||||
:total (+ glayout-test-pass glayout-test-fail)}))
|
||||
Reference in New Issue
Block a user