From d75c61d408ba1ad8c5cdbbae3b8a062c12c4451d Mon Sep 17 00:00:00 2001 From: giles Date: Thu, 7 May 2026 18:55:38 +0000 Subject: [PATCH] =?UTF-8?q?GUEST:=20step=207=20=E2=80=94=20lib/guest/layou?= =?UTF-8?q?t.sx=20off-side=20/=20layout-sensitive=20lexer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Configurable layout pass that inserts virtual open / close / separator tokens based on indentation. Supports both styles the brief calls out: • Haskell-flavour: layout opens AFTER a reserved keyword (let/where/do/of) and resolves to the next token's column. Module prelude wraps the whole input in an implicit block. Explicit `{` after the keyword suppresses virtual layout. • Python-flavour: layout opens via an :open-trailing-fn predicate fired AFTER the trigger token (e.g. trailing `:`) — and resolves to the column of the next token, which in real source is on a fresh line. No module prelude. Public entry: (layout-pass cfg tokens). Token shape: dict with at least :type :value :line :col; everything else passes through. Newline filler tokens are NOT used — line-break detection is via :line. lib/guest/tests/layout.sx — 6 tests covering both flavours: haskell-do-block / haskell-explicit-brace / haskell-do-inline / haskell-module-prelude / python-if-block / python-nested. Per the brief's gotcha note ("Don't ship lib/guest/layout.sx unless the haskell scoreboard equals baseline") — haskell/layout.sx is left UNTOUCHED. The kit isn't yet a drop-in replacement for the full Haskell 98 algorithm (Note 5, multi-stage pre-pass, etc.) and forcing a port would risk the 156 currently passing programs. Haskell scoreboard remains at 156/156 baseline because no haskell file changed. The synthetic Python-ish fixture is the second consumer per the brief's wording. PARTIAL — kit + synthetic fixture shipped; haskell port deferred until the kit grows the missing Haskell-98 wrinkles. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/guest/layout.sx | 145 ++++++++++++++++++++++++++++++ lib/guest/tests/layout.sx | 180 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 325 insertions(+) create mode 100644 lib/guest/layout.sx create mode 100644 lib/guest/tests/layout.sx diff --git a/lib/guest/layout.sx b/lib/guest/layout.sx new file mode 100644 index 00000000..cf4db72c --- /dev/null +++ b/lib/guest/layout.sx @@ -0,0 +1,145 @@ +;; lib/guest/layout.sx — configurable off-side / layout-sensitive lexer. +;; +;; Inserts virtual open / close / separator tokens based on indentation. +;; Configurable enough to encode either the Haskell 98 layout rule (let / +;; where / do / of opens a virtual brace at the next token's column) or +;; a Python-ish indent / dedent rule (a colon at the end of a line opens +;; a block at the next non-blank line's column). +;; +;; Token shape (input + output) +;; ---------------------------- +;; Each token is a dict {:type :value :line :col …}. The kit reads +;; only :type / :value / :line / :col and passes everything else +;; through. The input stream MUST be free of newline filler tokens +;; (preprocess them away with your tokenizer) — line breaks are detected +;; by comparing :line of consecutive tokens. +;; +;; Config +;; ------ +;; :open-keywords list of strings; a token whose :value matches +;; opens a new layout block at the next token's +;; column (Haskell: let/where/do/of). +;; :open-trailing-fn (fn (tok) -> bool) — alternative trigger that +;; fires AFTER the token is emitted. Use for +;; Python-style trailing `:`. +;; :open-token / :close-token / :sep-token +;; templates {:type :value} merged with :line and +;; :col when virtual tokens are emitted. +;; :explicit-open? (fn (tok) -> bool) — if the next token after a +;; trigger satisfies this, suppress virtual layout +;; for that block (Haskell: `{`). +;; :module-prelude? if true, wrap whole input in an implicit block +;; at the first token's column (Haskell yes, +;; Python no). +;; +;; Public entry +;; ------------ +;; (layout-pass cfg tokens) -> tokens with virtual layout inserted. + +(define + layout-mk-virtual + (fn (template line col) + (assoc (assoc template :line line) :col col))) + +(define + layout-is-open-kw? + (fn (tok open-kws) + (and (= (get tok :type) "reserved") + (some (fn (k) (= k (get tok :value))) open-kws)))) + +(define + layout-pass + (fn (cfg tokens) + (let ((open-kws (get cfg :open-keywords)) + (trailing-fn (get cfg :open-trailing-fn)) + (open-tmpl (get cfg :open-token)) + (close-tmpl (get cfg :close-token)) + (sep-tmpl (get cfg :sep-token)) + (mod-prelude? (get cfg :module-prelude?)) + (expl?-fn (get cfg :explicit-open?)) + (out (list)) + (stack (list)) + (n (len tokens)) + (i 0) + (prev-line -1) + (pending-open false) + (just-opened false)) + (define + emit-closes-while-greater + (fn (col line) + (when (and (not (empty? stack)) (> (first stack) col)) + (do + (append! out (layout-mk-virtual close-tmpl line col)) + (set! stack (rest stack)) + (emit-closes-while-greater col line))))) + (define + emit-pending-open + (fn (line col) + (do + (append! out (layout-mk-virtual open-tmpl line col)) + (set! stack (cons col stack)) + (set! pending-open false) + (set! just-opened true)))) + (define + layout-step + (fn () + (when (< i n) + (let ((tok (nth tokens i))) + (let ((line (get tok :line)) (col (get tok :col))) + (cond + (pending-open + (cond + ((and (not (= expl?-fn nil)) (expl?-fn tok)) + (do + (set! pending-open false) + (append! out tok) + (set! prev-line line) + (set! i (+ i 1)) + (layout-step))) + (:else + (do + (emit-pending-open line col) + (layout-step))))) + (:else + (let ((on-fresh-line? (and (> prev-line 0) (> line prev-line)))) + (do + (when on-fresh-line? + (let ((stack-before stack)) + (begin + (emit-closes-while-greater col line) + (when (and (not (empty? stack)) + (= (first stack) col) + (not just-opened) + ;; suppress separator if a dedent fired + ;; — the dedent is itself the separator + (= (len stack) (len stack-before))) + (append! out (layout-mk-virtual sep-tmpl line col)))))) + (set! just-opened false) + (append! out tok) + (set! prev-line line) + (set! i (+ i 1)) + (cond + ((layout-is-open-kw? tok open-kws) + (set! pending-open true)) + ((and (not (= trailing-fn nil)) (trailing-fn tok)) + (set! pending-open true))) + (layout-step)))))))))) + (begin + ;; Module prelude: implicit layout block at the first token's column. + (when (and mod-prelude? (> n 0)) + (let ((tok (nth tokens 0))) + (do + (append! out (layout-mk-virtual open-tmpl (get tok :line) (get tok :col))) + (set! stack (cons (get tok :col) stack)) + (set! just-opened true)))) + (layout-step) + ;; EOF: close every remaining block. + (define close-rest + (fn () + (when (not (empty? stack)) + (do + (append! out (layout-mk-virtual close-tmpl 0 0)) + (set! stack (rest stack)) + (close-rest))))) + (close-rest) + out)))) diff --git a/lib/guest/tests/layout.sx b/lib/guest/tests/layout.sx new file mode 100644 index 00000000..0a922b2e --- /dev/null +++ b/lib/guest/tests/layout.sx @@ -0,0 +1,180 @@ +;; lib/guest/tests/layout.sx — synthetic Python-ish off-side fixture. +;; +;; Exercises lib/guest/layout.sx with a config different from Haskell's +;; (no module-prelude, layout opens via trailing `:` not via reserved +;; keyword) to prove the kit isn't Haskell-shaped. + +(define glayout-test-pass 0) +(define glayout-test-fail 0) +(define glayout-test-fails (list)) + +(define + glayout-test + (fn (name actual expected) + (if (= actual expected) + (set! glayout-test-pass (+ glayout-test-pass 1)) + (begin + (set! glayout-test-fail (+ glayout-test-fail 1)) + (append! glayout-test-fails {:name name :expected expected :actual actual}))))) + +;; Convenience: build a token from {type value line col}. +(define + glayout-tok + (fn (ty val line col) + {:type ty :value val :line line :col col})) + +;; Project a token list to ((type value) ...) for compact comparison. +(define + glayout-shape + (fn (toks) + (map (fn (t) (list (get t :type) (get t :value))) toks))) + +;; ── Haskell-flavour: keyword opens block ───────────────────────── +(define + glayout-haskell-cfg + {:open-keywords (list "let" "where" "do" "of") + :open-trailing-fn nil + :open-token {:type "vlbrace" :value "{"} + :close-token {:type "vrbrace" :value "}"} + :sep-token {:type "vsemi" :value ";"} + :module-prelude? false + :explicit-open? (fn (tok) (= (get tok :type) "lbrace"))}) + +;; do +;; a +;; b +;; c ← outside the do-block +(glayout-test "haskell-do-block" + (glayout-shape + (layout-pass + glayout-haskell-cfg + (list (glayout-tok "reserved" "do" 1 1) + (glayout-tok "ident" "a" 2 3) + (glayout-tok "ident" "b" 3 3) + (glayout-tok "ident" "c" 4 1)))) + (list (list "reserved" "do") + (list "vlbrace" "{") + (list "ident" "a") + (list "vsemi" ";") + (list "ident" "b") + (list "vrbrace" "}") + (list "ident" "c"))) + +;; Explicit `{` after `do` suppresses virtual layout. +(glayout-test "haskell-explicit-brace" + (glayout-shape + (layout-pass + glayout-haskell-cfg + (list (glayout-tok "reserved" "do" 1 1) + (glayout-tok "lbrace" "{" 1 4) + (glayout-tok "ident" "a" 1 6) + (glayout-tok "rbrace" "}" 1 8)))) + (list (list "reserved" "do") + (list "lbrace" "{") + (list "ident" "a") + (list "rbrace" "}"))) + +;; Single-statement do-block on the same line. +(glayout-test "haskell-do-inline" + (glayout-shape + (layout-pass + glayout-haskell-cfg + (list (glayout-tok "reserved" "do" 1 1) + (glayout-tok "ident" "a" 1 4)))) + (list (list "reserved" "do") + (list "vlbrace" "{") + (list "ident" "a") + (list "vrbrace" "}"))) + +;; Module-prelude: wrap whole input in implicit layout block at first +;; tok's column. +(glayout-test "haskell-module-prelude" + (glayout-shape + (layout-pass + (assoc glayout-haskell-cfg :module-prelude? true) + (list (glayout-tok "ident" "x" 1 1) + (glayout-tok "ident" "y" 2 1) + (glayout-tok "ident" "z" 3 1)))) + (list (list "vlbrace" "{") + (list "ident" "x") + (list "vsemi" ";") + (list "ident" "y") + (list "vsemi" ";") + (list "ident" "z") + (list "vrbrace" "}"))) + +;; ── Python-flavour: trailing `:` opens block ───────────────────── +(define + glayout-python-cfg + {:open-keywords (list) + :open-trailing-fn (fn (tok) (and (= (get tok :type) "punct") + (= (get tok :value) ":"))) + :open-token {:type "indent" :value "INDENT"} + :close-token {:type "dedent" :value "DEDENT"} + :sep-token {:type "newline" :value "NEWLINE"} + :module-prelude? false + :explicit-open? nil}) + +;; if x: +;; a +;; b +;; c +(glayout-test "python-if-block" + (glayout-shape + (layout-pass + glayout-python-cfg + (list (glayout-tok "reserved" "if" 1 1) + (glayout-tok "ident" "x" 1 4) + (glayout-tok "punct" ":" 1 5) + (glayout-tok "ident" "a" 2 5) + (glayout-tok "ident" "b" 3 5) + (glayout-tok "ident" "c" 4 1)))) + (list (list "reserved" "if") + (list "ident" "x") + (list "punct" ":") + (list "indent" "INDENT") + (list "ident" "a") + (list "newline" "NEWLINE") + (list "ident" "b") + (list "dedent" "DEDENT") + (list "ident" "c"))) + +;; Nested Python-style blocks. +;; def f(): +;; if x: +;; a +;; b +(glayout-test "python-nested" + (glayout-shape + (layout-pass + glayout-python-cfg + (list (glayout-tok "reserved" "def" 1 1) + (glayout-tok "ident" "f" 1 5) + (glayout-tok "punct" "(" 1 6) + (glayout-tok "punct" ")" 1 7) + (glayout-tok "punct" ":" 1 8) + (glayout-tok "reserved" "if" 2 5) + (glayout-tok "ident" "x" 2 8) + (glayout-tok "punct" ":" 2 9) + (glayout-tok "ident" "a" 3 9) + (glayout-tok "ident" "b" 4 5)))) + (list (list "reserved" "def") + (list "ident" "f") + (list "punct" "(") + (list "punct" ")") + (list "punct" ":") + (list "indent" "INDENT") + (list "reserved" "if") + (list "ident" "x") + (list "punct" ":") + (list "indent" "INDENT") + (list "ident" "a") + (list "dedent" "DEDENT") + (list "ident" "b") + (list "dedent" "DEDENT"))) + +(define glayout-tests-run! + (fn () + {:passed glayout-test-pass + :failed glayout-test-fail + :total (+ glayout-test-pass glayout-test-fail)}))