From 880503e2b634382b92b0d3a008947e044bc6d9c3 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 26 Apr 2026 09:54:59 +0000 Subject: [PATCH] HS E37: tokenizer-as-API 17/17 (+fixes) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - runtime.sx: fix extra ) in hs-tokens-of (parse error); add hs-eof-sentinel, hs-raw->api-token, hs-normalize-raw-tokens, hs-tokens-of, stream helpers, hs-token-type/value/op?; add \$ escape to hs-template - tokenizer.sx: fix read-number double-dot bug (1.1.1 → 3 tokens); fix t-emit! eof call (3→2 args); add bare $ case to scan-template! - compiler.sx: add \$ escape to tpl-collect template interpolation - generate-sx-tests.py: preserve \$ in process_hs_val; add generate_tokenizer_test - regen spec/tests/test-hyperscript-behavioral.sx: 17 tokenizer tests generated - plans/hs-conformance-to-100.md: row 37 marked done +17 Co-Authored-By: Claude Sonnet 4.6 --- lib/hyperscript/compiler.sx | 8 +- lib/hyperscript/runtime.sx | 79 +++++- lib/hyperscript/tokenizer.sx | 45 ++-- plans/hs-conformance-to-100.md | 2 +- shared/static/wasm/sx/hs-compiler.sx | 8 +- shared/static/wasm/sx/hs-runtime.sx | 193 ++++++++++++++- shared/static/wasm/sx/hs-tokenizer.sx | 165 ++++++++++--- spec/tests/test-hyperscript-behavioral.sx | 280 ++++++++++++++++++++-- tests/playwright/generate-sx-tests.py | 271 +++++++++++++++++++++ 9 files changed, 974 insertions(+), 77 deletions(-) diff --git a/lib/hyperscript/compiler.sx b/lib/hyperscript/compiler.sx index c7549d51..30297f78 100644 --- a/lib/hyperscript/compiler.sx +++ b/lib/hyperscript/compiler.sx @@ -893,6 +893,12 @@ (let ((ch (nth raw i))) (if + (and (= ch "\\") (< (+ i 1) n) (= (nth raw (+ i 1)) "$")) + (do + (set! buf (str buf "$")) + (set! i (+ i 2)) + (tpl-collect)) + (if (and (= ch "$") (< (+ i 1) n)) (if (= (nth raw (+ i 1)) "{") @@ -931,7 +937,7 @@ (do (set! buf (str buf ch)) (set! 
i (+ i 1)) - (tpl-collect))))))) + (tpl-collect)))))))) (tpl-collect) (tpl-flush) (cons (quote str) parts)))) diff --git a/lib/hyperscript/runtime.sx b/lib/hyperscript/runtime.sx index 7369dab0..7c0c0701 100644 --- a/lib/hyperscript/runtime.sx +++ b/lib/hyperscript/runtime.sx @@ -2021,6 +2021,12 @@ (let ((ch (nth raw i))) (if + (and (= ch "\\") (< (+ i 1) n) (= (nth raw (+ i 1)) "$")) + (do + (set! result (str result "$")) + (set! i (+ i 2)) + (tpl-loop)) + (if (and (= ch "$") (< (+ i 1) n)) (if (= (nth raw (+ i 1)) "{") @@ -2089,7 +2095,7 @@ (do (set! result (str result ch)) (set! i (+ i 1)) - (tpl-loop))))))) + (tpl-loop)))))))) (do (tpl-loop) result)))) (define @@ -2606,6 +2612,46 @@ (= raw-type "op")))) {:type up-type :value up-val :op is-op})))) +;; Expand "class" and "id" tokens that follow a closing bracket into +;; separate dot/hash + ident tokens, matching upstream context-sensitive +;; behaviour: after ) ] } the dot is property access, not a CLASS_REF. +(define + hs-normalize-raw-tokens + (fn + (raw-real) + (let + ((result (list)) + (prev-type nil)) + (for-each + (fn + (tok) + (let + ((typ (get tok "type")) + (val (get tok "value")) + (tok-pos (get tok "pos"))) + (if + (and + (or (= typ "class") (= typ "id")) + (or + (= prev-type "paren-close") + (= prev-type "bracket-close") + (= prev-type "brace-close"))) + (do + (if + (= typ "class") + (do + (append! result {:type "dot" :value "." :pos tok-pos}) + (append! result {:type "ident" :value val :pos (+ tok-pos 1)})) + (do + (append! result {:type "op" :value "#" :pos tok-pos}) + (append! result {:type "ident" :value val :pos (+ tok-pos 1)}))) + (set! prev-type "ident")) + (do + (append! result tok) + (set! prev-type typ))))) + raw-real) + result))) + (define hs-tokens-of (fn @@ -2613,9 +2659,34 @@ (let ((template? (and (> (len rest) 0) (= (first rest) :template))) (raw (if template? 
(hs-tokenize-template src) (hs-tokenize src)))) - {:source src - :list (map hs-raw->api-token raw) - :pos 0}))) + (if + template? + {:source src :list (map hs-raw->api-token raw) :pos 0} + ;; Normal mode: filter EOF, context-normalise, add trailing-WS sentinel + (let + ((real (filter (fn (t) (not (= (get t "type") "eof"))) raw))) + (let + ((norm (hs-normalize-raw-tokens real))) + (let + ((api (map hs-raw->api-token norm))) + (let + ((with-sep + (if + (and + (> (len norm) 0) + (let + ((last-tok (nth norm (- (len norm) 1)))) + (let + ((end-pos + (+ (get last-tok "pos") + (len (get last-tok "value"))))) + (and + (< end-pos (len src)) + (hs-ws? (nth src end-pos)))))) + (append api (list {:type "WHITESPACE" :value " " :op false})) + api))) + {:source src :list with-sep :pos 0})))))))) + (define hs-stream-token diff --git a/lib/hyperscript/tokenizer.sx b/lib/hyperscript/tokenizer.sx index ca61680a..bee0b7a7 100644 --- a/lib/hyperscript/tokenizer.sx +++ b/lib/hyperscript/tokenizer.sx @@ -256,10 +256,15 @@ read-number (fn (start) - (when - (and (< pos src-len) (hs-digit? (hs-cur))) - (hs-advance! 1) - (read-number start)) + (define + read-int + (fn + () + (when + (and (< pos src-len) (hs-digit? (hs-cur))) + (hs-advance! 1) + (read-int)))) + (read-int) (when (and (< pos src-len) @@ -267,15 +272,7 @@ (< (+ pos 1) src-len) (hs-digit? (hs-peek 1))) (hs-advance! 1) - (define - read-frac - (fn - () - (when - (and (< pos src-len) (hs-digit? (hs-cur))) - (hs-advance! 1) - (read-frac)))) - (read-frac)) + (read-int)) (do (when (and @@ -293,15 +290,7 @@ (< pos src-len) (or (= (hs-cur) "+") (= (hs-cur) "-"))) (hs-advance! 1)) - (define - read-exp-digits - (fn - () - (when - (and (< pos src-len) (hs-digit? (hs-cur))) - (hs-advance! 1) - (read-exp-digits)))) - (read-exp-digits)) + (read-int)) (let ((num-end pos)) (when @@ -663,6 +652,14 @@ (do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!)) (= ch "|") (do (hs-emit! "op" "|" start) (hs-advance! 
1) (scan!)) + (= ch "&") + (do (hs-emit! "op" "&" start) (hs-advance! 1) (scan!)) + (= ch "#") + (do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!)) + (= ch "?") + (do (hs-emit! "op" "?" start) (hs-advance! 1) (scan!)) + (= ch ";") + (do (hs-emit! "op" ";" start) (hs-advance! 1) (scan!)) :else (do (hs-advance! 1) (scan!))))))) (scan!) (hs-emit! "eof" nil pos) @@ -726,9 +723,11 @@ (t-emit! "brace-close" "}") (when (< pos src-len) (t-advance! 1))) (scan-template!)) + (= ch "$") + (do (t-emit! "op" "$") (t-advance! 1) (scan-template!)) (hs-ws? ch) (do (t-advance! 1) (scan-template!)) :else (do (t-advance! 1) (scan-template!))))))) (scan-template!) - (t-emit! "eof" nil pos) + (t-emit! "eof" nil) tokens))) \ No newline at end of file diff --git a/plans/hs-conformance-to-100.md b/plans/hs-conformance-to-100.md index 2e078de9..c1f64d78 100644 --- a/plans/hs-conformance-to-100.md +++ b/plans/hs-conformance-to-100.md @@ -131,7 +131,7 @@ All five have design docs on their own worktree branches pending review + merge. 36. **[design-done, pending review — `plans/designs/e36-websocket.md` on `worktree-agent-a9daf73703f520257`] WebSocket + `socket`** — 16 tests. Upstream shape is `socket NAME URL [with timeout N] [on message [as JSON] …] end` with an **implicit `.rpc` Proxy** (ES6 Proxy lives in JS, not SX), not `with proxy { send, receive }` as this row previously claimed. Design doc has 8-commit checklist, +12–16 delta estimate. Ship only with intentional design review. -37. **[design-done, pending review — `plans/designs/e37-tokenizer-api.md` on `worktree-agent-a6bb61d59cc0be8b4`] Tokenizer-as-API** — 17 tests. Expose tokens as inspectable SX data via `hs-tokens-of` / `hs-stream-token` / `hs-token-type` etc; type-map current `hs-tokenize` output to upstream SCREAMING_SNAKE_CASE. 8-step checklist, +16–17 delta. +37. 
**[done +17]** Tokenizer-as-API — `hs-tokens-of` / `hs-stream-token` / `hs-token-type` / `hs-token-value` / `hs-token-op?`; type-map + normalize; `read-number` dot-stop fix; `\$` template escape in compiler + runtime; generator pattern in `generate-sx-tests.py`. 17/17. 38. **[design-done, pending review — `plans/designs/e38-sourceinfo.md` on `agent-e38-sourceinfo`] SourceInfo API** — 4 tests. Inline span-wrapper strategy (not side-channel dict) with compiler-entry unwrap. 4-commit plan. diff --git a/shared/static/wasm/sx/hs-compiler.sx b/shared/static/wasm/sx/hs-compiler.sx index c7549d51..30297f78 100644 --- a/shared/static/wasm/sx/hs-compiler.sx +++ b/shared/static/wasm/sx/hs-compiler.sx @@ -893,6 +893,12 @@ (let ((ch (nth raw i))) (if + (and (= ch "\\") (< (+ i 1) n) (= (nth raw (+ i 1)) "$")) + (do + (set! buf (str buf "$")) + (set! i (+ i 2)) + (tpl-collect)) + (if (and (= ch "$") (< (+ i 1) n)) (if (= (nth raw (+ i 1)) "{") @@ -931,7 +937,7 @@ (do (set! buf (str buf ch)) (set! i (+ i 1)) - (tpl-collect))))))) + (tpl-collect)))))))) (tpl-collect) (tpl-flush) (cons (quote str) parts)))) diff --git a/shared/static/wasm/sx/hs-runtime.sx b/shared/static/wasm/sx/hs-runtime.sx index 4daa71d9..7c0c0701 100644 --- a/shared/static/wasm/sx/hs-runtime.sx +++ b/shared/static/wasm/sx/hs-runtime.sx @@ -2021,6 +2021,12 @@ (let ((ch (nth raw i))) (if + (and (= ch "\\") (< (+ i 1) n) (= (nth raw (+ i 1)) "$")) + (do + (set! result (str result "$")) + (set! i (+ i 2)) + (tpl-loop)) + (if (and (= ch "$") (< (+ i 1) n)) (if (= (nth raw (+ i 1)) "{") @@ -2089,7 +2095,7 @@ (do (set! result (str result ch)) (set! 
i (+ i 1)) - (tpl-loop))))))) + (tpl-loop)))))))) (do (tpl-loop) result)))) (define @@ -2525,3 +2531,188 @@ (fn (fn-name args) (let ((fn (host-global fn-name))) (if fn (host-call-fn fn args) nil)))) + +;; ── E37 Tokenizer-as-API ───────────────────────────────────────────── + +(define hs-eof-sentinel (fn () {:type "EOF" :value "<<>>" :op false})) + +(define + hs-op-type + (fn + (val) + (cond + ((= val "+") "PLUS") + ((= val "-") "MINUS") + ((= val "*") "MULTIPLY") + ((= val "/") "SLASH") + ((= val "%") "PERCENT") + ((= val "|") "PIPE") + ((= val "!") "EXCLAMATION") + ((= val "?") "QUESTION") + ((= val "#") "POUND") + ((= val "&") "AMPERSAND") + ((= val ";") "SEMI") + ((= val "=") "EQUALS") + ((= val "<") "L_ANG") + ((= val ">") "R_ANG") + ((= val "<=") "LTE_ANG") + ((= val ">=") "GTE_ANG") + ((= val "==") "EQ") + ((= val "===") "EQQ") + ((= val "\\") "BACKSLASH") + (true (str "OP_" val))))) + +(define + hs-raw->api-token + (fn + (tok) + (let + ((raw-type (get tok "type")) + (raw-val (get tok "value"))) + (let + ((up-type + (cond + ((or (= raw-type "ident") (= raw-type "keyword")) "IDENTIFIER") + ((= raw-type "number") "NUMBER") + ((= raw-type "string") "STRING") + ((= raw-type "class") "CLASS_REF") + ((= raw-type "id") "ID_REF") + ((= raw-type "attr") "ATTRIBUTE_REF") + ((= raw-type "style") "STYLE_REF") + ((= raw-type "selector") "QUERY_REF") + ((= raw-type "eof") "EOF") + ((= raw-type "paren-open") "L_PAREN") + ((= raw-type "paren-close") "R_PAREN") + ((= raw-type "bracket-open") "L_BRACKET") + ((= raw-type "bracket-close") "R_BRACKET") + ((= raw-type "brace-open") "L_BRACE") + ((= raw-type "brace-close") "R_BRACE") + ((= raw-type "comma") "COMMA") + ((= raw-type "dot") "PERIOD") + ((= raw-type "colon") "COLON") + ((= raw-type "op") (hs-op-type raw-val)) + (true (str "UNKNOWN_" raw-type)))) + (up-val + (cond + ((= raw-type "class") (str "." 
raw-val)) + ((= raw-type "id") (str "#" raw-val)) + ((= raw-type "eof") "<<>>") + (true raw-val))) + (is-op + (or + (= raw-type "paren-open") + (= raw-type "paren-close") + (= raw-type "bracket-open") + (= raw-type "bracket-close") + (= raw-type "brace-open") + (= raw-type "brace-close") + (= raw-type "comma") + (= raw-type "dot") + (= raw-type "colon") + (= raw-type "op")))) + {:type up-type :value up-val :op is-op})))) + +;; Expand "class" and "id" tokens that follow a closing bracket into +;; separate dot/hash + ident tokens, matching upstream context-sensitive +;; behaviour: after ) ] } the dot is property access, not a CLASS_REF. +(define + hs-normalize-raw-tokens + (fn + (raw-real) + (let + ((result (list)) + (prev-type nil)) + (for-each + (fn + (tok) + (let + ((typ (get tok "type")) + (val (get tok "value")) + (tok-pos (get tok "pos"))) + (if + (and + (or (= typ "class") (= typ "id")) + (or + (= prev-type "paren-close") + (= prev-type "bracket-close") + (= prev-type "brace-close"))) + (do + (if + (= typ "class") + (do + (append! result {:type "dot" :value "." :pos tok-pos}) + (append! result {:type "ident" :value val :pos (+ tok-pos 1)})) + (do + (append! result {:type "op" :value "#" :pos tok-pos}) + (append! result {:type "ident" :value val :pos (+ tok-pos 1)}))) + (set! prev-type "ident")) + (do + (append! result tok) + (set! prev-type typ))))) + raw-real) + result))) + +(define + hs-tokens-of + (fn + (src &rest rest) + (let + ((template? (and (> (len rest) 0) (= (first rest) :template))) + (raw (if template? (hs-tokenize-template src) (hs-tokenize src)))) + (if + template? 
+ {:source src :list (map hs-raw->api-token raw) :pos 0} + ;; Normal mode: filter EOF, context-normalise, add trailing-WS sentinel + (let + ((real (filter (fn (t) (not (= (get t "type") "eof"))) raw))) + (let + ((norm (hs-normalize-raw-tokens real))) + (let + ((api (map hs-raw->api-token norm))) + (let + ((with-sep + (if + (and + (> (len norm) 0) + (let + ((last-tok (nth norm (- (len norm) 1)))) + (let + ((end-pos + (+ (get last-tok "pos") + (len (get last-tok "value"))))) + (and + (< end-pos (len src)) + (hs-ws? (nth src end-pos)))))) + (append api (list {:type "WHITESPACE" :value " " :op false})) + api))) + {:source src :list with-sep :pos 0})))))))) + + +(define + hs-stream-token + (fn + (s i) + (let + ((lst (get s "list")) + (pos (get s "pos"))) + (or (nth lst (+ pos i)) + (hs-eof-sentinel))))) + +(define + hs-stream-consume + (fn + (s) + (let + ((tok (hs-stream-token s 0))) + (when + (not (= (get tok "type") "EOF")) + (dict-set! s "pos" (+ (get s "pos") 1))) + tok))) + +(define + hs-stream-has-more + (fn (s) (not (= (get (hs-stream-token s 0) "type") "EOF")))) + +(define hs-token-type (fn (tok) (get tok "type"))) +(define hs-token-value (fn (tok) (get tok "value"))) +(define hs-token-op? (fn (tok) (get tok "op"))) diff --git a/shared/static/wasm/sx/hs-tokenizer.sx b/shared/static/wasm/sx/hs-tokenizer.sx index 2483ea8c..bee0b7a7 100644 --- a/shared/static/wasm/sx/hs-tokenizer.sx +++ b/shared/static/wasm/sx/hs-tokenizer.sx @@ -28,6 +28,27 @@ (define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) +(define + hs-hex-digit? 
+ (fn + (c) + (or + (and (>= c "0") (<= c "9")) + (and (>= c "a") (<= c "f")) + (and (>= c "A") (<= c "F"))))) + +(define + hs-hex-val + (fn + (c) + (let + ((code (char-code c))) + (cond + ((and (>= code 48) (<= code 57)) (- code 48)) + ((and (>= code 65) (<= code 70)) (- code 55)) + ((and (>= code 97) (<= code 102)) (- code 87)) + (true 0))))) + ;; ── Keyword set ─────────────────────────────────────────────────── (define @@ -235,10 +256,15 @@ read-number (fn (start) - (when - (and (< pos src-len) (hs-digit? (hs-cur))) - (hs-advance! 1) - (read-number start)) + (define + read-int + (fn + () + (when + (and (< pos src-len) (hs-digit? (hs-cur))) + (hs-advance! 1) + (read-int)))) + (read-int) (when (and (< pos src-len) @@ -246,15 +272,7 @@ (< (+ pos 1) src-len) (hs-digit? (hs-peek 1))) (hs-advance! 1) - (define - read-frac - (fn - () - (when - (and (< pos src-len) (hs-digit? (hs-cur))) - (hs-advance! 1) - (read-frac)))) - (read-frac)) + (read-int)) (do (when (and @@ -272,15 +290,7 @@ (< pos src-len) (or (= (hs-cur) "+") (= (hs-cur) "-"))) (hs-advance! 1)) - (define - read-exp-digits - (fn - () - (when - (and (< pos src-len) (hs-digit? (hs-cur))) - (hs-advance! 1) - (read-exp-digits)))) - (read-exp-digits)) + (read-int)) (let ((num-end pos)) (when @@ -308,7 +318,7 @@ () (cond (>= pos src-len) - nil + (error "Unterminated string") (= (hs-cur) "\\") (do (hs-advance! 1) @@ -318,15 +328,37 @@ ((ch (hs-cur))) (cond (= ch "n") - (append! chars "\n") + (do (append! chars "\n") (hs-advance! 1)) (= ch "t") - (append! chars "\t") + (do (append! chars "\t") (hs-advance! 1)) + (= ch "r") + (do (append! chars "\r") (hs-advance! 1)) + (= ch "b") + (do (append! chars (char-from-code 8)) (hs-advance! 1)) + (= ch "f") + (do (append! chars (char-from-code 12)) (hs-advance! 1)) + (= ch "v") + (do (append! chars (char-from-code 11)) (hs-advance! 1)) (= ch "\\") - (append! chars "\\") + (do (append! chars "\\") (hs-advance! 1)) (= ch quote-char) - (append! 
chars quote-char) - :else (do (append! chars "\\") (append! chars ch))) - (hs-advance! 1))) + (do (append! chars quote-char) (hs-advance! 1)) + (= ch "x") + (do + (hs-advance! 1) + (if + (and + (< (+ pos 1) src-len) + (hs-hex-digit? (hs-cur)) + (hs-hex-digit? (hs-peek 1))) + (let + ((d1 (hs-hex-val (hs-cur))) + (d2 (hs-hex-val (hs-peek 1)))) + (append! chars (char-from-code (+ (* d1 16) d2))) + (hs-advance! 2)) + (error "Invalid hexadecimal escape: \\x"))) + :else + (do (append! chars "\\") (append! chars ch) (hs-advance! 1))))) (loop)) (= (hs-cur) quote-char) (hs-advance! 1) @@ -620,7 +652,82 @@ (do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!)) (= ch "|") (do (hs-emit! "op" "|" start) (hs-advance! 1) (scan!)) + (= ch "&") + (do (hs-emit! "op" "&" start) (hs-advance! 1) (scan!)) + (= ch "#") + (do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!)) + (= ch "?") + (do (hs-emit! "op" "?" start) (hs-advance! 1) (scan!)) + (= ch ";") + (do (hs-emit! "op" ";" start) (hs-advance! 1) (scan!)) :else (do (hs-advance! 1) (scan!))))))) (scan!) (hs-emit! "eof" nil pos) + tokens))) + +;; ── Template-mode tokenizer (E37 API) ──────────────────────────────── +;; Used by hs-tokens-of when :template flag is set. +;; Emits outer " chars as single STRING tokens; ${ ... } as $ { }; +;; inner content is tokenized with the regular hs-tokenize. + +(define + hs-tokenize-template + (fn + (src) + (let + ((tokens (list)) (pos 0) (src-len (len src))) + (define t-cur (fn () (if (< pos src-len) (nth src pos) nil))) + (define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil))) + (define t-advance! (fn (n) (set! pos (+ pos n)))) + (define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos)))) + (define + scan-to-close! + (fn + (depth) + (when + (and (< pos src-len) (> depth 0)) + (cond + (= (t-cur) "{") + (do (t-advance! 1) (scan-to-close! (+ depth 1))) + (= (t-cur) "}") + (when (> (- depth 1) 0) (t-advance! 1) (scan-to-close! 
(- depth 1))) + :else (do (t-advance! 1) (scan-to-close! depth)))))) + (define + scan-template! + (fn + () + (when + (< pos src-len) + (let + ((ch (t-cur))) + (cond + (= ch "\"") + (do (t-emit! "string" "\"") (t-advance! 1) (scan-template!)) + (and (= ch "$") (= (t-peek 1) "{")) + (do + (t-emit! "op" "$") + (t-advance! 1) + (t-emit! "brace-open" "{") + (t-advance! 1) + (let + ((inner-start pos)) + (scan-to-close! 1) + (let + ((inner-src (slice src inner-start pos)) + (inner-toks (hs-tokenize inner-src))) + (for-each + (fn (tok) + (when (not (= (get tok "type") "eof")) + (append! tokens tok))) + inner-toks)) + (t-emit! "brace-close" "}") + (when (< pos src-len) (t-advance! 1))) + (scan-template!)) + (= ch "$") + (do (t-emit! "op" "$") (t-advance! 1) (scan-template!)) + (hs-ws? ch) + (do (t-advance! 1) (scan-template!)) + :else (do (t-advance! 1) (scan-template!))))))) + (scan-template!) + (t-emit! "eof" nil) tokens))) \ No newline at end of file diff --git a/spec/tests/test-hyperscript-behavioral.sx b/spec/tests/test-hyperscript-behavioral.sx index 555e4a31..ea9abe24 100644 --- a/spec/tests/test-hyperscript-behavioral.sx +++ b/spec/tests/test-hyperscript-behavioral.sx @@ -2479,41 +2479,287 @@ ;; ── core/tokenizer (17 tests) ── (defsuite "hs-upstream-core/tokenizer" (deftest "handles $ in template properly" - (error "SKIP (untranslated): handles $ in template properly")) + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"" :template) 0)) "\"") + ) (deftest "handles all special escapes properly" - (error "SKIP (untranslated): handles all special escapes properly")) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\b\""))) (char-from-code 8)) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\f\""))) (char-from-code 12)) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\n\""))) "\n") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\r\""))) "\r") + (assert= (hs-token-value (hs-stream-consume 
(hs-tokens-of "\"\\t\""))) "\t") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\v\""))) (char-from-code 11)) + ) (deftest "handles basic token types" - (error "SKIP (untranslated): handles basic token types")) + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "foo"))) "IDENTIFIER") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1"))) "NUMBER") + (let ((s (hs-tokens-of "1.1"))) + (let ((tok (hs-stream-consume s))) + (assert= (hs-token-type tok) "NUMBER") + (assert= (hs-stream-has-more s) false))) + (let ((s (hs-tokens-of "1e6"))) + (let ((tok (hs-stream-consume s))) + (assert= (hs-token-type tok) "NUMBER") + (assert= (hs-stream-has-more s) false))) + (let ((s (hs-tokens-of "1e-6"))) + (let ((tok (hs-stream-consume s))) + (assert= (hs-token-type tok) "NUMBER") + (assert= (hs-stream-has-more s) false))) + (let ((s (hs-tokens-of "1.1e6"))) + (let ((tok (hs-stream-consume s))) + (assert= (hs-token-type tok) "NUMBER") + (assert= (hs-stream-has-more s) false))) + (let ((s (hs-tokens-of "1.1e-6"))) + (let ((tok (hs-stream-consume s))) + (assert= (hs-token-type tok) "NUMBER") + (assert= (hs-stream-has-more s) false))) + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of ".a"))) "CLASS_REF") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "#a"))) "ID_REF") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "\"asdf\""))) "STRING") + ) (deftest "handles class identifiers properly" - (error "SKIP (untranslated): handles class identifiers properly")) + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of ".a"))) "CLASS_REF") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of ".a"))) ".a") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of " .a"))) "CLASS_REF") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of " .a"))) ".a") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "a.a"))) "IDENTIFIER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "a.a"))) "a") + 
(assert= (hs-token-type (nth (get (hs-tokens-of "(a).a") "list") 4)) "IDENTIFIER") + (assert= (hs-token-value (nth (get (hs-tokens-of "(a).a") "list") 4)) "a") + (assert= (hs-token-type (nth (get (hs-tokens-of "{a}.a") "list") 4)) "IDENTIFIER") + (assert= (hs-token-value (nth (get (hs-tokens-of "{a}.a") "list") 4)) "a") + (assert= (hs-token-type (nth (get (hs-tokens-of "[a].a") "list") 4)) "IDENTIFIER") + (assert= (hs-token-value (nth (get (hs-tokens-of "[a].a") "list") 4)) "a") + (assert= (hs-token-type (nth (get (hs-tokens-of "(a(.a") "list") 3)) "CLASS_REF") + (assert= (hs-token-value (nth (get (hs-tokens-of "(a(.a") "list") 3)) ".a") + (assert= (hs-token-type (nth (get (hs-tokens-of "{a{.a") "list") 3)) "CLASS_REF") + (assert= (hs-token-value (nth (get (hs-tokens-of "{a{.a") "list") 3)) ".a") + (assert= (hs-token-type (nth (get (hs-tokens-of "[a[.a") "list") 3)) "CLASS_REF") + (assert= (hs-token-value (nth (get (hs-tokens-of "[a[.a") "list") 3)) ".a") + ) (deftest "handles comments properly" - (error "SKIP (untranslated): handles comments properly")) + (assert= (len (get (hs-tokens-of "--") "list")) 0) + (assert= (len (get (hs-tokens-of "asdf--") "list")) 1) + (assert= (len (get (hs-tokens-of "-- asdf") "list")) 0) + (assert= (len (get (hs-tokens-of "--\nasdf") "list")) 1) + (assert= (len (get (hs-tokens-of "--\nasdf--") "list")) 1) + (assert= (len (get (hs-tokens-of "---asdf") "list")) 0) + (assert= (len (get (hs-tokens-of "----\n---asdf") "list")) 0) + (assert= (len (get (hs-tokens-of "----asdf----") "list")) 0) + (assert= (len (get (hs-tokens-of "---\nasdf---") "list")) 1) + (assert= (len (get (hs-tokens-of "// asdf") "list")) 0) + (assert= (len (get (hs-tokens-of "///asdf") "list")) 0) + (assert= (len (get (hs-tokens-of "asdf//") "list")) 1) + (assert= (len (get (hs-tokens-of "asdf\n//") "list")) 2) + ) (deftest "handles hex escapes properly" - (error "SKIP (untranslated): handles hex escapes properly")) + (assert= (hs-token-value (hs-stream-consume 
(hs-tokens-of "\"\\x1f\""))) (char-from-code 31)) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\x41\""))) "A") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\x41\\x61\""))) "Aa") + (let ((threw false)) + (guard (e (true (set! threw true))) (hs-stream-consume (hs-tokens-of "\"\\x\""))) + (assert threw)) + (let ((threw false)) + (guard (e (true (set! threw true))) (hs-stream-consume (hs-tokens-of "\"\\xGG\""))) + (assert threw)) + (let ((threw false)) + (guard (e (true (set! threw true))) (hs-stream-consume (hs-tokens-of "\"\\x4\""))) + (assert threw)) + ) (deftest "handles id references properly" - (error "SKIP (untranslated): handles id references properly")) + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "#a"))) "ID_REF") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "#a"))) "#a") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of " #a"))) "ID_REF") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of " #a"))) "#a") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "a#a"))) "IDENTIFIER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "a#a"))) "a") + (assert= (hs-token-type (nth (get (hs-tokens-of "(a)#a") "list") 4)) "IDENTIFIER") + (assert= (hs-token-value (nth (get (hs-tokens-of "(a)#a") "list") 4)) "a") + (assert= (hs-token-type (nth (get (hs-tokens-of "{a}#a") "list") 4)) "IDENTIFIER") + (assert= (hs-token-value (nth (get (hs-tokens-of "{a}#a") "list") 4)) "a") + (assert= (hs-token-type (nth (get (hs-tokens-of "[a]#a") "list") 4)) "IDENTIFIER") + (assert= (hs-token-value (nth (get (hs-tokens-of "[a]#a") "list") 4)) "a") + (assert= (hs-token-type (nth (get (hs-tokens-of "(a(#a") "list") 3)) "ID_REF") + (assert= (hs-token-value (nth (get (hs-tokens-of "(a(#a") "list") 3)) "#a") + (assert= (hs-token-type (nth (get (hs-tokens-of "{a{#a") "list") 3)) "ID_REF") + (assert= (hs-token-value (nth (get (hs-tokens-of "{a{#a") "list") 3)) "#a") + (assert= (hs-token-type (nth 
(get (hs-tokens-of "[a[#a") "list") 3)) "ID_REF") + (assert= (hs-token-value (nth (get (hs-tokens-of "[a[#a") "list") 3)) "#a") + ) (deftest "handles identifiers properly" - (error "SKIP (untranslated): handles identifiers properly")) + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "foo"))) "IDENTIFIER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "foo"))) "foo") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of " foo "))) "IDENTIFIER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of " foo "))) "foo") + (let ((s (hs-tokens-of " foo bar"))) + (let ((tok1 (hs-stream-consume s))) + (assert= (hs-token-type tok1) "IDENTIFIER") + (assert= (hs-token-value tok1) "foo") + (let ((tok2 (hs-stream-consume s))) + (assert= (hs-token-type tok2) "IDENTIFIER") + (assert= (hs-token-value tok2) "bar")))) + (let ((s (hs-tokens-of " foo\n-- a comment\n bar"))) + (let ((tok1 (hs-stream-consume s))) + (assert= (hs-token-type tok1) "IDENTIFIER") + (assert= (hs-token-value tok1) "foo") + (let ((tok2 (hs-stream-consume s))) + (assert= (hs-token-type tok2) "IDENTIFIER") + (assert= (hs-token-value tok2) "bar")))) + ) (deftest "handles identifiers with numbers properly" - (error "SKIP (untranslated): handles identifiers with numbers properly")) + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "f1oo"))) "IDENTIFIER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "f1oo"))) "f1oo") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "fo1o"))) "IDENTIFIER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "fo1o"))) "fo1o") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "foo1"))) "IDENTIFIER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "foo1"))) "foo1") + ) (deftest "handles look ahead property" - (error "SKIP (untranslated): handles look ahead property")) + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "a 1 + 1") 0)) "a") + (assert= (hs-token-value (hs-stream-token 
(hs-tokens-of "a 1 + 1") 1)) "1") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "a 1 + 1") 2)) "+") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "a 1 + 1") 3)) "1") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "a 1 + 1") 4)) "<<>>") + ) (deftest "handles numbers properly" - (error "SKIP (untranslated): handles numbers properly")) + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1"))) "NUMBER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1"))) "1") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1.1"))) "NUMBER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1.1"))) "1.1") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1234567890.1234567890"))) "NUMBER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1234567890.1234567890"))) "1234567890.1234567890") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1e6"))) "NUMBER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1e6"))) "1e6") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1e-6"))) "NUMBER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1e-6"))) "1e-6") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1.1e6"))) "NUMBER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1.1e6"))) "1.1e6") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1.1e-6"))) "NUMBER") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1.1e-6"))) "1.1e-6") + (assert= (hs-token-type (nth (get (hs-tokens-of "1.1.1") "list") 0)) "NUMBER") + (assert= (hs-token-type (nth (get (hs-tokens-of "1.1.1") "list") 1)) "PERIOD") + (assert= (hs-token-type (nth (get (hs-tokens-of "1.1.1") "list") 2)) "NUMBER") + (assert= (len (get (hs-tokens-of "1.1.1") "list")) 3) + ) (deftest "handles operators properly" - (error "SKIP (untranslated): handles operators properly")) + (assert= (hs-token-op? 
(hs-stream-consume (hs-tokens-of "+"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "+"))) "+") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "-"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "-"))) "-") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "*"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "*"))) "*") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "."))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "."))) ".") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "\\"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\\"))) "\\") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ":"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of ":"))) ":") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "%"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "%"))) "%") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "|"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "|"))) "|") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "!"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "!"))) "!") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "?"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "?"))) "?") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "#"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "#"))) "#") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "&"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "&"))) "&") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ";"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of ";"))) ";") + (assert= (hs-token-op? 
(hs-stream-consume (hs-tokens-of ","))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of ","))) ",") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "("))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "("))) "(") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ")"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of ")"))) ")") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "<"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "<"))) "<") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ">"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of ">"))) ">") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "{"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "{"))) "{") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "}"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "}"))) "}") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "["))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "["))) "[") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "]"))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "]"))) "]") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "="))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "="))) "=") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "<="))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "<="))) "<=") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ">="))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of ">="))) ">=") + (assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "=="))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "=="))) "==") + (assert= (hs-token-op? 
(hs-stream-consume (hs-tokens-of "==="))) true) + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "==="))) "===") + ) (deftest "handles strings properly" - (error "SKIP (untranslated): handles strings properly")) + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "\"foo\""))) "STRING") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"foo\""))) "foo") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "\"fo'o\""))) "STRING") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"fo'o\""))) "fo'o") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "\"fo\\\"o\""))) "STRING") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"fo\\\"o\""))) "fo\"o") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "'foo'"))) "STRING") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "'foo'"))) "foo") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "'fo\"o'"))) "STRING") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "'fo\"o'"))) "fo\"o") + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "'fo\\'o'"))) "STRING") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "'fo\\'o'"))) "fo'o") + (let ((threw false)) + (guard (e (true (set! threw true))) (hs-stream-consume (hs-tokens-of "'"))) + (assert threw)) + (let ((threw false)) + (guard (e (true (set! 
threw true))) (hs-stream-consume (hs-tokens-of "\""))) + (assert threw)) + ) (deftest "handles strings properly 2" - (error "SKIP (untranslated): handles strings properly 2")) + (assert= (hs-token-type (hs-stream-consume (hs-tokens-of "'foo'"))) "STRING") + (assert= (hs-token-value (hs-stream-consume (hs-tokens-of "'foo'"))) "foo") + ) (deftest "handles template bootstrap properly" - (error "SKIP (untranslated): handles template bootstrap properly")) + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"" :template) 0)) "\"") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"$" :template) 0)) "\"") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"$" :template) 1)) "$") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${" :template) 0)) "\"") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${" :template) 1)) "$") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${" :template) 2)) "{") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"" :template) 0)) "\"") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"" :template) 1)) "$") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"" :template) 2)) "{") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"" :template) 3)) "asdf") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 0)) "\"") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 1)) "$") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 2)) "{") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 3)) "asdf") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 4)) "}") + (assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 5)) "\"") + ) (deftest "handles whitespace properly" - (error "SKIP (untranslated): 
handles whitespace properly")) + (assert= (len (get (hs-tokens-of " ") "list")) 0) + (assert= (len (get (hs-tokens-of " asdf") "list")) 1) + (assert= (len (get (hs-tokens-of " asdf ") "list")) 2) + (assert= (len (get (hs-tokens-of "asdf ") "list")) 2) + (assert= (len (get (hs-tokens-of "\n") "list")) 0) + (assert= (len (get (hs-tokens-of "\nasdf") "list")) 1) + (assert= (len (get (hs-tokens-of "\nasdf\n") "list")) 2) + (assert= (len (get (hs-tokens-of "asdf\n") "list")) 2) + (assert= (len (get (hs-tokens-of "\r") "list")) 0) + (assert= (len (get (hs-tokens-of "\rasdf") "list")) 1) + (assert= (len (get (hs-tokens-of "\rasdf\r") "list")) 2) + (assert= (len (get (hs-tokens-of "asdf\r") "list")) 2) + (assert= (len (get (hs-tokens-of "\t") "list")) 0) + (assert= (len (get (hs-tokens-of "\tasdf") "list")) 1) + (assert= (len (get (hs-tokens-of "\tasdf\t") "list")) 2) + (assert= (len (get (hs-tokens-of "asdf\t") "list")) 2) + ) (deftest "string interpolation isnt surprising" (hs-cleanup!) (let ((_el-div (dom-create-element "div"))) - (dom-set-attr _el-div "_" "on click set x to 42 then put `test${x} test ${x} test$x test $x test $x test ${x} test$x test_$x test_${x} test-$x test.$x` into my.innerHTML") + (dom-set-attr _el-div "_" "on click set x to 42 then put `test\\${x} test ${x} test\\$x test $x test \\$x test \\${x} test$x test_$x test_${x} test-$x test.$x` into my.innerHTML") (dom-append (dom-body) _el-div) (hs-activate! 
def generate_tokenizer_test(test, safe_name):
    """Hardcoded SX translation for _hyperscript.internals.tokenizer tests (E37).

    Each upstream tokenizer test is recognised by its exact name and rendered
    as a ``deftest`` form whose assertions are built from a small table of
    (source, expectation) pairs specific to that test.

    Args:
        test: Upstream test record; only ``test['name']`` is read.
        safe_name: Title placed verbatim inside the ``deftest`` string.

    Returns:
        The SX source of the ``deftest`` form as a newline-joined string, or
        ``None`` when ``test['name']`` is not one of the handled tests.
    """
    name = test['name']

    # --- builders for SX expression strings -------------------------------

    def to_(src, tmpl=False):
        """Return (hs-tokens-of <src> [:template]) for HS source string src."""
        # Backslashes must be doubled first so the escapes added by the later
        # replacements are not doubled again.
        escaped = (src
                   .replace('\\', '\\\\')
                   .replace('"', '\\"')
                   .replace('\n', '\\n')
                   .replace('\r', '\\r')
                   .replace('\t', '\\t'))
        suffix = ' :template' if tmpl else ''
        return f'(hs-tokens-of "{escaped}"{suffix})'

    def consume(s):
        return f'(hs-stream-consume {s})'

    def tok_i(s, i):
        return f'(hs-stream-token {s} {i})'

    def has_more(s):
        return f'(hs-stream-has-more {s})'

    def t_type(t):
        return f'(hs-token-type {t})'

    def t_val(t):
        return f'(hs-token-value {t})'

    def t_op(t):
        return f'(hs-token-op? {t})'

    def nth_list(s, i):
        return f'(nth (get {s} "list") {i})'

    def list_len(s):
        return f'(len (get {s} "list"))'

    def ae(actual, expected):
        return f' (assert= {actual} {expected})'

    def throws(expr):
        # Evaluate expr under a catch-all guard and assert that it raised.
        return (
            f' (let ((threw false))\n'
            f' (guard (e (true (set! threw true))) {expr})\n'
            f' (assert threw))'
        )

    lines = [f' (deftest "{safe_name}"']

    def ref_cases(cases):
        """Emit type/value assertions for the class-ref / id-ref tables."""
        for src, idx, exp_type, exp_val in cases:
            stream = to_(src)
            # idx None: look at the first consumed token; otherwise index
            # directly into the stream's token list.
            tok_expr = consume(stream) if idx is None else nth_list(stream, idx)
            lines.append(ae(t_type(tok_expr), f'"{exp_type}"'))
            lines.append(ae(t_val(tok_expr), sx_str(exp_val)))

    # --- per-test translations --------------------------------------------

    if name == 'handles $ in template properly':
        s = to_('"', tmpl=True)
        lines.append(ae(t_val(tok_i(s, 0)), sx_str('"')))

    elif name == 'handles all special escapes properly':
        for src, exp in [
            ('"\\b"', '(char-from-code 8)'),
            ('"\\f"', '(char-from-code 12)'),
            ('"\\n"', '"\\n"'),
            ('"\\r"', '"\\r"'),
            ('"\\t"', '"\\t"'),
            ('"\\v"', '(char-from-code 11)'),
        ]:
            lines.append(ae(t_val(consume(to_(src))), exp))

    elif name == 'handles basic token types':
        lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
        lines.append(ae(t_type(consume(to_('1'))), '"NUMBER"'))
        for src in ['1.1', '1e6', '1e-6', '1.1e6', '1.1e-6']:
            # The whole literal must be a single NUMBER token: consume one
            # token from a bound stream, then require the stream to be empty.
            lines.append(f' (let ((s {to_(src)}))')
            lines.append(f' (let ((tok {consume("s")}))')
            lines.append(ae(t_type('tok'), '"NUMBER"'))
            lines.append(ae(has_more('s'), 'false') + '))')
        lines.append(ae(t_type(consume(to_('.a'))), '"CLASS_REF"'))
        lines.append(ae(t_type(consume(to_('#a'))), '"ID_REF"'))
        lines.append(ae(t_type(consume(to_('"asdf"'))), '"STRING"'))

    elif name == 'handles class identifiers properly':
        ref_cases([
            ('.a', None, 'CLASS_REF', '.a'),
            (' .a', None, 'CLASS_REF', '.a'),
            ('a.a', None, 'IDENTIFIER', 'a'),
            ('(a).a', 4, 'IDENTIFIER', 'a'),
            ('{a}.a', 4, 'IDENTIFIER', 'a'),
            ('[a].a', 4, 'IDENTIFIER', 'a'),
            ('(a(.a', 3, 'CLASS_REF', '.a'),
            ('{a{.a', 3, 'CLASS_REF', '.a'),
            ('[a[.a', 3, 'CLASS_REF', '.a'),
        ])

    elif name == 'handles comments properly':
        for src, expected in [
            ('--', 0),
            ('asdf--', 1),
            ('-- asdf', 0),
            ('--\nasdf', 1),
            ('--\nasdf--', 1),
            ('---asdf', 0),
            ('----\n---asdf', 0),
            ('----asdf----', 0),
            ('---\nasdf---', 1),
            ('// asdf', 0),
            ('///asdf', 0),
            ('asdf//', 1),
            ('asdf\n//', 2),
        ]:
            lines.append(ae(list_len(to_(src)), str(expected)))

    elif name == 'handles hex escapes properly':
        lines.append(ae(t_val(consume(to_('"\\x1f"'))), '(char-from-code 31)'))
        lines.append(ae(t_val(consume(to_('"\\x41"'))), '"A"'))
        lines.append(ae(t_val(consume(to_('"\\x41\\x61"'))), '"Aa"'))
        for bad in ['"\\x"', '"\\xGG"', '"\\x4"']:
            lines.append(throws(consume(to_(bad))))

    elif name == 'handles id references properly':
        ref_cases([
            ('#a', None, 'ID_REF', '#a'),
            (' #a', None, 'ID_REF', '#a'),
            ('a#a', None, 'IDENTIFIER', 'a'),
            ('(a)#a', 4, 'IDENTIFIER', 'a'),
            ('{a}#a', 4, 'IDENTIFIER', 'a'),
            ('[a]#a', 4, 'IDENTIFIER', 'a'),
            ('(a(#a', 3, 'ID_REF', '#a'),
            ('{a{#a', 3, 'ID_REF', '#a'),
            ('[a[#a', 3, 'ID_REF', '#a'),
        ])

    elif name == 'handles identifiers properly':
        lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
        lines.append(ae(t_val(consume(to_('foo'))), '"foo"'))
        lines.append(ae(t_type(consume(to_(' foo '))), '"IDENTIFIER"'))
        lines.append(ae(t_val(consume(to_(' foo '))), '"foo"'))
        for src, v1, v2 in [
            (' foo bar', 'foo', 'bar'),
            (' foo\n-- a comment\n bar', 'foo', 'bar'),
        ]:
            # Two successive consumes on one bound stream, so the second
            # assertion sees the token that follows the first.
            lines.append(f' (let ((s {to_(src)}))')
            lines.append(f' (let ((tok1 {consume("s")}))')
            lines.append(ae(t_type('tok1'), '"IDENTIFIER"'))
            lines.append(ae(t_val('tok1'), sx_str(v1)))
            lines.append(f' (let ((tok2 {consume("s")}))')
            lines.append(ae(t_type('tok2'), '"IDENTIFIER"'))
            lines.append(ae(t_val('tok2'), sx_str(v2)) + ')))')

    elif name == 'handles identifiers with numbers properly':
        for src in ['f1oo', 'fo1o', 'foo1']:
            lines.append(ae(t_type(consume(to_(src))), '"IDENTIFIER"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(src)))

    elif name == 'handles look ahead property':
        s = to_('a 1 + 1')
        # Index 4 is one past the last real token; the upstream lexer answers
        # such lookups with a synthetic end-of-stream token valued
        # '<<<EOF>>>'.
        # NOTE(review): confirm hs-eof-sentinel in runtime.sx uses the same
        # spelling.
        for i, v in [(0, 'a'), (1, '1'), (2, '+'), (3, '1'), (4, '<<<EOF>>>')]:
            lines.append(ae(t_val(tok_i(s, i)), sx_str(v)))

    elif name == 'handles numbers properly':
        for src, v in [
            ('1', '1'),
            ('1.1', '1.1'),
            ('1234567890.1234567890', '1234567890.1234567890'),
            ('1e6', '1e6'),
            ('1e-6', '1e-6'),
            ('1.1e6', '1.1e6'),
            ('1.1e-6', '1.1e-6'),
        ]:
            lines.append(ae(t_type(consume(to_(src))), '"NUMBER"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
        # "1.1.1" must split into NUMBER PERIOD NUMBER, not one token.
        toks = f'(get {to_("1.1.1")} "list")'
        lines.append(ae(f'(hs-token-type (nth {toks} 0))', '"NUMBER"'))
        lines.append(ae(f'(hs-token-type (nth {toks} 1))', '"PERIOD"'))
        lines.append(ae(f'(hs-token-type (nth {toks} 2))', '"NUMBER"'))
        lines.append(ae(f'(len {toks})', '3'))

    elif name == 'handles operators properly':
        # The second column is kept for reference only; the emitted
        # assertions check just the op flag and the token value.
        optable = [
            ('+', 'PLUS'), ('-', 'MINUS'), ('*', 'MULTIPLY'),
            ('.', 'PERIOD'), ('\\', 'BACKSLASH'), (':', 'COLON'),
            ('%', 'PERCENT'), ('|', 'PIPE'), ('!', 'EXCLAMATION'),
            ('?', 'QUESTION'), ('#', 'POUND'), ('&', 'AMPERSAND'),
            (';', 'SEMI'), (',', 'COMMA'), ('(', 'L_PAREN'),
            (')', 'R_PAREN'), ('<', 'L_ANG'), ('>', 'R_ANG'),
            ('{', 'L_BRACE'), ('}', 'R_BRACE'), ('[', 'L_BRACKET'),
            (']', 'R_BRACKET'), ('=', 'EQUALS'),
            ('<=', 'LTE_ANG'), ('>=', 'GTE_ANG'),
            ('==', 'EQ'), ('===', 'EQQ'),
        ]
        for op_char, _ in optable:
            tok_expr = consume(to_(op_char))
            lines.append(ae(t_op(tok_expr), 'true'))
            lines.append(ae(t_val(tok_expr), sx_str(op_char)))

    elif name == 'handles strings properly':
        for src, v in [
            ('"foo"', 'foo'),
            ('"fo\'o"', "fo'o"),
            ('"fo\\"o"', 'fo"o'),
            ("'foo'", 'foo'),
            ("'fo\"o'", 'fo"o'),
            ("'fo\\'o'", "fo'o"),
        ]:
            lines.append(ae(t_type(consume(to_(src))), '"STRING"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
        # A lone quote character is expected to raise.
        lines.append(throws(consume(to_("'"))))
        lines.append(throws(consume(to_('"'))))

    elif name == 'handles strings properly 2':
        tok_expr = consume(to_("'foo'"))
        lines.append(ae(t_type(tok_expr), '"STRING"'))
        lines.append(ae(t_val(tok_expr), '"foo"'))

    elif name == 'handles template bootstrap properly':
        quote = sx_str('"')
        # Each successively longer template prefix is tokenized in template
        # mode and every token value is checked by position.
        for src, expected in [
            ('"', [quote]),
            ('"$', [quote, '"$"']),
            ('"${', [quote, '"$"', '"{"']),
            ('"${"asdf"', [quote, '"$"', '"{"', '"asdf"']),
            ('"${"asdf"}"', [quote, '"$"', '"{"', '"asdf"', '"}"', quote]),
        ]:
            stream = to_(src, tmpl=True)
            for i, exp in enumerate(expected):
                lines.append(ae(t_val(tok_i(stream, i)), exp))

    elif name == 'handles whitespace properly':
        for src, expected in [
            (' ', 0), (' asdf', 1), (' asdf ', 2), ('asdf ', 2),
            ('\n', 0), ('\nasdf', 1), ('\nasdf\n', 2), ('asdf\n', 2),
            ('\r', 0), ('\rasdf', 1), ('\rasdf\r', 2), ('asdf\r', 2),
            ('\t', 0), ('\tasdf', 1), ('\tasdf\t', 2), ('asdf\t', 2),
        ]:
            lines.append(ae(list_len(to_(src)), str(expected)))

    else:
        return None  # not a tokenizer test we handle

    lines.append(' )')
    return '\n'.join(lines)