Merge branch 'hs-e37-tokenizer' into loops/hs

This commit is contained in:
2026-04-26 17:54:11 +00:00
7 changed files with 3593 additions and 3010 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
;; _hyperscript tokenizer — produces token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P :end E :line L}
;; Tokens: {:type T :value V :pos P}
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
;; "bracket-close" "brace-open" "brace-close" "comma" "colon"
@@ -8,7 +8,7 @@
;; ── Token constructor ─────────────────────────────────────────────
(define hs-make-token (fn (type value pos end line) {:pos pos :end end :line line :value value :type type}))
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
@@ -28,6 +28,27 @@
(define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
(define
hs-hex-digit?
(fn
(c)
(or
(and (>= c "0") (<= c "9"))
(and (>= c "a") (<= c "f"))
(and (>= c "A") (<= c "F")))))
(define
hs-hex-val
(fn
(c)
(let
((code (char-code c)))
(cond
((and (>= code 48) (<= code 57)) (- code 48))
((and (>= code 65) (<= code 70)) (- code 55))
((and (>= code 97) (<= code 102)) (- code 87))
(true 0)))))
;; ── Keyword set ───────────────────────────────────────────────────
(define
@@ -198,22 +219,14 @@
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
((tokens (list)) (pos 0) (src-len (len src)))
(define
hs-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define hs-cur (fn () (hs-peek 0)))
(define
hs-advance!
(fn
(n)
(when
(> n 0)
(when (= (hs-cur) "\n") (set! current-line (+ current-line 1)))
(set! pos (+ pos 1))
(hs-advance! (- n 1)))))
(define hs-advance! (fn (n) (set! pos (+ pos n))))
(define
skip-ws!
(fn
@@ -243,10 +256,15 @@
read-number
(fn
(start)
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-number start))
(define
read-int
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-int))))
(read-int)
(when
(and
(< pos src-len)
@@ -254,15 +272,7 @@
(< (+ pos 1) src-len)
(hs-digit? (hs-peek 1)))
(hs-advance! 1)
(define
read-frac
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-frac))))
(read-frac))
(read-int))
(do
(when
(and
@@ -280,15 +290,7 @@
(< pos src-len)
(or (= (hs-cur) "+") (= (hs-cur) "-")))
(hs-advance! 1))
(define
read-exp-digits
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-exp-digits))))
(read-exp-digits))
(read-int))
(let
((num-end pos))
(when
@@ -316,7 +318,7 @@
()
(cond
(>= pos src-len)
nil
(error "Unterminated string")
(= (hs-cur) "\\")
(do
(hs-advance! 1)
@@ -326,15 +328,37 @@
((ch (hs-cur)))
(cond
(= ch "n")
(append! chars "\n")
(do (append! chars "\n") (hs-advance! 1))
(= ch "t")
(append! chars "\t")
(do (append! chars "\t") (hs-advance! 1))
(= ch "r")
(do (append! chars "\r") (hs-advance! 1))
(= ch "b")
(do (append! chars (char-from-code 8)) (hs-advance! 1))
(= ch "f")
(do (append! chars (char-from-code 12)) (hs-advance! 1))
(= ch "v")
(do (append! chars (char-from-code 11)) (hs-advance! 1))
(= ch "\\")
(append! chars "\\")
(do (append! chars "\\") (hs-advance! 1))
(= ch quote-char)
(append! chars quote-char)
:else (do (append! chars "\\") (append! chars ch)))
(hs-advance! 1)))
(do (append! chars quote-char) (hs-advance! 1))
(= ch "x")
(do
(hs-advance! 1)
(if
(and
(< (+ pos 1) src-len)
(hs-hex-digit? (hs-cur))
(hs-hex-digit? (hs-peek 1)))
(let
((d1 (hs-hex-val (hs-cur)))
(d2 (hs-hex-val (hs-peek 1))))
(append! chars (char-from-code (+ (* d1 16) d2)))
(hs-advance! 2))
(error "Invalid hexadecimal escape: \\x")))
:else
(do (append! chars "\\") (append! chars ch) (hs-advance! 1)))))
(loop))
(= (hs-cur) quote-char)
(hs-advance! 1)
@@ -435,8 +459,8 @@
(define
hs-emit!
(fn
(type value start start-line)
(append! tokens (hs-make-token type value start pos start-line))))
(type value start)
(append! tokens (hs-make-token type value start))))
(define
scan!
(fn
@@ -445,7 +469,7 @@
(when
(< pos src-len)
(let
((ch (hs-cur)) (start pos) (start-line current-line))
((ch (hs-cur)) (start pos))
(cond
(and (= ch "-") (< (+ pos 1) src-len) (= (hs-peek 1) "-"))
(do (hs-advance! 2) (skip-comment!) (scan!))
@@ -462,9 +486,9 @@
(= (hs-peek 1) "[")
(= (hs-peek 1) "*")
(= (hs-peek 1) ":")))
(do (hs-emit! "selector" (read-selector) start start-line) (scan!))
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
(do (hs-advance! 2) (hs-emit! "op" ".." start start-line) (scan!))
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
@@ -474,7 +498,7 @@
(= (hs-peek 1) "_")))
(do
(hs-advance! 1)
(hs-emit! "class" (read-class-name pos) start start-line)
(hs-emit! "class" (read-class-name pos) start)
(scan!))
(and
(= ch "#")
@@ -482,7 +506,7 @@
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "id" (read-ident pos) start start-line)
(hs-emit! "id" (read-ident pos) start)
(scan!))
(and
(= ch "@")
@@ -490,7 +514,7 @@
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "attr" (read-ident pos) start start-line)
(hs-emit! "attr" (read-ident pos) start)
(scan!))
(and
(= ch "^")
@@ -498,7 +522,7 @@
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "hat" (read-ident pos) start start-line)
(hs-emit! "hat" (read-ident pos) start)
(scan!))
(and
(= ch "~")
@@ -506,7 +530,7 @@
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "component" (str "~" (read-ident pos)) start start-line)
(hs-emit! "component" (str "~" (read-ident pos)) start)
(scan!))
(and
(= ch "*")
@@ -514,7 +538,7 @@
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "style" (read-ident pos) start start-line)
(hs-emit! "style" (read-ident pos) start)
(scan!))
(and
(= ch ":")
@@ -522,7 +546,7 @@
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "local" (read-ident pos) start start-line)
(hs-emit! "local" (read-ident pos) start)
(scan!))
(or
(= ch "\"")
@@ -535,11 +559,11 @@
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2))))))))
(do (hs-emit! "string" (read-string ch) start start-line) (scan!))
(do (hs-emit! "string" (read-string ch) start) (scan!))
(= ch "`")
(do (hs-emit! "template" (read-template) start start-line) (scan!))
(do (hs-emit! "template" (read-template) start) (scan!))
(hs-digit? ch)
(do (hs-emit! "number" (read-number start) start start-line) (scan!))
(do (hs-emit! "number" (read-number start) start) (scan!))
(hs-ident-start? ch)
(do
(let
@@ -547,8 +571,7 @@
(hs-emit!
(if (hs-keyword? word) "keyword" "ident")
word
start
start-line))
start))
(scan!))
(and
(or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
@@ -560,8 +583,8 @@
(or (= ch "=") (= ch "!"))
(< (+ pos 2) src-len)
(= (hs-peek 2) "="))
(do (hs-advance! 3) (hs-emit! "op" (str ch "==") start start-line))
(do (hs-advance! 2) (hs-emit! "op" (str ch "=") start start-line)))
(do (hs-emit! "op" (str ch "==") start) (hs-advance! 3))
(do (hs-emit! "op" (str ch "=") start) (hs-advance! 2)))
(scan!))
(and
(= ch "'")
@@ -570,66 +593,141 @@
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2)))))
(do (hs-advance! 2) (hs-emit! "op" "'s" start start-line) (scan!))
(do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))
(= ch "(")
(do
(hs-emit! "paren-open" "(" start)
(hs-advance! 1)
(hs-emit! "paren-open" "(" start start-line)
(scan!))
(= ch ")")
(do
(hs-emit! "paren-close" ")" start)
(hs-advance! 1)
(hs-emit! "paren-close" ")" start start-line)
(scan!))
(= ch "[")
(do
(hs-emit! "bracket-open" "[" start)
(hs-advance! 1)
(hs-emit! "bracket-open" "[" start start-line)
(scan!))
(= ch "]")
(do
(hs-emit! "bracket-close" "]" start)
(hs-advance! 1)
(hs-emit! "bracket-close" "]" start start-line)
(scan!))
(= ch "{")
(do
(hs-emit! "brace-open" "{" start)
(hs-advance! 1)
(hs-emit! "brace-open" "{" start start-line)
(scan!))
(= ch "}")
(do
(hs-emit! "brace-close" "}" start)
(hs-advance! 1)
(hs-emit! "brace-close" "}" start start-line)
(scan!))
(= ch ",")
(do (hs-advance! 1) (hs-emit! "comma" "," start start-line) (scan!))
(do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!))
(= ch "+")
(do (hs-advance! 1) (hs-emit! "op" "+" start start-line) (scan!))
(do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!))
(= ch "-")
(do (hs-advance! 1) (hs-emit! "op" "-" start start-line) (scan!))
(do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!))
(= ch "/")
(do (hs-advance! 1) (hs-emit! "op" "/" start start-line) (scan!))
(do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!))
(= ch "=")
(do (hs-advance! 1) (hs-emit! "op" "=" start start-line) (scan!))
(do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!))
(= ch "<")
(do (hs-advance! 1) (hs-emit! "op" "<" start start-line) (scan!))
(do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!))
(= ch ">")
(do (hs-advance! 1) (hs-emit! "op" ">" start start-line) (scan!))
(do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!))
(= ch "!")
(do (hs-advance! 1) (hs-emit! "op" "!" start start-line) (scan!))
(do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!))
(= ch "*")
(do (hs-advance! 1) (hs-emit! "op" "*" start start-line) (scan!))
(do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!))
(= ch "%")
(do (hs-advance! 1) (hs-emit! "op" "%" start start-line) (scan!))
(do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!))
(= ch ".")
(do (hs-advance! 1) (hs-emit! "dot" "." start start-line) (scan!))
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
(= ch "\\")
(do (hs-advance! 1) (hs-emit! "op" "\\" start start-line) (scan!))
(do (hs-emit! "op" "\\" start) (hs-advance! 1) (scan!))
(= ch ":")
(do (hs-advance! 1) (hs-emit! "colon" ":" start start-line) (scan!))
(do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!))
(= ch "|")
(do (hs-advance! 1) (hs-emit! "op" "|" start start-line) (scan!))
(do (hs-emit! "op" "|" start) (hs-advance! 1) (scan!))
(= ch "&")
(do (hs-emit! "op" "&" start) (hs-advance! 1) (scan!))
(= ch "#")
(do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!))
(= ch "?")
(do (hs-emit! "op" "?" start) (hs-advance! 1) (scan!))
(= ch ";")
(do (hs-emit! "op" ";" start) (hs-advance! 1) (scan!))
:else (do (hs-advance! 1) (scan!)))))))
(scan!)
(hs-emit! "eof" nil pos current-line)
(hs-emit! "eof" nil pos)
tokens)))
;; ── Template-mode tokenizer (E37 API) ────────────────────────────────
;; Used by hs-tokens-of when :template flag is set.
;; Emits outer " chars as single STRING tokens; ${ ... } as $ { <inner-tokens> };
;; inner content is tokenized with the regular hs-tokenize.
(define
hs-tokenize-template
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
(define t-advance! (fn (n) (set! pos (+ pos n))))
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
(define
scan-to-close!
(fn
(depth)
(when
(and (< pos src-len) (> depth 0))
(cond
(= (t-cur) "{")
(do (t-advance! 1) (scan-to-close! (+ depth 1)))
(= (t-cur) "}")
(when (> (- depth 1) 0) (t-advance! 1) (scan-to-close! (- depth 1)))
:else (do (t-advance! 1) (scan-to-close! depth))))))
(define
scan-template!
(fn
()
(when
(< pos src-len)
(let
((ch (t-cur)))
(cond
(= ch "\"")
(do (t-emit! "string" "\"") (t-advance! 1) (scan-template!))
(and (= ch "$") (= (t-peek 1) "{"))
(do
(t-emit! "op" "$")
(t-advance! 1)
(t-emit! "brace-open" "{")
(t-advance! 1)
(let
((inner-start pos))
(scan-to-close! 1)
(let
((inner-src (slice src inner-start pos))
(inner-toks (hs-tokenize inner-src)))
(for-each
(fn (tok)
(when (not (= (get tok "type") "eof"))
(append! tokens tok)))
inner-toks))
(t-emit! "brace-close" "}")
(when (< pos src-len) (t-advance! 1)))
(scan-template!))
(= ch "$")
(do (t-emit! "op" "$") (t-advance! 1) (scan-template!))
(hs-ws? ch)
(do (t-advance! 1) (scan-template!))
:else (do (t-advance! 1) (scan-template!)))))))
(scan-template!)
(t-emit! "eof" nil)
tokens)))

View File

@@ -133,7 +133,7 @@ All five have design docs on their own worktree branches pending review + merge.
36. **[design-done, pending review — `plans/designs/e36-websocket.md` on `worktree-agent-a9daf73703f520257`] WebSocket + `socket`** — 16 tests. Upstream shape is `socket NAME URL [with timeout N] [on message [as JSON] …] end` with an **implicit `.rpc` Proxy** (ES6 Proxy lives in JS, not SX), not `with proxy { send, receive }` as this row previously claimed. Design doc has 8-commit checklist, +1216 delta estimate. Ship only with intentional design review.
37. **[design-done, pending review — `plans/designs/e37-tokenizer-api.md` on `worktree-agent-a6bb61d59cc0be8b4`] Tokenizer-as-API**17 tests. Expose tokens as inspectable SX data via `hs-tokens-of` / `hs-stream-token` / `hs-token-type` etc; type-map current `hs-tokenize` output to upstream SCREAMING_SNAKE_CASE. 8-step checklist, +1617 delta.
37. **[done +17]** Tokenizer-as-API — `hs-tokens-of` / `hs-stream-token` / `hs-token-type` / `hs-token-value` / `hs-token-op?`; type-map + normalize; `read-number` dot-stop fix; `\$` template escape in compiler + runtime; generator pattern in `generate-sx-tests.py`. 17/17.
38. **[design-done, pending review — `plans/designs/e38-sourceinfo.md` on `agent-e38-sourceinfo`] SourceInfo API** — 4 tests. Inline span-wrapper strategy (not side-channel dict) with compiler-entry unwrap. 4-commit plan.

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
;; _hyperscript tokenizer — produces token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P :end E :line L}
;; Tokens: {:type T :value V :pos P}
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
;; "bracket-close" "brace-open" "brace-close" "comma" "colon"
@@ -8,7 +8,7 @@
;; ── Token constructor ─────────────────────────────────────────────
(define hs-make-token (fn (type value pos end line) {:pos pos :end end :line line :value value :type type}))
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
@@ -28,6 +28,27 @@
(define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
(define
hs-hex-digit?
(fn
(c)
(or
(and (>= c "0") (<= c "9"))
(and (>= c "a") (<= c "f"))
(and (>= c "A") (<= c "F")))))
(define
hs-hex-val
(fn
(c)
(let
((code (char-code c)))
(cond
((and (>= code 48) (<= code 57)) (- code 48))
((and (>= code 65) (<= code 70)) (- code 55))
((and (>= code 97) (<= code 102)) (- code 87))
(true 0)))))
;; ── Keyword set ───────────────────────────────────────────────────
(define
@@ -198,22 +219,14 @@
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)) (current-line 1))
((tokens (list)) (pos 0) (src-len (len src)))
(define
hs-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define hs-cur (fn () (hs-peek 0)))
(define
hs-advance!
(fn
(n)
(when
(> n 0)
(when (= (hs-cur) "\n") (set! current-line (+ current-line 1)))
(set! pos (+ pos 1))
(hs-advance! (- n 1)))))
(define hs-advance! (fn (n) (set! pos (+ pos n))))
(define
skip-ws!
(fn
@@ -243,10 +256,15 @@
read-number
(fn
(start)
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-number start))
(define
read-int
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-int))))
(read-int)
(when
(and
(< pos src-len)
@@ -254,15 +272,7 @@
(< (+ pos 1) src-len)
(hs-digit? (hs-peek 1)))
(hs-advance! 1)
(define
read-frac
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-frac))))
(read-frac))
(read-int))
(do
(when
(and
@@ -280,15 +290,7 @@
(< pos src-len)
(or (= (hs-cur) "+") (= (hs-cur) "-")))
(hs-advance! 1))
(define
read-exp-digits
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-exp-digits))))
(read-exp-digits))
(read-int))
(let
((num-end pos))
(when
@@ -316,7 +318,7 @@
()
(cond
(>= pos src-len)
nil
(error "Unterminated string")
(= (hs-cur) "\\")
(do
(hs-advance! 1)
@@ -326,15 +328,37 @@
((ch (hs-cur)))
(cond
(= ch "n")
(append! chars "\n")
(do (append! chars "\n") (hs-advance! 1))
(= ch "t")
(append! chars "\t")
(do (append! chars "\t") (hs-advance! 1))
(= ch "r")
(do (append! chars "\r") (hs-advance! 1))
(= ch "b")
(do (append! chars (char-from-code 8)) (hs-advance! 1))
(= ch "f")
(do (append! chars (char-from-code 12)) (hs-advance! 1))
(= ch "v")
(do (append! chars (char-from-code 11)) (hs-advance! 1))
(= ch "\\")
(append! chars "\\")
(do (append! chars "\\") (hs-advance! 1))
(= ch quote-char)
(append! chars quote-char)
:else (do (append! chars "\\") (append! chars ch)))
(hs-advance! 1)))
(do (append! chars quote-char) (hs-advance! 1))
(= ch "x")
(do
(hs-advance! 1)
(if
(and
(< (+ pos 1) src-len)
(hs-hex-digit? (hs-cur))
(hs-hex-digit? (hs-peek 1)))
(let
((d1 (hs-hex-val (hs-cur)))
(d2 (hs-hex-val (hs-peek 1))))
(append! chars (char-from-code (+ (* d1 16) d2)))
(hs-advance! 2))
(error "Invalid hexadecimal escape: \\x")))
:else
(do (append! chars "\\") (append! chars ch) (hs-advance! 1)))))
(loop))
(= (hs-cur) quote-char)
(hs-advance! 1)
@@ -435,8 +459,8 @@
(define
hs-emit!
(fn
(type value start start-line)
(append! tokens (hs-make-token type value start pos start-line))))
(type value start)
(append! tokens (hs-make-token type value start))))
(define
scan!
(fn
@@ -445,7 +469,7 @@
(when
(< pos src-len)
(let
((ch (hs-cur)) (start pos) (start-line current-line))
((ch (hs-cur)) (start pos))
(cond
(and (= ch "-") (< (+ pos 1) src-len) (= (hs-peek 1) "-"))
(do (hs-advance! 2) (skip-comment!) (scan!))
@@ -462,9 +486,9 @@
(= (hs-peek 1) "[")
(= (hs-peek 1) "*")
(= (hs-peek 1) ":")))
(do (hs-emit! "selector" (read-selector) start start-line) (scan!))
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
(do (hs-advance! 2) (hs-emit! "op" ".." start start-line) (scan!))
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
@@ -474,7 +498,7 @@
(= (hs-peek 1) "_")))
(do
(hs-advance! 1)
(hs-emit! "class" (read-class-name pos) start start-line)
(hs-emit! "class" (read-class-name pos) start)
(scan!))
(and
(= ch "#")
@@ -482,7 +506,7 @@
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "id" (read-ident pos) start start-line)
(hs-emit! "id" (read-ident pos) start)
(scan!))
(and
(= ch "@")
@@ -490,7 +514,7 @@
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "attr" (read-ident pos) start start-line)
(hs-emit! "attr" (read-ident pos) start)
(scan!))
(and
(= ch "^")
@@ -498,7 +522,7 @@
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "hat" (read-ident pos) start start-line)
(hs-emit! "hat" (read-ident pos) start)
(scan!))
(and
(= ch "~")
@@ -506,7 +530,7 @@
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "component" (str "~" (read-ident pos)) start start-line)
(hs-emit! "component" (str "~" (read-ident pos)) start)
(scan!))
(and
(= ch "*")
@@ -514,7 +538,7 @@
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "style" (read-ident pos) start start-line)
(hs-emit! "style" (read-ident pos) start)
(scan!))
(and
(= ch ":")
@@ -522,7 +546,7 @@
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "local" (read-ident pos) start start-line)
(hs-emit! "local" (read-ident pos) start)
(scan!))
(or
(= ch "\"")
@@ -535,11 +559,11 @@
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2))))))))
(do (hs-emit! "string" (read-string ch) start start-line) (scan!))
(do (hs-emit! "string" (read-string ch) start) (scan!))
(= ch "`")
(do (hs-emit! "template" (read-template) start start-line) (scan!))
(do (hs-emit! "template" (read-template) start) (scan!))
(hs-digit? ch)
(do (hs-emit! "number" (read-number start) start start-line) (scan!))
(do (hs-emit! "number" (read-number start) start) (scan!))
(hs-ident-start? ch)
(do
(let
@@ -547,8 +571,7 @@
(hs-emit!
(if (hs-keyword? word) "keyword" "ident")
word
start
start-line))
start))
(scan!))
(and
(or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
@@ -560,8 +583,8 @@
(or (= ch "=") (= ch "!"))
(< (+ pos 2) src-len)
(= (hs-peek 2) "="))
(do (hs-advance! 3) (hs-emit! "op" (str ch "==") start start-line))
(do (hs-advance! 2) (hs-emit! "op" (str ch "=") start start-line)))
(do (hs-emit! "op" (str ch "==") start) (hs-advance! 3))
(do (hs-emit! "op" (str ch "=") start) (hs-advance! 2)))
(scan!))
(and
(= ch "'")
@@ -570,66 +593,141 @@
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2)))))
(do (hs-advance! 2) (hs-emit! "op" "'s" start start-line) (scan!))
(do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))
(= ch "(")
(do
(hs-emit! "paren-open" "(" start)
(hs-advance! 1)
(hs-emit! "paren-open" "(" start start-line)
(scan!))
(= ch ")")
(do
(hs-emit! "paren-close" ")" start)
(hs-advance! 1)
(hs-emit! "paren-close" ")" start start-line)
(scan!))
(= ch "[")
(do
(hs-emit! "bracket-open" "[" start)
(hs-advance! 1)
(hs-emit! "bracket-open" "[" start start-line)
(scan!))
(= ch "]")
(do
(hs-emit! "bracket-close" "]" start)
(hs-advance! 1)
(hs-emit! "bracket-close" "]" start start-line)
(scan!))
(= ch "{")
(do
(hs-emit! "brace-open" "{" start)
(hs-advance! 1)
(hs-emit! "brace-open" "{" start start-line)
(scan!))
(= ch "}")
(do
(hs-emit! "brace-close" "}" start)
(hs-advance! 1)
(hs-emit! "brace-close" "}" start start-line)
(scan!))
(= ch ",")
(do (hs-advance! 1) (hs-emit! "comma" "," start start-line) (scan!))
(do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!))
(= ch "+")
(do (hs-advance! 1) (hs-emit! "op" "+" start start-line) (scan!))
(do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!))
(= ch "-")
(do (hs-advance! 1) (hs-emit! "op" "-" start start-line) (scan!))
(do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!))
(= ch "/")
(do (hs-advance! 1) (hs-emit! "op" "/" start start-line) (scan!))
(do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!))
(= ch "=")
(do (hs-advance! 1) (hs-emit! "op" "=" start start-line) (scan!))
(do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!))
(= ch "<")
(do (hs-advance! 1) (hs-emit! "op" "<" start start-line) (scan!))
(do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!))
(= ch ">")
(do (hs-advance! 1) (hs-emit! "op" ">" start start-line) (scan!))
(do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!))
(= ch "!")
(do (hs-advance! 1) (hs-emit! "op" "!" start start-line) (scan!))
(do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!))
(= ch "*")
(do (hs-advance! 1) (hs-emit! "op" "*" start start-line) (scan!))
(do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!))
(= ch "%")
(do (hs-advance! 1) (hs-emit! "op" "%" start start-line) (scan!))
(do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!))
(= ch ".")
(do (hs-advance! 1) (hs-emit! "dot" "." start start-line) (scan!))
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
(= ch "\\")
(do (hs-advance! 1) (hs-emit! "op" "\\" start start-line) (scan!))
(do (hs-emit! "op" "\\" start) (hs-advance! 1) (scan!))
(= ch ":")
(do (hs-advance! 1) (hs-emit! "colon" ":" start start-line) (scan!))
(do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!))
(= ch "|")
(do (hs-advance! 1) (hs-emit! "op" "|" start start-line) (scan!))
(do (hs-emit! "op" "|" start) (hs-advance! 1) (scan!))
(= ch "&")
(do (hs-emit! "op" "&" start) (hs-advance! 1) (scan!))
(= ch "#")
(do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!))
(= ch "?")
(do (hs-emit! "op" "?" start) (hs-advance! 1) (scan!))
(= ch ";")
(do (hs-emit! "op" ";" start) (hs-advance! 1) (scan!))
:else (do (hs-advance! 1) (scan!)))))))
(scan!)
(hs-emit! "eof" nil pos current-line)
(hs-emit! "eof" nil pos)
tokens)))
;; ── Template-mode tokenizer (E37 API) ────────────────────────────────
;; Used by hs-tokens-of when :template flag is set.
;; Emits outer " chars as single STRING tokens; ${ ... } as $ { <inner-tokens> };
;; inner content is tokenized with the regular hs-tokenize.
(define
hs-tokenize-template
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define t-cur (fn () (if (< pos src-len) (nth src pos) nil)))
(define t-peek (fn (n) (if (< (+ pos n) src-len) (nth src (+ pos n)) nil)))
(define t-advance! (fn (n) (set! pos (+ pos n))))
(define t-emit! (fn (type value) (append! tokens (hs-make-token type value pos))))
(define
scan-to-close!
(fn
(depth)
(when
(and (< pos src-len) (> depth 0))
(cond
(= (t-cur) "{")
(do (t-advance! 1) (scan-to-close! (+ depth 1)))
(= (t-cur) "}")
(when (> (- depth 1) 0) (t-advance! 1) (scan-to-close! (- depth 1)))
:else (do (t-advance! 1) (scan-to-close! depth))))))
(define
scan-template!
(fn
()
(when
(< pos src-len)
(let
((ch (t-cur)))
(cond
(= ch "\"")
(do (t-emit! "string" "\"") (t-advance! 1) (scan-template!))
(and (= ch "$") (= (t-peek 1) "{"))
(do
(t-emit! "op" "$")
(t-advance! 1)
(t-emit! "brace-open" "{")
(t-advance! 1)
(let
((inner-start pos))
(scan-to-close! 1)
(let
((inner-src (slice src inner-start pos))
(inner-toks (hs-tokenize inner-src)))
(for-each
(fn (tok)
(when (not (= (get tok "type") "eof"))
(append! tokens tok)))
inner-toks))
(t-emit! "brace-close" "}")
(when (< pos src-len) (t-advance! 1)))
(scan-template!))
(= ch "$")
(do (t-emit! "op" "$") (t-advance! 1) (scan-template!))
(hs-ws? ch)
(do (t-advance! 1) (scan-template!))
:else (do (t-advance! 1) (scan-template!)))))))
(scan-template!)
(t-emit! "eof" nil)
tokens)))

View File

@@ -2494,41 +2494,287 @@
;; ── core/tokenizer (17 tests) ──
(defsuite "hs-upstream-core/tokenizer"
(deftest "handles $ in template properly"
(error "SKIP (untranslated): handles $ in template properly"))
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"" :template) 0)) "\"")
)
(deftest "handles all special escapes properly"
(error "SKIP (untranslated): handles all special escapes properly"))
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\b\""))) (char-from-code 8))
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\f\""))) (char-from-code 12))
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\n\""))) "\n")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\r\""))) "\r")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\t\""))) "\t")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\v\""))) (char-from-code 11))
)
(deftest "handles basic token types"
(error "SKIP (untranslated): handles basic token types"))
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "foo"))) "IDENTIFIER")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1"))) "NUMBER")
(let ((s (hs-tokens-of "1.1")))
(let ((tok (hs-stream-consume s)))
(assert= (hs-token-type tok) "NUMBER")
(assert= (hs-stream-has-more s) false)))
(let ((s (hs-tokens-of "1e6")))
(let ((tok (hs-stream-consume s)))
(assert= (hs-token-type tok) "NUMBER")
(assert= (hs-stream-has-more s) false)))
(let ((s (hs-tokens-of "1e-6")))
(let ((tok (hs-stream-consume s)))
(assert= (hs-token-type tok) "NUMBER")
(assert= (hs-stream-has-more s) false)))
(let ((s (hs-tokens-of "1.1e6")))
(let ((tok (hs-stream-consume s)))
(assert= (hs-token-type tok) "NUMBER")
(assert= (hs-stream-has-more s) false)))
(let ((s (hs-tokens-of "1.1e-6")))
(let ((tok (hs-stream-consume s)))
(assert= (hs-token-type tok) "NUMBER")
(assert= (hs-stream-has-more s) false)))
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of ".a"))) "CLASS_REF")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "#a"))) "ID_REF")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "\"asdf\""))) "STRING")
)
(deftest "handles class identifiers properly"
(error "SKIP (untranslated): handles class identifiers properly"))
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of ".a"))) "CLASS_REF")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of ".a"))) ".a")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of " .a"))) "CLASS_REF")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of " .a"))) ".a")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "a.a"))) "IDENTIFIER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "a.a"))) "a")
(assert= (hs-token-type (nth (get (hs-tokens-of "(a).a") "list") 4)) "IDENTIFIER")
(assert= (hs-token-value (nth (get (hs-tokens-of "(a).a") "list") 4)) "a")
(assert= (hs-token-type (nth (get (hs-tokens-of "{a}.a") "list") 4)) "IDENTIFIER")
(assert= (hs-token-value (nth (get (hs-tokens-of "{a}.a") "list") 4)) "a")
(assert= (hs-token-type (nth (get (hs-tokens-of "[a].a") "list") 4)) "IDENTIFIER")
(assert= (hs-token-value (nth (get (hs-tokens-of "[a].a") "list") 4)) "a")
(assert= (hs-token-type (nth (get (hs-tokens-of "(a(.a") "list") 3)) "CLASS_REF")
(assert= (hs-token-value (nth (get (hs-tokens-of "(a(.a") "list") 3)) ".a")
(assert= (hs-token-type (nth (get (hs-tokens-of "{a{.a") "list") 3)) "CLASS_REF")
(assert= (hs-token-value (nth (get (hs-tokens-of "{a{.a") "list") 3)) ".a")
(assert= (hs-token-type (nth (get (hs-tokens-of "[a[.a") "list") 3)) "CLASS_REF")
(assert= (hs-token-value (nth (get (hs-tokens-of "[a[.a") "list") 3)) ".a")
)
(deftest "handles comments properly"
(error "SKIP (untranslated): handles comments properly"))
(assert= (len (get (hs-tokens-of "--") "list")) 0)
(assert= (len (get (hs-tokens-of "asdf--") "list")) 1)
(assert= (len (get (hs-tokens-of "-- asdf") "list")) 0)
(assert= (len (get (hs-tokens-of "--\nasdf") "list")) 1)
(assert= (len (get (hs-tokens-of "--\nasdf--") "list")) 1)
(assert= (len (get (hs-tokens-of "---asdf") "list")) 0)
(assert= (len (get (hs-tokens-of "----\n---asdf") "list")) 0)
(assert= (len (get (hs-tokens-of "----asdf----") "list")) 0)
(assert= (len (get (hs-tokens-of "---\nasdf---") "list")) 1)
(assert= (len (get (hs-tokens-of "// asdf") "list")) 0)
(assert= (len (get (hs-tokens-of "///asdf") "list")) 0)
(assert= (len (get (hs-tokens-of "asdf//") "list")) 1)
(assert= (len (get (hs-tokens-of "asdf\n//") "list")) 2)
)
(deftest "handles hex escapes properly"
(error "SKIP (untranslated): handles hex escapes properly"))
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\x1f\""))) (char-from-code 31))
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\x41\""))) "A")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"\\x41\\x61\""))) "Aa")
(let ((threw false))
(guard (e (true (set! threw true))) (hs-stream-consume (hs-tokens-of "\"\\x\"")))
(assert threw))
(let ((threw false))
(guard (e (true (set! threw true))) (hs-stream-consume (hs-tokens-of "\"\\xGG\"")))
(assert threw))
(let ((threw false))
(guard (e (true (set! threw true))) (hs-stream-consume (hs-tokens-of "\"\\x4\"")))
(assert threw))
)
(deftest "handles id references properly"
(error "SKIP (untranslated): handles id references properly"))
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "#a"))) "ID_REF")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "#a"))) "#a")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of " #a"))) "ID_REF")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of " #a"))) "#a")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "a#a"))) "IDENTIFIER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "a#a"))) "a")
(assert= (hs-token-type (nth (get (hs-tokens-of "(a)#a") "list") 4)) "IDENTIFIER")
(assert= (hs-token-value (nth (get (hs-tokens-of "(a)#a") "list") 4)) "a")
(assert= (hs-token-type (nth (get (hs-tokens-of "{a}#a") "list") 4)) "IDENTIFIER")
(assert= (hs-token-value (nth (get (hs-tokens-of "{a}#a") "list") 4)) "a")
(assert= (hs-token-type (nth (get (hs-tokens-of "[a]#a") "list") 4)) "IDENTIFIER")
(assert= (hs-token-value (nth (get (hs-tokens-of "[a]#a") "list") 4)) "a")
(assert= (hs-token-type (nth (get (hs-tokens-of "(a(#a") "list") 3)) "ID_REF")
(assert= (hs-token-value (nth (get (hs-tokens-of "(a(#a") "list") 3)) "#a")
(assert= (hs-token-type (nth (get (hs-tokens-of "{a{#a") "list") 3)) "ID_REF")
(assert= (hs-token-value (nth (get (hs-tokens-of "{a{#a") "list") 3)) "#a")
(assert= (hs-token-type (nth (get (hs-tokens-of "[a[#a") "list") 3)) "ID_REF")
(assert= (hs-token-value (nth (get (hs-tokens-of "[a[#a") "list") 3)) "#a")
)
(deftest "handles identifiers properly"
(error "SKIP (untranslated): handles identifiers properly"))
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "foo"))) "IDENTIFIER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "foo"))) "foo")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of " foo "))) "IDENTIFIER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of " foo "))) "foo")
(let ((s (hs-tokens-of " foo bar")))
(let ((tok1 (hs-stream-consume s)))
(assert= (hs-token-type tok1) "IDENTIFIER")
(assert= (hs-token-value tok1) "foo")
(let ((tok2 (hs-stream-consume s)))
(assert= (hs-token-type tok2) "IDENTIFIER")
(assert= (hs-token-value tok2) "bar"))))
(let ((s (hs-tokens-of " foo\n-- a comment\n bar")))
(let ((tok1 (hs-stream-consume s)))
(assert= (hs-token-type tok1) "IDENTIFIER")
(assert= (hs-token-value tok1) "foo")
(let ((tok2 (hs-stream-consume s)))
(assert= (hs-token-type tok2) "IDENTIFIER")
(assert= (hs-token-value tok2) "bar"))))
)
(deftest "handles identifiers with numbers properly"
(error "SKIP (untranslated): handles identifiers with numbers properly"))
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "f1oo"))) "IDENTIFIER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "f1oo"))) "f1oo")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "fo1o"))) "IDENTIFIER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "fo1o"))) "fo1o")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "foo1"))) "IDENTIFIER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "foo1"))) "foo1")
)
(deftest "handles look ahead property"
(error "SKIP (untranslated): handles look ahead property"))
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "a 1 + 1") 0)) "a")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "a 1 + 1") 1)) "1")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "a 1 + 1") 2)) "+")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "a 1 + 1") 3)) "1")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "a 1 + 1") 4)) "<<<EOF>>>")
)
(deftest "handles numbers properly"
(error "SKIP (untranslated): handles numbers properly"))
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1"))) "NUMBER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1"))) "1")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1.1"))) "NUMBER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1.1"))) "1.1")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1234567890.1234567890"))) "NUMBER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1234567890.1234567890"))) "1234567890.1234567890")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1e6"))) "NUMBER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1e6"))) "1e6")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1e-6"))) "NUMBER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1e-6"))) "1e-6")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1.1e6"))) "NUMBER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1.1e6"))) "1.1e6")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "1.1e-6"))) "NUMBER")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "1.1e-6"))) "1.1e-6")
(assert= (hs-token-type (nth (get (hs-tokens-of "1.1.1") "list") 0)) "NUMBER")
(assert= (hs-token-type (nth (get (hs-tokens-of "1.1.1") "list") 1)) "PERIOD")
(assert= (hs-token-type (nth (get (hs-tokens-of "1.1.1") "list") 2)) "NUMBER")
(assert= (len (get (hs-tokens-of "1.1.1") "list")) 3)
)
(deftest "handles operators properly"
(error "SKIP (untranslated): handles operators properly"))
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "+"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "+"))) "+")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "-"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "-"))) "-")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "*"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "*"))) "*")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "."))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "."))) ".")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "\\"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\\"))) "\\")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ":"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of ":"))) ":")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "%"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "%"))) "%")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "|"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "|"))) "|")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "!"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "!"))) "!")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "?"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "?"))) "?")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "#"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "#"))) "#")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "&"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "&"))) "&")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ";"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of ";"))) ";")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ","))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of ","))) ",")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "("))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "("))) "(")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ")"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of ")"))) ")")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "<"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "<"))) "<")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ">"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of ">"))) ">")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "{"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "{"))) "{")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "}"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "}"))) "}")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "["))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "["))) "[")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "]"))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "]"))) "]")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "="))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "="))) "=")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "<="))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "<="))) "<=")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of ">="))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of ">="))) ">=")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "=="))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "=="))) "==")
(assert= (hs-token-op? (hs-stream-consume (hs-tokens-of "==="))) true)
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "==="))) "===")
)
(deftest "handles strings properly"
(error "SKIP (untranslated): handles strings properly"))
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "\"foo\""))) "STRING")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"foo\""))) "foo")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "\"fo'o\""))) "STRING")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"fo'o\""))) "fo'o")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "\"fo\\\"o\""))) "STRING")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "\"fo\\\"o\""))) "fo\"o")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "'foo'"))) "STRING")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "'foo'"))) "foo")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "'fo\"o'"))) "STRING")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "'fo\"o'"))) "fo\"o")
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "'fo\\'o'"))) "STRING")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "'fo\\'o'"))) "fo'o")
(let ((threw false))
(guard (e (true (set! threw true))) (hs-stream-consume (hs-tokens-of "'")))
(assert threw))
(let ((threw false))
(guard (e (true (set! threw true))) (hs-stream-consume (hs-tokens-of "\"")))
(assert threw))
)
(deftest "handles strings properly 2"
(error "SKIP (untranslated): handles strings properly 2"))
(assert= (hs-token-type (hs-stream-consume (hs-tokens-of "'foo'"))) "STRING")
(assert= (hs-token-value (hs-stream-consume (hs-tokens-of "'foo'"))) "foo")
)
(deftest "handles template bootstrap properly"
(error "SKIP (untranslated): handles template bootstrap properly"))
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"" :template) 0)) "\"")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"$" :template) 0)) "\"")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"$" :template) 1)) "$")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${" :template) 0)) "\"")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${" :template) 1)) "$")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${" :template) 2)) "{")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"" :template) 0)) "\"")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"" :template) 1)) "$")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"" :template) 2)) "{")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"" :template) 3)) "asdf")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 0)) "\"")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 1)) "$")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 2)) "{")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 3)) "asdf")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 4)) "}")
(assert= (hs-token-value (hs-stream-token (hs-tokens-of "\"${\"asdf\"}\"" :template) 5)) "\"")
)
(deftest "handles whitespace properly"
(error "SKIP (untranslated): handles whitespace properly"))
(assert= (len (get (hs-tokens-of " ") "list")) 0)
(assert= (len (get (hs-tokens-of " asdf") "list")) 1)
(assert= (len (get (hs-tokens-of " asdf ") "list")) 2)
(assert= (len (get (hs-tokens-of "asdf ") "list")) 2)
(assert= (len (get (hs-tokens-of "\n") "list")) 0)
(assert= (len (get (hs-tokens-of "\nasdf") "list")) 1)
(assert= (len (get (hs-tokens-of "\nasdf\n") "list")) 2)
(assert= (len (get (hs-tokens-of "asdf\n") "list")) 2)
(assert= (len (get (hs-tokens-of "\r") "list")) 0)
(assert= (len (get (hs-tokens-of "\rasdf") "list")) 1)
(assert= (len (get (hs-tokens-of "\rasdf\r") "list")) 2)
(assert= (len (get (hs-tokens-of "asdf\r") "list")) 2)
(assert= (len (get (hs-tokens-of "\t") "list")) 0)
(assert= (len (get (hs-tokens-of "\tasdf") "list")) 1)
(assert= (len (get (hs-tokens-of "\tasdf\t") "list")) 2)
(assert= (len (get (hs-tokens-of "asdf\t") "list")) 2)
)
(deftest "string interpolation isnt surprising"
(hs-cleanup!)
(let ((_el-div (dom-create-element "div")))
(dom-set-attr _el-div "_" "on click set x to 42 then put `test${x} test ${x} test$x test $x test $x test ${x} test$x test_$x test_${x} test-$x test.$x` into my.innerHTML")
(dom-set-attr _el-div "_" "on click set x to 42 then put `test\\${x} test ${x} test\\$x test $x test \\$x test \\${x} test$x test_$x test_${x} test-$x test.$x` into my.innerHTML")
(dom-append (dom-body) _el-div)
(hs-activate! _el-div)
(dom-dispatch _el-div "click" nil)

View File

@@ -1264,7 +1264,9 @@ def process_hs_val(hs_val):
hs_val = hs_val.replace('\\n', '\n').replace('\\t', ' ')
# Preserve escaped quotes (\" → placeholder), strip remaining backslashes, restore
hs_val = hs_val.replace('\\"', '\x00QUOT\x00')
hs_val = hs_val.replace('\\$', '\x00DOLLAR\x00') # preserve \$ template escape
hs_val = hs_val.replace('\\', '')
hs_val = hs_val.replace('\x00DOLLAR\x00', '\\$') # restore \$
hs_val = hs_val.replace('\x00QUOT\x00', '\\"')
# Strip line comments BEFORE newline collapse — once newlines become `then`,
# an unterminated `//` / ` --` comment would consume the rest of the input.
@@ -1855,6 +1857,272 @@ def extract_hs_expr(raw):
return expr
def generate_tokenizer_test(test, safe_name):
"""Hardcoded SX translation for _hyperscript.internals.tokenizer tests (E37)."""
name = test['name']
def to_(src, tmpl=False):
"""Return (hs-tokens-of <sx-str> [:template]) for HS source string src."""
escaped = (src
.replace('\\', '\\\\')
.replace('"', '\\"')
.replace('\n', '\\n')
.replace('\r', '\\r')
.replace('\t', '\\t'))
q = '"' + escaped + '"'
suffix = ' :template' if tmpl else ''
return f'(hs-tokens-of {q}{suffix})'
def consume(s):
return f'(hs-stream-consume {s})'
def tok_i(s, i):
return f'(hs-stream-token {s} {i})'
def has_more(s):
return f'(hs-stream-has-more {s})'
def t_type(t):
return f'(hs-token-type {t})'
def t_val(t):
return f'(hs-token-value {t})'
def t_op(t):
return f'(hs-token-op? {t})'
def nth_list(s, i):
return f'(nth (get {s} "list") {i})'
def list_len(s):
return f'(len (get {s} "list"))'
def ae(actual, expected):
return f' (assert= {actual} {expected})'
def throws(expr):
return (
f' (let ((threw false))\n'
f' (guard (e (true (set! threw true))) {expr})\n'
f' (assert threw))'
)
lines = [f' (deftest "{safe_name}"']
if name == 'handles $ in template properly':
s = to_('"', tmpl=True)
lines.append(ae(t_val(tok_i(s, 0)), sx_str('"')))
elif name == 'handles all special escapes properly':
for src, exp in [
('"\\b"', '(char-from-code 8)'),
('"\\f"', '(char-from-code 12)'),
('"\\n"', '"\\n"'),
('"\\r"', '"\\r"'),
('"\\t"', '"\\t"'),
('"\\v"', '(char-from-code 11)'),
]:
lines.append(ae(t_val(consume(to_(src))), exp))
elif name == 'handles basic token types':
lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
lines.append(ae(t_type(consume(to_('1'))), '"NUMBER"'))
for src in ['1.1', '1e6', '1e-6', '1.1e6', '1.1e-6']:
sq = to_(src)
lines.append(f' (let ((s {sq}))')
lines.append(f' (let ((tok (hs-stream-consume s)))')
lines.append(f' (assert= (hs-token-type tok) "NUMBER")')
lines.append(f' (assert= (hs-stream-has-more s) false)))')
lines.append(ae(t_type(consume(to_('.a'))), '"CLASS_REF"'))
lines.append(ae(t_type(consume(to_('#a'))), '"ID_REF"'))
lines.append(ae(t_type(consume(to_('"asdf"'))), '"STRING"'))
elif name == 'handles class identifiers properly':
for src, idx, exp_type, exp_val in [
('.a', None, 'CLASS_REF', '.a'),
(' .a', None, 'CLASS_REF', '.a'),
('a.a', None, 'IDENTIFIER', 'a'),
('(a).a', 4, 'IDENTIFIER', 'a'),
('{a}.a', 4, 'IDENTIFIER', 'a'),
('[a].a', 4, 'IDENTIFIER', 'a'),
('(a(.a', 3, 'CLASS_REF', '.a'),
('{a{.a', 3, 'CLASS_REF', '.a'),
('[a[.a', 3, 'CLASS_REF', '.a'),
]:
if idx is None:
tok_expr = consume(to_(src))
else:
tok_expr = nth_list(to_(src), idx)
lines.append(ae(t_type(tok_expr), f'"{exp_type}"'))
lines.append(ae(t_val(tok_expr), sx_str(exp_val)))
elif name == 'handles comments properly':
for src, expected in [
('--', 0),
('asdf--', 1),
('-- asdf', 0),
('--\nasdf', 1),
('--\nasdf--', 1),
('---asdf', 0),
('----\n---asdf', 0),
('----asdf----', 0),
('---\nasdf---', 1),
('// asdf', 0),
('///asdf', 0),
('asdf//', 1),
('asdf\n//', 2),
]:
lines.append(ae(list_len(to_(src)), str(expected)))
elif name == 'handles hex escapes properly':
lines.append(ae(t_val(consume(to_('"\\x1f"'))), '(char-from-code 31)'))
lines.append(ae(t_val(consume(to_('"\\x41"'))), '"A"'))
lines.append(ae(t_val(consume(to_('"\\x41\\x61"'))), '"Aa"'))
for bad in ['"\\x"', '"\\xGG"', '"\\x4"']:
lines.append(throws(consume(to_(bad))))
elif name == 'handles id references properly':
for src, idx, exp_type, exp_val in [
('#a', None, 'ID_REF', '#a'),
(' #a', None, 'ID_REF', '#a'),
('a#a', None, 'IDENTIFIER', 'a'),
('(a)#a', 4, 'IDENTIFIER', 'a'),
('{a}#a', 4, 'IDENTIFIER', 'a'),
('[a]#a', 4, 'IDENTIFIER', 'a'),
('(a(#a', 3, 'ID_REF', '#a'),
('{a{#a', 3, 'ID_REF', '#a'),
('[a[#a', 3, 'ID_REF', '#a'),
]:
if idx is None:
tok_expr = consume(to_(src))
else:
tok_expr = nth_list(to_(src), idx)
lines.append(ae(t_type(tok_expr), f'"{exp_type}"'))
lines.append(ae(t_val(tok_expr), sx_str(exp_val)))
elif name == 'handles identifiers properly':
lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
lines.append(ae(t_val(consume(to_('foo'))), '"foo"'))
lines.append(ae(t_type(consume(to_(' foo '))), '"IDENTIFIER"'))
lines.append(ae(t_val(consume(to_(' foo '))), '"foo"'))
for src, v1, v2 in [
(' foo bar', 'foo', 'bar'),
(' foo\n-- a comment\n bar', 'foo', 'bar'),
]:
sq = to_(src)
lines.append(f' (let ((s {sq}))')
lines.append(f' (let ((tok1 (hs-stream-consume s)))')
lines.append(f' (assert= (hs-token-type tok1) "IDENTIFIER")')
lines.append(f' (assert= (hs-token-value tok1) {sx_str(v1)})')
lines.append(f' (let ((tok2 (hs-stream-consume s)))')
lines.append(f' (assert= (hs-token-type tok2) "IDENTIFIER")')
lines.append(f' (assert= (hs-token-value tok2) {sx_str(v2)}))))')
elif name == 'handles identifiers with numbers properly':
for src in ['f1oo', 'fo1o', 'foo1']:
lines.append(ae(t_type(consume(to_(src))), '"IDENTIFIER"'))
lines.append(ae(t_val(consume(to_(src))), sx_str(src)))
elif name == 'handles look ahead property':
s = to_('a 1 + 1')
for i, v in [(0, 'a'), (1, '1'), (2, '+'), (3, '1'), (4, '<<<EOF>>>')]:
lines.append(ae(t_val(tok_i(s, i)), sx_str(v)))
elif name == 'handles numbers properly':
for src, v in [
('1', '1'),
('1.1', '1.1'),
('1234567890.1234567890', '1234567890.1234567890'),
('1e6', '1e6'),
('1e-6', '1e-6'),
('1.1e6', '1.1e6'),
('1.1e-6', '1.1e-6'),
]:
lines.append(ae(t_type(consume(to_(src))), '"NUMBER"'))
lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
s = to_('1.1.1')
toks = f'(get {s} "list")'
lines.append(ae(f'(hs-token-type (nth {toks} 0))', '"NUMBER"'))
lines.append(ae(f'(hs-token-type (nth {toks} 1))', '"PERIOD"'))
lines.append(ae(f'(hs-token-type (nth {toks} 2))', '"NUMBER"'))
lines.append(ae(f'(len {toks})', '3'))
elif name == 'handles operators properly':
optable = [
('+', 'PLUS'), ('-', 'MINUS'), ('*', 'MULTIPLY'),
('.', 'PERIOD'), ('\\', 'BACKSLASH'), (':', 'COLON'),
('%', 'PERCENT'), ('|', 'PIPE'), ('!', 'EXCLAMATION'),
('?', 'QUESTION'), ('#', 'POUND'), ('&', 'AMPERSAND'),
(';', 'SEMI'), (',', 'COMMA'), ('(', 'L_PAREN'),
(')', 'R_PAREN'), ('<', 'L_ANG'), ('>', 'R_ANG'),
('{', 'L_BRACE'), ('}', 'R_BRACE'), ('[', 'L_BRACKET'),
(']', 'R_BRACKET'), ('=', 'EQUALS'),
('<=', 'LTE_ANG'), ('>=', 'GTE_ANG'),
('==', 'EQ'), ('===', 'EQQ'),
]
for op_char, _op_name in optable:
tok_expr = consume(to_(op_char))
lines.append(ae(t_op(tok_expr), 'true'))
lines.append(ae(t_val(tok_expr), sx_str(op_char)))
elif name == 'handles strings properly':
for src, v in [
('"foo"', 'foo'),
('"fo\'o"', "fo'o"),
('"fo\\"o"', 'fo"o'),
("'foo'", 'foo'),
("'fo\"o'", 'fo"o'),
("'fo\\'o'", "fo'o"),
]:
lines.append(ae(t_type(consume(to_(src))), '"STRING"'))
lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
lines.append(throws(consume(to_("'"))))
lines.append(throws(consume(to_('"'))))
elif name == 'handles strings properly 2':
tok_expr = consume(to_("'foo'"))
lines.append(ae(t_type(tok_expr), '"STRING"'))
lines.append(ae(t_val(tok_expr), '"foo"'))
elif name == 'handles template bootstrap properly':
s1 = to_('"', tmpl=True)
lines.append(ae(t_val(tok_i(s1, 0)), sx_str('"')))
s2 = to_('"$', tmpl=True)
lines.append(ae(t_val(tok_i(s2, 0)), sx_str('"')))
lines.append(ae(t_val(tok_i(s2, 1)), '"$"'))
s3 = to_('"${', tmpl=True)
lines.append(ae(t_val(tok_i(s3, 0)), sx_str('"')))
lines.append(ae(t_val(tok_i(s3, 1)), '"$"'))
lines.append(ae(t_val(tok_i(s3, 2)), '"{"'))
s4 = to_('"${"asdf"', tmpl=True)
lines.append(ae(t_val(tok_i(s4, 0)), sx_str('"')))
lines.append(ae(t_val(tok_i(s4, 1)), '"$"'))
lines.append(ae(t_val(tok_i(s4, 2)), '"{"'))
lines.append(ae(t_val(tok_i(s4, 3)), '"asdf"'))
s5 = to_('"${"asdf"}"', tmpl=True)
lines.append(ae(t_val(tok_i(s5, 0)), sx_str('"')))
lines.append(ae(t_val(tok_i(s5, 1)), '"$"'))
lines.append(ae(t_val(tok_i(s5, 2)), '"{"'))
lines.append(ae(t_val(tok_i(s5, 3)), '"asdf"'))
lines.append(ae(t_val(tok_i(s5, 4)), '"}"'))
lines.append(ae(t_val(tok_i(s5, 5)), sx_str('"')))
elif name == 'handles whitespace properly':
for src, expected in [
(' ', 0), (' asdf', 1), (' asdf ', 2), ('asdf ', 2),
('\n', 0), ('\nasdf', 1), ('\nasdf\n', 2), ('asdf\n', 2),
('\r', 0), ('\rasdf', 1), ('\rasdf\r', 2), ('asdf\r', 2),
('\t', 0), ('\tasdf', 1), ('\tasdf\t', 2), ('asdf\t', 2),
]:
lines.append(ae(list_len(to_(src)), str(expected)))
else:
return None # not a tokenizer test we handle
lines.append(' )')
return '\n'.join(lines)
def generate_eval_only_test(test, idx):
"""Generate SX deftest for no-HTML tests using eval-hs.
Handles patterns:
@@ -2073,6 +2341,9 @@ def generate_eval_only_test(test, idx):
f" (assert= (hs-line-at \"{src}\" (list :true-branch :next)) \" log 'it was true'\"))"
)
if '_hyperscript.internals.tokenizer' in body:
return generate_tokenizer_test(test, safe_name)
lines.append(f' (deftest "{safe_name}"')
assertions = []