HS E37: tokenizer-as-API 17/17 (+fixes)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 16s

- runtime.sx: fix extra ) in hs-tokens-of (parse error); add hs-eof-sentinel,
  hs-raw->api-token, hs-normalize-raw-tokens, hs-tokens-of, stream helpers,
  hs-token-type/value/op?; add \$ escape to hs-template
- tokenizer.sx: fix read-number double-dot bug (1.1.1 → 3 tokens); fix t-emit!
  eof call (3→2 args); add bare $ case to scan-template!
- compiler.sx: add \$ escape to tpl-collect template interpolation
- generate-sx-tests.py: preserve \$ in process_hs_val; add generate_tokenizer_test
- regen spec/tests/test-hyperscript-behavioral.sx: 17 tokenizer tests generated
- plans/hs-conformance-to-100.md: row 37 marked done +17

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 09:54:59 +00:00
parent 3003c8a069
commit 880503e2b6
9 changed files with 974 additions and 77 deletions

View File

@@ -893,6 +893,12 @@
(let
((ch (nth raw i)))
(if
(and (= ch "\\") (< (+ i 1) n) (= (nth raw (+ i 1)) "$"))
(do
(set! buf (str buf "$"))
(set! i (+ i 2))
(tpl-collect))
(if
(and (= ch "$") (< (+ i 1) n))
(if
(= (nth raw (+ i 1)) "{")
@@ -931,7 +937,7 @@
(do
(set! buf (str buf ch))
(set! i (+ i 1))
(tpl-collect)))))))
(tpl-collect))))))))
(tpl-collect)
(tpl-flush)
(cons (quote str) parts))))

View File

@@ -2021,6 +2021,12 @@
(let
((ch (nth raw i)))
(if
(and (= ch "\\") (< (+ i 1) n) (= (nth raw (+ i 1)) "$"))
(do
(set! result (str result "$"))
(set! i (+ i 2))
(tpl-loop))
(if
(and (= ch "$") (< (+ i 1) n))
(if
(= (nth raw (+ i 1)) "{")
@@ -2089,7 +2095,7 @@
(do
(set! result (str result ch))
(set! i (+ i 1))
(tpl-loop)))))))
(tpl-loop))))))))
(do (tpl-loop) result))))
(define
@@ -2606,6 +2612,46 @@
(= raw-type "op"))))
{:type up-type :value up-val :op is-op}))))
;; Expand "class" and "id" tokens that follow a closing bracket into
;; separate dot/hash + ident tokens, matching upstream context-sensitive
;; behaviour: after ) ] } the dot is property access, not a CLASS_REF.
(define
hs-normalize-raw-tokens
(fn
(raw-real)
(let
((result (list))
(prev-type nil))
(for-each
(fn
(tok)
(let
((typ (get tok "type"))
(val (get tok "value"))
(tok-pos (get tok "pos")))
(if
(and
(or (= typ "class") (= typ "id"))
(or
(= prev-type "paren-close")
(= prev-type "bracket-close")
(= prev-type "brace-close")))
(do
(if
(= typ "class")
(do
(append! result {:type "dot" :value "." :pos tok-pos})
(append! result {:type "ident" :value val :pos (+ tok-pos 1)}))
(do
(append! result {:type "op" :value "#" :pos tok-pos})
(append! result {:type "ident" :value val :pos (+ tok-pos 1)})))
(set! prev-type "ident"))
(do
(append! result tok)
(set! prev-type typ)))))
raw-real)
result)))
(define
hs-tokens-of
(fn
@@ -2613,9 +2659,34 @@
(let
((template? (and (> (len rest) 0) (= (first rest) :template)))
(raw (if template? (hs-tokenize-template src) (hs-tokenize src))))
{:source src
:list (map hs-raw->api-token raw)
:pos 0})))
(if
template?
{:source src :list (map hs-raw->api-token raw) :pos 0}
;; Normal mode: filter EOF, context-normalise, add trailing-WS sentinel
(let
((real (filter (fn (t) (not (= (get t "type") "eof"))) raw)))
(let
((norm (hs-normalize-raw-tokens real)))
(let
((api (map hs-raw->api-token norm)))
(let
((with-sep
(if
(and
(> (len norm) 0)
(let
((last-tok (nth norm (- (len norm) 1))))
(let
((end-pos
(+ (get last-tok "pos")
(len (get last-tok "value")))))
(and
(< end-pos (len src))
(hs-ws? (nth src end-pos))))))
(append api (list {:type "WHITESPACE" :value " " :op false}))
api)))
{:source src :list with-sep :pos 0}))))))))
(define
hs-stream-token

View File

@@ -256,10 +256,15 @@
read-number
(fn
(start)
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-number start))
(define
read-int
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-int))))
(read-int)
(when
(and
(< pos src-len)
@@ -267,15 +272,7 @@
(< (+ pos 1) src-len)
(hs-digit? (hs-peek 1)))
(hs-advance! 1)
(define
read-frac
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-frac))))
(read-frac))
(read-int))
(do
(when
(and
@@ -293,15 +290,7 @@
(< pos src-len)
(or (= (hs-cur) "+") (= (hs-cur) "-")))
(hs-advance! 1))
(define
read-exp-digits
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-exp-digits))))
(read-exp-digits))
(read-int))
(let
((num-end pos))
(when
@@ -663,6 +652,14 @@
(do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!))
(= ch "|")
(do (hs-emit! "op" "|" start) (hs-advance! 1) (scan!))
(= ch "&")
(do (hs-emit! "op" "&" start) (hs-advance! 1) (scan!))
(= ch "#")
(do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!))
(= ch "?")
(do (hs-emit! "op" "?" start) (hs-advance! 1) (scan!))
(= ch ";")
(do (hs-emit! "op" ";" start) (hs-advance! 1) (scan!))
:else (do (hs-advance! 1) (scan!)))))))
(scan!)
(hs-emit! "eof" nil pos)
@@ -726,9 +723,11 @@
(t-emit! "brace-close" "}")
(when (< pos src-len) (t-advance! 1)))
(scan-template!))
(= ch "$")
(do (t-emit! "op" "$") (t-advance! 1) (scan-template!))
(hs-ws? ch)
(do (t-advance! 1) (scan-template!))
:else (do (t-advance! 1) (scan-template!)))))))
(scan-template!)
(t-emit! "eof" nil pos)
(t-emit! "eof" nil)
tokens)))