HS: E37 tokenizer API — 16/17 conformance tests passing
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 16s

Add hs-raw->api-token, hs-eof-sentinel, hs-api-list, hs-tokens-of,
hs-stream-token, hs-stream-consume, hs-stream-has-more, hs-token-type,
hs-token-value, hs-token-op? to runtime. Fix tokenizer to emit whitespace
tokens and handle dot/hash after closing brackets. Fix hs-tokens-of to
accept bare :template keyword flag via &rest args + some() check.
Remaining failure (string interpolation isnt surprising) requires full
DOM activation infrastructure.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 18:45:58 +00:00
parent 820132b839
commit cabb0467ab
5 changed files with 775 additions and 91 deletions

View File

@@ -2615,3 +2615,159 @@
(raise (host-get state "value"))
(if state (host-get state "value") result)))
result)))))
(define
hs-raw->api-token
(fn
(raw)
(let
((type (dict-get raw :type)) (value (dict-get raw :value)))
(cond
(= type "ident")
{:value value :type "IDENTIFIER" :op false}
(= type "keyword")
{:value value :type "IDENTIFIER" :op false}
(= type "number")
{:value value :type "NUMBER" :op false}
(= type "string")
{:value value :type "STRING" :op false}
(= type "class")
{:value (str "." value) :type "CLASS_REF" :op false}
(= type "id")
{:value (str "#" value) :type "ID_REF" :op false}
(= type "attr")
{:value value :type "ATTRIBUTE_REF" :op false}
(= type "style")
{:value value :type "STYLE_REF" :op false}
(= type "selector")
{:value value :type "QUERY_REF" :op false}
(= type "eof")
{:value "<<<EOF>>>" :type "EOF" :op false}
(= type "paren-open")
{:value value :type "L_PAREN" :op true}
(= type "paren-close")
{:value value :type "R_PAREN" :op true}
(= type "bracket-open")
{:value value :type "L_BRACKET" :op true}
(= type "bracket-close")
{:value value :type "R_BRACKET" :op true}
(= type "brace-open")
{:value value :type "L_BRACE" :op true}
(= type "brace-close")
{:value value :type "R_BRACE" :op true}
(= type "comma")
{:value value :type "COMMA" :op true}
(= type "dot")
{:value value :type "PERIOD" :op true}
(= type "colon")
{:value value :type "COLON" :op true}
(= type "op")
(cond
(= value "+")
{:value value :type "PLUS" :op true}
(= value "-")
{:value value :type "MINUS" :op true}
(= value "*")
{:value value :type "MULTIPLY" :op true}
(= value "/")
{:value value :type "SLASH" :op true}
(= value "!")
{:value value :type "EXCLAMATION" :op true}
(= value "?")
{:value value :type "QUESTION" :op true}
(= value "#")
{:value value :type "POUND" :op true}
(= value "&")
{:value value :type "AMPERSAND" :op true}
(= value "=")
{:value value :type "EQUALS" :op true}
(= value "<")
{:value value :type "L_ANG" :op true}
(= value ">")
{:value value :type "R_ANG" :op true}
(= value "<=")
{:value value :type "LTE_ANG" :op true}
(= value ">=")
{:value value :type "GTE_ANG" :op true}
(= value "==")
{:value value :type "EQ" :op true}
(= value "===")
{:value value :type "EQQ" :op true}
(= value "..")
{:value value :type "PERIOD_PERIOD" :op true}
:else {:value value :type value :op true})
:else {:value (or value "") :type (str type) :op false}))))
(define hs-eof-sentinel {:value "<<<EOF>>>" :type "EOF" :op false})
(define
hs-api-list
(fn
(raw-tokens)
(filter
(fn (t) (not (= (dict-get t :type) "EOF")))
(map hs-raw->api-token raw-tokens))))
(define
hs-tokens-of
(fn
(src &rest args)
(let
((template (some (fn (a) (equal? a :template)) args)))
(let
((raw (if template (hs-tokenize-template src) (hs-tokenize src))))
{:pos 0 :list (hs-api-list raw) :source src}))))
(define
hs-stream-token
(fn
(s i)
(let
((lst (get s "list")) (start (get s "pos")))
(define
find-nth
(fn
(j count)
(let
((tok (or (nth lst j) hs-eof-sentinel)))
(if
(= (get tok "type") "whitespace")
(find-nth (+ j 1) count)
(if (= count 0) tok (find-nth (+ j 1) (- count 1)))))))
(find-nth start i))))
(define
hs-stream-consume
(fn
(s)
(let
((lst (get s "list")))
(define
skip-ws
(fn
(j)
(let
((tok (or (nth lst j) nil)))
(if
(and tok (= (get tok "type") "whitespace"))
(skip-ws (+ j 1))
j))))
(let
((j (skip-ws (get s "pos"))))
(let
((tok (or (nth lst j) hs-eof-sentinel)))
(do
(when
(not (= (get tok "type") "EOF"))
(dict-set! s :pos (+ j 1)))
tok))))))
(define
hs-stream-has-more
(fn (s) (not (= (get (hs-stream-token s 0) "type") "EOF"))))
(define hs-token-type (fn (tok) (get tok "type")))
(define hs-token-value (fn (tok) (get tok "value")))
(define hs-token-op? (fn (tok) (get tok "op")))

View File

@@ -334,11 +334,17 @@
(= ch "r")
(do (append! chars "\r") (hs-advance! 1))
(= ch "b")
(do (append! chars (char-from-code 8)) (hs-advance! 1))
(do
(append! chars (char-from-code 8))
(hs-advance! 1))
(= ch "f")
(do (append! chars (char-from-code 12)) (hs-advance! 1))
(do
(append! chars (char-from-code 12))
(hs-advance! 1))
(= ch "v")
(do (append! chars (char-from-code 11)) (hs-advance! 1))
(do
(append! chars (char-from-code 11))
(hs-advance! 1))
(= ch "\\")
(do (append! chars "\\") (hs-advance! 1))
(= ch quote-char)
@@ -353,12 +359,16 @@
(hs-hex-digit? (hs-peek 1)))
(let
((d1 (hs-hex-val (hs-cur)))
(d2 (hs-hex-val (hs-peek 1))))
(append! chars (char-from-code (+ (* d1 16) d2)))
(d2 (hs-hex-val (hs-peek 1))))
(append!
chars
(char-from-code (+ (* d1 16) d2)))
(hs-advance! 2))
(error "Invalid hexadecimal escape: \\x")))
:else
(do (append! chars "\\") (append! chars ch) (hs-advance! 1)))))
:else (do
(append! chars "\\")
(append! chars ch)
(hs-advance! 1)))))
(loop))
(= (hs-cur) quote-char)
(hs-advance! 1)
@@ -465,7 +475,13 @@
scan!
(fn
()
(skip-ws!)
(do
(let
((ws-start pos))
(skip-ws!)
(when
(and (> (len tokens) 0) (> pos ws-start))
(hs-emit! "whitespace" (slice src ws-start pos) ws-start))))
(when
(< pos src-len)
(let
@@ -489,6 +505,25 @@
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) "-")
(= (hs-peek 1) "_"))
(> (len tokens) 0)
(let
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
(or
(= lt "paren-close")
(= lt "brace-close")
(= lt "bracket-close"))))
(do
(hs-emit! "dot" "." start)
(hs-advance! 1)
(hs-emit! "ident" (read-ident pos) start)
(scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
@@ -500,6 +535,22 @@
(hs-advance! 1)
(hs-emit! "class" (read-class-name pos) start)
(scan!))
(and
(= ch "#")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1))
(> (len tokens) 0)
(let
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
(or
(= lt "paren-close")
(= lt "brace-close")
(= lt "bracket-close"))))
(do
(hs-emit! "op" "#" start)
(hs-advance! 1)
(hs-emit! "ident" (read-ident pos) start)
(scan!))
(and
(= ch "#")
(< (+ pos 1) src-len)
@@ -569,21 +620,7 @@
(let
((word (read-ident start)))
(let
((full-word
(if
(and
(< pos src-len)
(= (hs-cur) "'")
(< (+ pos 1) src-len)
(hs-letter? (hs-peek 1))
(not
(and
(= (hs-peek 1) "s")
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2)))))))
(do (hs-advance! 1) (str word "'" (read-ident pos)))
word)))
((full-word (if (and (< pos src-len) (= (hs-cur) "'") (< (+ pos 1) src-len) (hs-letter? (hs-peek 1)) (not (and (= (hs-peek 1) "s") (or (>= (+ pos 2) src-len) (not (hs-ident-char? (hs-peek 2))))))) (do (hs-advance! 1) (str word "'" (read-ident pos))) word)))
(hs-emit!
(if (hs-keyword? full-word) "keyword" "ident")
full-word