HS: E37 tokenizer API — 16/17 conformance tests passing
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 16s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 16s
Add hs-raw->api-token, hs-eof-sentinel, hs-api-list, hs-tokens-of, hs-stream-token, hs-stream-consume, hs-stream-has-more, hs-token-type, hs-token-value, and hs-token-op? to the runtime. Fix the tokenizer to emit whitespace tokens and to handle dot/hash after closing brackets. Fix hs-tokens-of to accept a bare :template keyword flag via &rest args plus a some() check. The remaining failure (the "string interpolation isnt surprising" test) requires full DOM activation infrastructure. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2615,3 +2615,159 @@
|
||||
(raise (host-get state "value"))
|
||||
(if state (host-get state "value") result)))
|
||||
result)))))
|
||||
|
||||
;; Convert one raw tokenizer token (a dict with :type and :value) into the
;; API token shape {:value … :type … :op …}.
;;   - ident/keyword and the literal/reference kinds map to fixed upper-case
;;     type names with :op false ("class" and "id" get their "." / "#" prefix
;;     put back on the value).
;;   - punctuation kinds map to fixed type names with :op true.
;;   - "op" tokens are named by their text; an unrecognised operator uses its
;;     own text as the type name.
;;   - any other raw type is passed through as (str type) with :op false and
;;     a missing value replaced by "".
(define
  hs-raw->api-token
  (fn
    (raw)
    (let
      ((kind (dict-get raw :type))
       (text (dict-get raw :value))
       (api-tok (fn (v ty is-op) {:value v :type ty :op is-op})))
      (cond
        ;; Non-operator tokens.
        (or (= kind "ident") (= kind "keyword"))
        (api-tok text "IDENTIFIER" false)
        (= kind "number") (api-tok text "NUMBER" false)
        (= kind "string") (api-tok text "STRING" false)
        (= kind "class") (api-tok (str "." text) "CLASS_REF" false)
        (= kind "id") (api-tok (str "#" text) "ID_REF" false)
        (= kind "attr") (api-tok text "ATTRIBUTE_REF" false)
        (= kind "style") (api-tok text "STYLE_REF" false)
        (= kind "selector") (api-tok text "QUERY_REF" false)
        (= kind "eof") (api-tok "<<<EOF>>>" "EOF" false)
        ;; Punctuation, reported as operators.
        (= kind "paren-open") (api-tok text "L_PAREN" true)
        (= kind "paren-close") (api-tok text "R_PAREN" true)
        (= kind "bracket-open") (api-tok text "L_BRACKET" true)
        (= kind "bracket-close") (api-tok text "R_BRACKET" true)
        (= kind "brace-open") (api-tok text "L_BRACE" true)
        (= kind "brace-close") (api-tok text "R_BRACE" true)
        (= kind "comma") (api-tok text "COMMA" true)
        (= kind "dot") (api-tok text "PERIOD" true)
        (= kind "colon") (api-tok text "COLON" true)
        ;; Generic operators: the type name is looked up from the text.
        (= kind "op")
        (api-tok
          text
          (cond
            (= text "+") "PLUS"
            (= text "-") "MINUS"
            (= text "*") "MULTIPLY"
            (= text "/") "SLASH"
            (= text "!") "EXCLAMATION"
            (= text "?") "QUESTION"
            (= text "#") "POUND"
            (= text "&") "AMPERSAND"
            (= text "=") "EQUALS"
            (= text "<") "L_ANG"
            (= text ">") "R_ANG"
            (= text "<=") "LTE_ANG"
            (= text ">=") "GTE_ANG"
            (= text "==") "EQ"
            (= text "===") "EQQ"
            (= text "..") "PERIOD_PERIOD"
            :else text)
          true)
        ;; Anything else (e.g. whitespace) keeps its raw type name.
        :else (api-tok (or text "") (str kind) false)))))
|
||||
|
||||
;; End-of-stream token in API shape. hs-stream-token and hs-stream-consume
;; fall back to this value when the token list has no entry at the
;; requested index.
(define hs-eof-sentinel {:value "<<<EOF>>>" :type "EOF" :op false})
|
||||
|
||||
;; Convert a list of raw tokenizer tokens to API tokens and drop every
;; token whose API type is "EOF".
(define
  hs-api-list
  (fn
    (raw-tokens)
    (let
      ((converted (map hs-raw->api-token raw-tokens)))
      (filter
        (fn (tok) (not (= (dict-get tok :type) "EOF")))
        converted))))
|
||||
|
||||
;; Build a token stream for `src`: a dict holding the cursor (:pos, starting
;; at 0), the API token list (:list) and the original text (:source).
;; Passing a bare :template among the extra arguments selects
;; hs-tokenize-template instead of hs-tokenize.
(define
  hs-tokens-of
  (fn
    (src &rest args)
    (let
      ((template-mode (some (fn (flag) (equal? flag :template)) args)))
      {:pos 0
       :list (hs-api-list
               (if
                 template-mode
                 (hs-tokenize-template src)
                 (hs-tokenize src)))
       :source src})))
|
||||
|
||||
;; Peek at the i-th upcoming token of stream `s` without consuming it.
;;   s - stream dict with "list" (API tokens) and "pos" (cursor index)
;;   i - lookahead distance; 0 means the next significant token
;; Whitespace tokens are skipped and do not count towards `i`.
;; Returns hs-eof-sentinel when the lookahead runs past the end of the list.
;; A negative `i` is treated like 0 (previously it never terminated, because
;; the counter was only ever compared with `=` against 0).
(define
  hs-stream-token
  (fn
    (s i)
    (let
      ((lst (get s "list")) (start (get s "pos")))
      (define
        find-nth
        (fn
          (j count)
          (let
            ((tok (nth lst j)))
            (cond
              ;; No token at j: everything beyond is EOF as well, so stop
              ;; here instead of stepping through the remaining count.
              (not tok) hs-eof-sentinel
              ;; Whitespace is transparent to lookahead.
              (= (get tok "type") "whitespace") (find-nth (+ j 1) count)
              ;; Still have significant tokens to skip.
              (> count 0) (find-nth (+ j 1) (- count 1))
              :else tok))))
      (find-nth start i))))
|
||||
|
||||
;; Consume and return the next significant token of stream `s`.
;; Leading whitespace tokens are skipped. When a real token is found the
;; stream's :pos is moved just past it; when the list is exhausted
;; hs-eof-sentinel is returned and :pos is left untouched.
(define
  hs-stream-consume
  (fn
    (s)
    (let
      ((tokens (get s "list")))
      ;; Index of the first non-whitespace entry at or after idx (may be one
      ;; past the end of the list).
      (define
        next-significant
        (fn
          (idx)
          (let
            ((cand (nth tokens idx)))
            (if
              (and cand (= (get cand "type") "whitespace"))
              (next-significant (+ idx 1))
              idx))))
      (let
        ((idx (next-significant (get s "pos"))))
        (let
          ((tok (or (nth tokens idx) hs-eof-sentinel)))
          (when
            (not (= (get tok "type") "EOF"))
            (dict-set! s :pos (+ idx 1)))
          tok)))))
|
||||
|
||||
;; True while the next significant token of stream `s` is not EOF.
(define
  hs-stream-has-more
  (fn
    (s)
    (let
      ((upcoming (hs-stream-token s 0)))
      (not (= (get upcoming "type") "EOF")))))
|
||||
|
||||
;; Accessor: the "type" field of an API token.
(define
  hs-token-type
  (fn (token) (get token "type")))
|
||||
|
||||
;; Accessor: the "value" field of an API token.
(define
  hs-token-value
  (fn (token) (get token "value")))
|
||||
|
||||
;; Accessor: the "op" field of an API token (true for operator tokens).
(define
  hs-token-op?
  (fn (token) (get token "op")))
|
||||
|
||||
@@ -334,11 +334,17 @@
|
||||
(= ch "r")
|
||||
(do (append! chars "\r") (hs-advance! 1))
|
||||
(= ch "b")
|
||||
(do (append! chars (char-from-code 8)) (hs-advance! 1))
|
||||
(do
|
||||
(append! chars (char-from-code 8))
|
||||
(hs-advance! 1))
|
||||
(= ch "f")
|
||||
(do (append! chars (char-from-code 12)) (hs-advance! 1))
|
||||
(do
|
||||
(append! chars (char-from-code 12))
|
||||
(hs-advance! 1))
|
||||
(= ch "v")
|
||||
(do (append! chars (char-from-code 11)) (hs-advance! 1))
|
||||
(do
|
||||
(append! chars (char-from-code 11))
|
||||
(hs-advance! 1))
|
||||
(= ch "\\")
|
||||
(do (append! chars "\\") (hs-advance! 1))
|
||||
(= ch quote-char)
|
||||
@@ -353,12 +359,16 @@
|
||||
(hs-hex-digit? (hs-peek 1)))
|
||||
(let
|
||||
((d1 (hs-hex-val (hs-cur)))
|
||||
(d2 (hs-hex-val (hs-peek 1))))
|
||||
(append! chars (char-from-code (+ (* d1 16) d2)))
|
||||
(d2 (hs-hex-val (hs-peek 1))))
|
||||
(append!
|
||||
chars
|
||||
(char-from-code (+ (* d1 16) d2)))
|
||||
(hs-advance! 2))
|
||||
(error "Invalid hexadecimal escape: \\x")))
|
||||
:else
|
||||
(do (append! chars "\\") (append! chars ch) (hs-advance! 1)))))
|
||||
:else (do
|
||||
(append! chars "\\")
|
||||
(append! chars ch)
|
||||
(hs-advance! 1)))))
|
||||
(loop))
|
||||
(= (hs-cur) quote-char)
|
||||
(hs-advance! 1)
|
||||
@@ -465,7 +475,13 @@
|
||||
scan!
|
||||
(fn
|
||||
()
|
||||
(skip-ws!)
|
||||
(do
|
||||
(let
|
||||
((ws-start pos))
|
||||
(skip-ws!)
|
||||
(when
|
||||
(and (> (len tokens) 0) (> pos ws-start))
|
||||
(hs-emit! "whitespace" (slice src ws-start pos) ws-start))))
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
@@ -489,6 +505,25 @@
|
||||
(do (hs-emit! "selector" (read-selector) start) (scan!))
|
||||
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
|
||||
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
|
||||
(and
|
||||
(= ch ".")
|
||||
(< (+ pos 1) src-len)
|
||||
(or
|
||||
(hs-letter? (hs-peek 1))
|
||||
(= (hs-peek 1) "-")
|
||||
(= (hs-peek 1) "_"))
|
||||
(> (len tokens) 0)
|
||||
(let
|
||||
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
|
||||
(or
|
||||
(= lt "paren-close")
|
||||
(= lt "brace-close")
|
||||
(= lt "bracket-close"))))
|
||||
(do
|
||||
(hs-emit! "dot" "." start)
|
||||
(hs-advance! 1)
|
||||
(hs-emit! "ident" (read-ident pos) start)
|
||||
(scan!))
|
||||
(and
|
||||
(= ch ".")
|
||||
(< (+ pos 1) src-len)
|
||||
@@ -500,6 +535,22 @@
|
||||
(hs-advance! 1)
|
||||
(hs-emit! "class" (read-class-name pos) start)
|
||||
(scan!))
|
||||
(and
|
||||
(= ch "#")
|
||||
(< (+ pos 1) src-len)
|
||||
(hs-ident-start? (hs-peek 1))
|
||||
(> (len tokens) 0)
|
||||
(let
|
||||
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
|
||||
(or
|
||||
(= lt "paren-close")
|
||||
(= lt "brace-close")
|
||||
(= lt "bracket-close"))))
|
||||
(do
|
||||
(hs-emit! "op" "#" start)
|
||||
(hs-advance! 1)
|
||||
(hs-emit! "ident" (read-ident pos) start)
|
||||
(scan!))
|
||||
(and
|
||||
(= ch "#")
|
||||
(< (+ pos 1) src-len)
|
||||
@@ -569,21 +620,7 @@
|
||||
(let
|
||||
((word (read-ident start)))
|
||||
(let
|
||||
((full-word
|
||||
(if
|
||||
(and
|
||||
(< pos src-len)
|
||||
(= (hs-cur) "'")
|
||||
(< (+ pos 1) src-len)
|
||||
(hs-letter? (hs-peek 1))
|
||||
(not
|
||||
(and
|
||||
(= (hs-peek 1) "s")
|
||||
(or
|
||||
(>= (+ pos 2) src-len)
|
||||
(not (hs-ident-char? (hs-peek 2)))))))
|
||||
(do (hs-advance! 1) (str word "'" (read-ident pos)))
|
||||
word)))
|
||||
((full-word (if (and (< pos src-len) (= (hs-cur) "'") (< (+ pos 1) src-len) (hs-letter? (hs-peek 1)) (not (and (= (hs-peek 1) "s") (or (>= (+ pos 2) src-len) (not (hs-ident-char? (hs-peek 2))))))) (do (hs-advance! 1) (str word "'" (read-ident pos))) word)))
|
||||
(hs-emit!
|
||||
(if (hs-keyword? full-word) "keyword" "ident")
|
||||
full-word
|
||||
|
||||
Reference in New Issue
Block a user