HS: E37 tokenizer API (+17 tests)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 15s

Implements hs-tokens-of, hs-stream-token, hs-stream-consume,
hs-stream-has-more, hs-token-type, hs-token-value, hs-token-op?,
hs-raw->api-token, hs-eof-sentinel in runtime.sx.

Tokenizer emits whitespace tokens after the first content token;
stream functions skip them for look-ahead and consume. Parser
filters whitespace tokens at hs-parse entry. Dot/hash after close
brackets split into PERIOD/POUND + IDENTIFIER. Template escape \$
produces literal $.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 19:54:40 +00:00
parent 92adf9d496
commit 54b54f4e19
8 changed files with 360 additions and 16 deletions

View File

@@ -895,9 +895,15 @@
(let
((ch (nth raw i)))
(if
(and (= ch "$") (< (+ i 1) n))
(and (= ch "\\") (< (+ i 1) n) (= (nth raw (+ i 1)) "$"))
(do
(set! buf (str buf "$"))
(set! i (+ i 2))
(tpl-collect))
(if
(= (nth raw (+ i 1)) "{")
(and (= ch "$") (< (+ i 1) n))
(if
(= (nth raw (+ i 1)) "{")
(let
((start (+ i 2)))
(let
@@ -931,10 +937,10 @@
(hs-to-sx (hs-compile ident)))))
(set! i end)
(tpl-collect))))))
(do
(set! buf (str buf ch))
(set! i (+ i 1))
(tpl-collect)))))))
(do
(set! buf (str buf ch))
(set! i (+ i 1))
(tpl-collect))))))))
(tpl-collect)
(tpl-flush)
(cons (quote str) parts))))

View File

@@ -9,7 +9,9 @@
(fn
(tokens src)
(let
((p 0) (tok-len (len tokens)))
((tokens (filter (fn (t) (not (= (get t "type") "whitespace"))) tokens))
(p 0)
(tok-len (len (filter (fn (t) (not (= (get t "type") "whitespace"))) tokens))))
(define tp (fn () (if (< p tok-len) (nth tokens p) nil)))
(define
tp-type

View File

@@ -2615,3 +2615,144 @@
(raise (host-get state "value"))
(if state (host-get state "value") result)))
result)))))
(define
hs-raw->api-token
(fn
(raw)
(let
((type (dict-get raw :type)) (value (dict-get raw :value)))
(cond
(= type "ident")
{:value value :type "IDENTIFIER" :op false}
(= type "keyword")
{:value value :type "IDENTIFIER" :op false}
(= type "number")
{:value value :type "NUMBER" :op false}
(= type "string")
{:value value :type "STRING" :op false}
(= type "class")
{:value (str "." value) :type "CLASS_REF" :op false}
(= type "id")
{:value (str "#" value) :type "ID_REF" :op false}
(= type "attr")
{:value value :type "ATTRIBUTE_REF" :op false}
(= type "style")
{:value value :type "STYLE_REF" :op false}
(= type "selector")
{:value value :type "QUERY_REF" :op false}
(= type "eof")
{:value "<<<EOF>>>" :type "EOF" :op false}
(= type "paren-open")
{:value value :type "L_PAREN" :op true}
(= type "paren-close")
{:value value :type "R_PAREN" :op true}
(= type "bracket-open")
{:value value :type "L_BRACKET" :op true}
(= type "bracket-close")
{:value value :type "R_BRACKET" :op true}
(= type "brace-open")
{:value value :type "L_BRACE" :op true}
(= type "brace-close")
{:value value :type "R_BRACE" :op true}
(= type "comma")
{:value value :type "COMMA" :op true}
(= type "dot")
{:value value :type "PERIOD" :op true}
(= type "colon")
{:value value :type "COLON" :op true}
(= type "op")
(cond
(= value "+") {:value value :type "PLUS" :op true}
(= value "-") {:value value :type "MINUS" :op true}
(= value "*") {:value value :type "MULTIPLY" :op true}
(= value "/") {:value value :type "SLASH" :op true}
(= value "!") {:value value :type "EXCLAMATION" :op true}
(= value "?") {:value value :type "QUESTION" :op true}
(= value "#") {:value value :type "POUND" :op true}
(= value "&") {:value value :type "AMPERSAND" :op true}
(= value "=") {:value value :type "EQUALS" :op true}
(= value "<") {:value value :type "L_ANG" :op true}
(= value ">") {:value value :type "R_ANG" :op true}
(= value "<=") {:value value :type "LTE_ANG" :op true}
(= value ">=") {:value value :type "GTE_ANG" :op true}
(= value "==") {:value value :type "EQ" :op true}
(= value "===") {:value value :type "EQQ" :op true}
(= value "..") {:value value :type "PERIOD_PERIOD" :op true}
:else {:value value :type value :op true})
:else {:value (or value "") :type (str type) :op false}))))
(define hs-eof-sentinel {:value "<<<EOF>>>" :type "EOF" :op false})
(define
hs-tokens-of
(fn
(src &rest args)
(let
((template (some (fn (a) (equal? a :template)) args)))
(let
((raw (if template (hs-tokenize-template src) (hs-tokenize src))))
{:pos 0 :list (filter (fn (t) (not (= (dict-get t :type) "EOF"))) (map hs-raw->api-token raw)) :source src}))))
(define
hs-stream-token
(fn
(s i)
(let
((lst (dict-get s :list))
(n (len (dict-get s :list))))
(define
find
(fn
(pos count)
(if
(>= pos n)
hs-eof-sentinel
(let
((tok (nth lst pos)))
(if
(= (dict-get tok :type) "whitespace")
(find (+ pos 1) count)
(if
(= count 0)
tok
(find (+ pos 1) (- count 1))))))))
(find (dict-get s :pos) i))))
(define
hs-stream-consume
(fn
(s)
(let
((lst (dict-get s :list))
(n (len (dict-get s :list))))
(define
find-pos
(fn
(pos)
(if
(>= pos n)
pos
(if
(= (dict-get (nth lst pos) :type) "whitespace")
(find-pos (+ pos 1))
pos))))
(let
((p (find-pos (dict-get s :pos))))
(let
((tok (if (>= p n) hs-eof-sentinel (nth lst p))))
(do
(when
(not (= (dict-get tok :type) "EOF"))
(dict-set! s :pos (+ p 1)))
tok))))))
(define
hs-stream-has-more
(fn (s) (not (= (dict-get (hs-stream-token s 0) :type) "EOF"))))
(define hs-token-type (fn (tok) (dict-get tok :type)))
(define hs-token-value (fn (tok) (dict-get tok :value)))
(define hs-token-op? (fn (tok) (dict-get tok :op)))

View File

@@ -465,7 +465,12 @@
scan!
(fn
()
(skip-ws!)
(let
((ws-start pos))
(skip-ws!)
(when
(and (> (len tokens) 0) (> pos ws-start))
(hs-emit! "whitespace" (slice src ws-start pos) ws-start)))
(when
(< pos src-len)
(let
@@ -489,6 +494,15 @@
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
(or (hs-letter? (hs-peek 1)) (= (hs-peek 1) "-") (= (hs-peek 1) "_"))
(> (len tokens) 0)
(let
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
(or (= lt "paren-close") (= lt "brace-close") (= lt "bracket-close"))))
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
@@ -500,6 +514,15 @@
(hs-advance! 1)
(hs-emit! "class" (read-class-name pos) start)
(scan!))
(and
(= ch "#")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1))
(> (len tokens) 0)
(let
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
(or (= lt "paren-close") (= lt "brace-close") (= lt "bracket-close"))))
(do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!))
(and
(= ch "#")
(< (+ pos 1) src-len)