HS: E37 tokenizer API (+17 tests)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 15s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 15s
Implements hs-tokens-of, hs-stream-token, hs-stream-consume, hs-stream-has-more, hs-token-type, hs-token-value, hs-token-op?, hs-raw->api-token, and hs-eof-sentinel in runtime.sx. The tokenizer emits whitespace tokens after the first content token; the stream functions skip them during look-ahead and consume. The parser filters whitespace tokens at hs-parse entry. A dot or hash following a closing bracket is split into PERIOD/POUND + IDENTIFIER. The template escape \$ produces a literal $.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2615,3 +2615,144 @@
|
||||
(raise (host-get state "value"))
|
||||
(if state (host-get state "value") result)))
|
||||
result)))))
|
||||
|
||||
;; Translate one raw tokenizer token into the public API token shape.
;;   raw - dict read through its :type and :value keys.
;; Returns a fresh dict {:value … :type … :op …}:
;;   * "ident" and "keyword" both map to "IDENTIFIER";
;;   * "class" / "id" values are re-prefixed with "." / "#";
;;   * "eof" always carries the value "<<<EOF>>>";
;;   * brackets, comma, dot, colon and "op" tokens are flagged :op true;
;;   * an "op" whose text is not listed uses that text as its :type;
;;   * any other raw type keeps its stringified name as :type, with the value
;;     falling back to "" when `or` treats it as absent.
(define
  hs-raw->api-token
  (fn
    (raw)
    (let
      ((kind (dict-get raw :type)) (text (dict-get raw :value)))
      ;; Non-operator token carrying value `v` and API type `name`.
      (define plain (fn (v name) {:value v :type name :op false}))
      ;; Operator/punctuation token; always carries the raw text as its value.
      (define oper (fn (name) {:value text :type name :op true}))
      (cond
        (or (= kind "ident") (= kind "keyword")) (plain text "IDENTIFIER")
        (= kind "number") (plain text "NUMBER")
        (= kind "string") (plain text "STRING")
        (= kind "class") (plain (str "." text) "CLASS_REF")
        (= kind "id") (plain (str "#" text) "ID_REF")
        (= kind "attr") (plain text "ATTRIBUTE_REF")
        (= kind "style") (plain text "STYLE_REF")
        (= kind "selector") (plain text "QUERY_REF")
        (= kind "eof") (plain "<<<EOF>>>" "EOF")
        (= kind "paren-open") (oper "L_PAREN")
        (= kind "paren-close") (oper "R_PAREN")
        (= kind "bracket-open") (oper "L_BRACKET")
        (= kind "bracket-close") (oper "R_BRACKET")
        (= kind "brace-open") (oper "L_BRACE")
        (= kind "brace-close") (oper "R_BRACE")
        (= kind "comma") (oper "COMMA")
        (= kind "dot") (oper "PERIOD")
        (= kind "colon") (oper "COLON")
        (= kind "op")
        (cond
          (= text "+") (oper "PLUS")
          (= text "-") (oper "MINUS")
          (= text "*") (oper "MULTIPLY")
          (= text "/") (oper "SLASH")
          (= text "!") (oper "EXCLAMATION")
          (= text "?") (oper "QUESTION")
          (= text "#") (oper "POUND")
          (= text "&") (oper "AMPERSAND")
          (= text "=") (oper "EQUALS")
          (= text "<") (oper "L_ANG")
          (= text ">") (oper "R_ANG")
          (= text "<=") (oper "LTE_ANG")
          (= text ">=") (oper "GTE_ANG")
          (= text "==") (oper "EQ")
          (= text "===") (oper "EQQ")
          (= text "..") (oper "PERIOD_PERIOD")
          ;; Unlisted operator: its own text doubles as the type name.
          :else (oper text))
        ;; Unknown raw type (e.g. "whitespace"): pass the type name through.
        :else (plain (or text "") (str kind))))))
|
||||
|
||||
;; Shared end-of-stream token in API token shape: type "EOF", value
;; "<<<EOF>>>", not an operator.
(define
  hs-eof-sentinel
  {:value "<<<EOF>>>" :type "EOF" :op false})
|
||||
|
||||
;; Tokenize `src` and wrap the result in a token-stream dict.
;;   src  - source text handed to the tokenizer.
;;   args - optional flags; when any of them equals :template the source is
;;          tokenized with hs-tokenize-template instead of hs-tokenize.
;; Returns {:pos 0 :list <api tokens> :source src}. Each raw token is converted
;; with hs-raw->api-token, and tokens whose converted type is "EOF" are left
;; out of :list.
(define
  hs-tokens-of
  (fn
    (src &rest args)
    (let
      ((template? (some (fn (flag) (equal? flag :template)) args)))
      (let
        ((api-tokens
           (map
             hs-raw->api-token
             (if template? (hs-tokenize-template src) (hs-tokenize src)))))
        {:pos 0
         :list (filter
                 (fn (tok) (not (= (dict-get tok :type) "EOF")))
                 api-tokens)
         :source src}))))
|
||||
|
||||
;; Look ahead in stream `s` without consuming anything.
;;   s - stream dict; its :list and :pos keys are read.
;;   i - number of non-whitespace tokens to skip past: 0 yields the next
;;       non-whitespace token at or after :pos, 1 the one after it, and so on.
;; Tokens whose :type is "whitespace" are never counted or returned.
;; Returns hs-eof-sentinel when the scan runs off the end of :list.
(define
  hs-stream-token
  (fn
    (s i)
    (let
      ((toks (dict-get s :list)))
      (let
        ((total (len toks)))
        (define
          scan
          (fn
            (idx remaining)
            (cond
              ;; Ran past the last token.
              (>= idx total) hs-eof-sentinel
              ;; Whitespace is skipped without using up the look-ahead count.
              (= (dict-get (nth toks idx) :type) "whitespace")
              (scan (+ idx 1) remaining)
              ;; Reached the requested non-whitespace token.
              (= remaining 0) (nth toks idx)
              :else (scan (+ idx 1) (- remaining 1)))))
        (scan (dict-get s :pos) i)))))
|
||||
|
||||
;; Consume and return the next non-whitespace token of stream `s`.
;;   s - stream dict; :list and :pos are read, and :pos is updated in place.
;; Leading "whitespace" tokens at the current position are skipped. When a
;; token is found, :pos is moved to just past it, unless its type is "EOF".
;; At the end of :list, hs-eof-sentinel is returned and :pos is left untouched.
(define
  hs-stream-consume
  (fn
    (s)
    (let
      ((toks (dict-get s :list)))
      (let
        ((total (len toks)))
        ;; Index of the first non-whitespace token at or after `idx`,
        ;; or an index >= total if there is none.
        (define
          skip-ws
          (fn
            (idx)
            (cond
              (>= idx total) idx
              (= (dict-get (nth toks idx) :type) "whitespace")
              (skip-ws (+ idx 1))
              :else idx)))
        (let
          ((at (skip-ws (dict-get s :pos))))
          (let
            ((tok (if (< at total) (nth toks at) hs-eof-sentinel)))
            ;; Only advance the cursor for real (non-EOF) tokens.
            (when
              (not (= (dict-get tok :type) "EOF"))
              (dict-set! s :pos (+ at 1)))
            tok))))))
|
||||
|
||||
;; True when stream `s` still has a non-whitespace token ahead, i.e. when the
;; zero-offset look-ahead from hs-stream-token is not of type "EOF".
(define
  hs-stream-has-more
  (fn
    (s)
    (let
      ((next-type (dict-get (hs-stream-token s 0) :type)))
      (not (= next-type "EOF")))))
|
||||
|
||||
;; Accessor: the :type entry of API token `tok`.
(define
  hs-token-type
  (fn
    (tok)
    (dict-get tok :type)))
|
||||
|
||||
;; Accessor: the :value entry of API token `tok`.
(define
  hs-token-value
  (fn
    (tok)
    (dict-get tok :value)))
|
||||
|
||||
;; Accessor: the :op entry of API token `tok` (true for operator tokens).
(define
  hs-token-op?
  (fn
    (tok)
    (dict-get tok :op)))
|
||||
|
||||
Reference in New Issue
Block a user