HS: E37 tokenizer API (+17 tests)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 15s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 15s
Implements hs-tokens-of, hs-stream-token, hs-stream-consume, hs-stream-has-more, hs-token-type, hs-token-value, hs-token-op?, hs-raw->api-token, and hs-eof-sentinel in runtime.sx. The tokenizer now emits whitespace tokens after the first content token; the stream functions skip them during look-ahead and consumption. The parser filters whitespace tokens out at hs-parse entry. A dot or hash that directly follows a closing paren, brace, or bracket is split into PERIOD/POUND followed by IDENTIFIER. The template escape \$ produces a literal $. — Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -895,9 +895,15 @@
|
||||
(let
|
||||
((ch (nth raw i)))
|
||||
(if
|
||||
(and (= ch "$") (< (+ i 1) n))
|
||||
(and (= ch "\\") (< (+ i 1) n) (= (nth raw (+ i 1)) "$"))
|
||||
(do
|
||||
(set! buf (str buf "$"))
|
||||
(set! i (+ i 2))
|
||||
(tpl-collect))
|
||||
(if
|
||||
(= (nth raw (+ i 1)) "{")
|
||||
(and (= ch "$") (< (+ i 1) n))
|
||||
(if
|
||||
(= (nth raw (+ i 1)) "{")
|
||||
(let
|
||||
((start (+ i 2)))
|
||||
(let
|
||||
@@ -931,10 +937,10 @@
|
||||
(hs-to-sx (hs-compile ident)))))
|
||||
(set! i end)
|
||||
(tpl-collect))))))
|
||||
(do
|
||||
(set! buf (str buf ch))
|
||||
(set! i (+ i 1))
|
||||
(tpl-collect)))))))
|
||||
(do
|
||||
(set! buf (str buf ch))
|
||||
(set! i (+ i 1))
|
||||
(tpl-collect))))))))
|
||||
(tpl-collect)
|
||||
(tpl-flush)
|
||||
(cons (quote str) parts))))
|
||||
|
||||
@@ -9,7 +9,9 @@
|
||||
(fn
|
||||
(tokens src)
|
||||
(let
|
||||
((p 0) (tok-len (len tokens)))
|
||||
((tokens (filter (fn (t) (not (= (get t "type") "whitespace"))) tokens))
|
||||
(p 0)
|
||||
(tok-len (len (filter (fn (t) (not (= (get t "type") "whitespace"))) tokens))))
|
||||
(define tp (fn () (if (< p tok-len) (nth tokens p) nil)))
|
||||
(define
|
||||
tp-type
|
||||
|
||||
@@ -2615,3 +2615,144 @@
|
||||
(raise (host-get state "value"))
|
||||
(if state (host-get state "value") result)))
|
||||
result)))))
|
||||
|
||||
;; Translate one raw tokenizer token (a dict with :type and :value) into the
;; token-API shape: a dict with :value, an upper-case :type name, and an :op
;; flag that is true for punctuation/operator tokens.
;;
;; - "ident" and "keyword" both map to IDENTIFIER.
;; - "class" / "id" values get their "." / "#" sigil prepended.
;; - "eof" always yields the value "<<<EOF>>>".
;; - "op" tokens are named by operator text; an operator with no dedicated
;;   name uses its own text as the :type.
;; - Any other raw type passes through with (str type) as :type, "" in place
;;   of a falsy value, and :op false.
(define
  hs-raw->api-token
  (fn
    (raw)
    (let
      ((kind (dict-get raw :type)) (text (dict-get raw :value)))
      (let
        ;; Local constructor: same value, caller-chosen API type and op flag.
        ((mk (fn (api-type is-op) {:value text :type api-type :op is-op})))
        (cond
          ;; Non-operator tokens.
          (= kind "ident") (mk "IDENTIFIER" false)
          (= kind "keyword") (mk "IDENTIFIER" false)
          (= kind "number") (mk "NUMBER" false)
          (= kind "string") (mk "STRING" false)
          (= kind "class") {:value (str "." text) :type "CLASS_REF" :op false}
          (= kind "id") {:value (str "#" text) :type "ID_REF" :op false}
          (= kind "attr") (mk "ATTRIBUTE_REF" false)
          (= kind "style") (mk "STYLE_REF" false)
          (= kind "selector") (mk "QUERY_REF" false)
          (= kind "eof") {:value "<<<EOF>>>" :type "EOF" :op false}
          ;; Structural punctuation.
          (= kind "paren-open") (mk "L_PAREN" true)
          (= kind "paren-close") (mk "R_PAREN" true)
          (= kind "bracket-open") (mk "L_BRACKET" true)
          (= kind "bracket-close") (mk "R_BRACKET" true)
          (= kind "brace-open") (mk "L_BRACE" true)
          (= kind "brace-close") (mk "R_BRACE" true)
          (= kind "comma") (mk "COMMA" true)
          (= kind "dot") (mk "PERIOD" true)
          (= kind "colon") (mk "COLON" true)
          ;; Generic operators, named by their text.
          (= kind "op")
          (cond
            (= text "+") (mk "PLUS" true)
            (= text "-") (mk "MINUS" true)
            (= text "*") (mk "MULTIPLY" true)
            (= text "/") (mk "SLASH" true)
            (= text "!") (mk "EXCLAMATION" true)
            (= text "?") (mk "QUESTION" true)
            (= text "#") (mk "POUND" true)
            (= text "&") (mk "AMPERSAND" true)
            (= text "=") (mk "EQUALS" true)
            (= text "<") (mk "L_ANG" true)
            (= text ">") (mk "R_ANG" true)
            (= text "<=") (mk "LTE_ANG" true)
            (= text ">=") (mk "GTE_ANG" true)
            (= text "==") (mk "EQ" true)
            (= text "===") (mk "EQQ" true)
            (= text "..") (mk "PERIOD_PERIOD" true)
            ;; Unnamed operator: its text doubles as the type.
            :else (mk text true))
          ;; Unknown raw type: pass it through as a non-operator.
          :else {:value (or text "") :type (str kind) :op false})))))
|
||||
|
||||
;; Shared end-of-input token: type "EOF", value "<<<EOF>>>", not an operator.
(define
  hs-eof-sentinel
  {:value "<<<EOF>>>"
   :type "EOF"
   :op false})
|
||||
|
||||
;; Tokenize `src` and wrap the result in a token-stream dict:
;;   :pos    - cursor into :list, starting at 0
;;   :list   - API-shaped tokens with every "EOF" token removed
;;   :source - the original source string
;; Passing :template among the extra arguments selects
;; hs-tokenize-template instead of hs-tokenize.
(define
  hs-tokens-of
  (fn
    (src &rest args)
    (let
      ((template? (some (fn (opt) (equal? opt :template)) args)))
      (let
        ((raw-tokens
           (if template? (hs-tokenize-template src) (hs-tokenize src))))
        (let
          ((api-tokens (map hs-raw->api-token raw-tokens)))
          {:pos 0
           :list (filter
                   (fn (tok) (not (= (dict-get tok :type) "EOF")))
                   api-tokens)
           :source src})))))
|
||||
|
||||
;; Look ahead in stream `s` without moving its cursor: return the `i`-th
;; non-whitespace token at or after :pos (i = 0 is the next one).
;; Returns hs-eof-sentinel when the list runs out first.
(define
  hs-stream-token
  (fn
    (s i)
    (let
      ((toks (dict-get s :list))
       (total (len (dict-get s :list))))
      ;; Walk forward from idx; `remaining` counts the content tokens still
      ;; to be skipped. Whitespace tokens never decrement it.
      (define
        nth-content
        (fn
          (idx remaining)
          (cond
            (>= idx total) hs-eof-sentinel
            (= (dict-get (nth toks idx) :type) "whitespace")
            (nth-content (+ idx 1) remaining)
            (= remaining 0) (nth toks idx)
            :else (nth-content (+ idx 1) (- remaining 1)))))
      (nth-content (dict-get s :pos) i))))
|
||||
|
||||
;; Consume and return the next non-whitespace token of stream `s`.
;; The cursor (:pos) is moved to just past the returned token. When only
;; whitespace (or nothing) remains, hs-eof-sentinel is returned and :pos is
;; left untouched.
(define
  hs-stream-consume
  (fn
    (s)
    (let
      ((toks (dict-get s :list))
       (total (len (dict-get s :list))))
      ;; Index of the first non-whitespace token at or after idx, or an
      ;; index >= total when there is none.
      (define
        first-content
        (fn
          (idx)
          (if
            (and
              (< idx total)
              (= (dict-get (nth toks idx) :type) "whitespace"))
            (first-content (+ idx 1))
            idx)))
      (let
        ((at (first-content (dict-get s :pos))))
        (if
          (>= at total)
          hs-eof-sentinel
          (let
            ((tok (nth toks at)))
            (do
              ;; A token typed "EOF" is returned without advancing.
              (when
                (not (= (dict-get tok :type) "EOF"))
                (dict-set! s :pos (+ at 1)))
              tok)))))))
|
||||
|
||||
;; True while stream `s` still has a non-whitespace token ahead, i.e. the
;; zero-offset look-ahead is not an "EOF" token.
(define
  hs-stream-has-more
  (fn
    (s)
    (let
      ((upcoming (hs-stream-token s 0)))
      (not (= (dict-get upcoming :type) "EOF")))))
|
||||
|
||||
;; Field accessors for API tokens (dicts with :type, :value and :op).

;; The token's :type entry.
(define
  hs-token-type
  (fn (tok) (dict-get tok :type)))

;; The token's :value entry.
(define
  hs-token-value
  (fn (tok) (dict-get tok :value)))

;; The token's :op entry (true for operator/punctuation tokens).
(define
  hs-token-op?
  (fn (tok) (dict-get tok :op)))
|
||||
|
||||
@@ -465,7 +465,12 @@
|
||||
scan!
|
||||
(fn
|
||||
()
|
||||
(skip-ws!)
|
||||
(let
|
||||
((ws-start pos))
|
||||
(skip-ws!)
|
||||
(when
|
||||
(and (> (len tokens) 0) (> pos ws-start))
|
||||
(hs-emit! "whitespace" (slice src ws-start pos) ws-start)))
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
@@ -489,6 +494,15 @@
|
||||
(do (hs-emit! "selector" (read-selector) start) (scan!))
|
||||
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
|
||||
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
|
||||
(and
|
||||
(= ch ".")
|
||||
(< (+ pos 1) src-len)
|
||||
(or (hs-letter? (hs-peek 1)) (= (hs-peek 1) "-") (= (hs-peek 1) "_"))
|
||||
(> (len tokens) 0)
|
||||
(let
|
||||
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
|
||||
(or (= lt "paren-close") (= lt "brace-close") (= lt "bracket-close"))))
|
||||
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
|
||||
(and
|
||||
(= ch ".")
|
||||
(< (+ pos 1) src-len)
|
||||
@@ -500,6 +514,15 @@
|
||||
(hs-advance! 1)
|
||||
(hs-emit! "class" (read-class-name pos) start)
|
||||
(scan!))
|
||||
(and
|
||||
(= ch "#")
|
||||
(< (+ pos 1) src-len)
|
||||
(hs-ident-start? (hs-peek 1))
|
||||
(> (len tokens) 0)
|
||||
(let
|
||||
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
|
||||
(or (= lt "paren-close") (= lt "brace-close") (= lt "bracket-close"))))
|
||||
(do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!))
|
||||
(and
|
||||
(= ch "#")
|
||||
(< (+ pos 1) src-len)
|
||||
|
||||
@@ -895,9 +895,15 @@
|
||||
(let
|
||||
((ch (nth raw i)))
|
||||
(if
|
||||
(and (= ch "$") (< (+ i 1) n))
|
||||
(and (= ch "\\") (< (+ i 1) n) (= (nth raw (+ i 1)) "$"))
|
||||
(do
|
||||
(set! buf (str buf "$"))
|
||||
(set! i (+ i 2))
|
||||
(tpl-collect))
|
||||
(if
|
||||
(= (nth raw (+ i 1)) "{")
|
||||
(and (= ch "$") (< (+ i 1) n))
|
||||
(if
|
||||
(= (nth raw (+ i 1)) "{")
|
||||
(let
|
||||
((start (+ i 2)))
|
||||
(let
|
||||
@@ -931,10 +937,10 @@
|
||||
(hs-to-sx (hs-compile ident)))))
|
||||
(set! i end)
|
||||
(tpl-collect))))))
|
||||
(do
|
||||
(set! buf (str buf ch))
|
||||
(set! i (+ i 1))
|
||||
(tpl-collect)))))))
|
||||
(do
|
||||
(set! buf (str buf ch))
|
||||
(set! i (+ i 1))
|
||||
(tpl-collect))))))))
|
||||
(tpl-collect)
|
||||
(tpl-flush)
|
||||
(cons (quote str) parts))))
|
||||
|
||||
@@ -9,7 +9,9 @@
|
||||
(fn
|
||||
(tokens src)
|
||||
(let
|
||||
((p 0) (tok-len (len tokens)))
|
||||
((tokens (filter (fn (t) (not (= (get t "type") "whitespace"))) tokens))
|
||||
(p 0)
|
||||
(tok-len (len (filter (fn (t) (not (= (get t "type") "whitespace"))) tokens))))
|
||||
(define tp (fn () (if (< p tok-len) (nth tokens p) nil)))
|
||||
(define
|
||||
tp-type
|
||||
|
||||
@@ -2615,3 +2615,144 @@
|
||||
(raise (host-get state "value"))
|
||||
(if state (host-get state "value") result)))
|
||||
result)))))
|
||||
|
||||
;; Translate one raw tokenizer token (a dict with :type and :value) into the
;; token-API shape: a dict with :value, an upper-case :type name, and an :op
;; flag that is true for punctuation/operator tokens.
;;
;; - "ident" and "keyword" both map to IDENTIFIER.
;; - "class" / "id" values get their "." / "#" sigil prepended.
;; - "eof" always yields the value "<<<EOF>>>".
;; - "op" tokens are named by operator text; an operator with no dedicated
;;   name uses its own text as the :type.
;; - Any other raw type passes through with (str type) as :type, "" in place
;;   of a falsy value, and :op false.
(define
  hs-raw->api-token
  (fn
    (raw)
    (let
      ((kind (dict-get raw :type)) (text (dict-get raw :value)))
      (let
        ;; Local constructor: same value, caller-chosen API type and op flag.
        ((mk (fn (api-type is-op) {:value text :type api-type :op is-op})))
        (cond
          ;; Non-operator tokens.
          (= kind "ident") (mk "IDENTIFIER" false)
          (= kind "keyword") (mk "IDENTIFIER" false)
          (= kind "number") (mk "NUMBER" false)
          (= kind "string") (mk "STRING" false)
          (= kind "class") {:value (str "." text) :type "CLASS_REF" :op false}
          (= kind "id") {:value (str "#" text) :type "ID_REF" :op false}
          (= kind "attr") (mk "ATTRIBUTE_REF" false)
          (= kind "style") (mk "STYLE_REF" false)
          (= kind "selector") (mk "QUERY_REF" false)
          (= kind "eof") {:value "<<<EOF>>>" :type "EOF" :op false}
          ;; Structural punctuation.
          (= kind "paren-open") (mk "L_PAREN" true)
          (= kind "paren-close") (mk "R_PAREN" true)
          (= kind "bracket-open") (mk "L_BRACKET" true)
          (= kind "bracket-close") (mk "R_BRACKET" true)
          (= kind "brace-open") (mk "L_BRACE" true)
          (= kind "brace-close") (mk "R_BRACE" true)
          (= kind "comma") (mk "COMMA" true)
          (= kind "dot") (mk "PERIOD" true)
          (= kind "colon") (mk "COLON" true)
          ;; Generic operators, named by their text.
          (= kind "op")
          (cond
            (= text "+") (mk "PLUS" true)
            (= text "-") (mk "MINUS" true)
            (= text "*") (mk "MULTIPLY" true)
            (= text "/") (mk "SLASH" true)
            (= text "!") (mk "EXCLAMATION" true)
            (= text "?") (mk "QUESTION" true)
            (= text "#") (mk "POUND" true)
            (= text "&") (mk "AMPERSAND" true)
            (= text "=") (mk "EQUALS" true)
            (= text "<") (mk "L_ANG" true)
            (= text ">") (mk "R_ANG" true)
            (= text "<=") (mk "LTE_ANG" true)
            (= text ">=") (mk "GTE_ANG" true)
            (= text "==") (mk "EQ" true)
            (= text "===") (mk "EQQ" true)
            (= text "..") (mk "PERIOD_PERIOD" true)
            ;; Unnamed operator: its text doubles as the type.
            :else (mk text true))
          ;; Unknown raw type: pass it through as a non-operator.
          :else {:value (or text "") :type (str kind) :op false})))))
|
||||
|
||||
;; Shared end-of-input token: type "EOF", value "<<<EOF>>>", not an operator.
(define
  hs-eof-sentinel
  {:value "<<<EOF>>>"
   :type "EOF"
   :op false})
|
||||
|
||||
;; Tokenize `src` and wrap the result in a token-stream dict:
;;   :pos    - cursor into :list, starting at 0
;;   :list   - API-shaped tokens with every "EOF" token removed
;;   :source - the original source string
;; Passing :template among the extra arguments selects
;; hs-tokenize-template instead of hs-tokenize.
(define
  hs-tokens-of
  (fn
    (src &rest args)
    (let
      ((template? (some (fn (opt) (equal? opt :template)) args)))
      (let
        ((raw-tokens
           (if template? (hs-tokenize-template src) (hs-tokenize src))))
        (let
          ((api-tokens (map hs-raw->api-token raw-tokens)))
          {:pos 0
           :list (filter
                   (fn (tok) (not (= (dict-get tok :type) "EOF")))
                   api-tokens)
           :source src})))))
|
||||
|
||||
;; Look ahead in stream `s` without moving its cursor: return the `i`-th
;; non-whitespace token at or after :pos (i = 0 is the next one).
;; Returns hs-eof-sentinel when the list runs out first.
(define
  hs-stream-token
  (fn
    (s i)
    (let
      ((toks (dict-get s :list))
       (total (len (dict-get s :list))))
      ;; Walk forward from idx; `remaining` counts the content tokens still
      ;; to be skipped. Whitespace tokens never decrement it.
      (define
        nth-content
        (fn
          (idx remaining)
          (cond
            (>= idx total) hs-eof-sentinel
            (= (dict-get (nth toks idx) :type) "whitespace")
            (nth-content (+ idx 1) remaining)
            (= remaining 0) (nth toks idx)
            :else (nth-content (+ idx 1) (- remaining 1)))))
      (nth-content (dict-get s :pos) i))))
|
||||
|
||||
;; Consume and return the next non-whitespace token of stream `s`.
;; The cursor (:pos) is moved to just past the returned token. When only
;; whitespace (or nothing) remains, hs-eof-sentinel is returned and :pos is
;; left untouched.
(define
  hs-stream-consume
  (fn
    (s)
    (let
      ((toks (dict-get s :list))
       (total (len (dict-get s :list))))
      ;; Index of the first non-whitespace token at or after idx, or an
      ;; index >= total when there is none.
      (define
        first-content
        (fn
          (idx)
          (if
            (and
              (< idx total)
              (= (dict-get (nth toks idx) :type) "whitespace"))
            (first-content (+ idx 1))
            idx)))
      (let
        ((at (first-content (dict-get s :pos))))
        (if
          (>= at total)
          hs-eof-sentinel
          (let
            ((tok (nth toks at)))
            (do
              ;; A token typed "EOF" is returned without advancing.
              (when
                (not (= (dict-get tok :type) "EOF"))
                (dict-set! s :pos (+ at 1)))
              tok)))))))
|
||||
|
||||
;; True while stream `s` still has a non-whitespace token ahead, i.e. the
;; zero-offset look-ahead is not an "EOF" token.
(define
  hs-stream-has-more
  (fn
    (s)
    (let
      ((upcoming (hs-stream-token s 0)))
      (not (= (dict-get upcoming :type) "EOF")))))
|
||||
|
||||
;; Field accessors for API tokens (dicts with :type, :value and :op).

;; The token's :type entry.
(define
  hs-token-type
  (fn (tok) (dict-get tok :type)))

;; The token's :value entry.
(define
  hs-token-value
  (fn (tok) (dict-get tok :value)))

;; The token's :op entry (true for operator/punctuation tokens).
(define
  hs-token-op?
  (fn (tok) (dict-get tok :op)))
|
||||
|
||||
@@ -465,7 +465,12 @@
|
||||
scan!
|
||||
(fn
|
||||
()
|
||||
(skip-ws!)
|
||||
(let
|
||||
((ws-start pos))
|
||||
(skip-ws!)
|
||||
(when
|
||||
(and (> (len tokens) 0) (> pos ws-start))
|
||||
(hs-emit! "whitespace" (slice src ws-start pos) ws-start)))
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
@@ -489,6 +494,15 @@
|
||||
(do (hs-emit! "selector" (read-selector) start) (scan!))
|
||||
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
|
||||
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
|
||||
(and
|
||||
(= ch ".")
|
||||
(< (+ pos 1) src-len)
|
||||
(or (hs-letter? (hs-peek 1)) (= (hs-peek 1) "-") (= (hs-peek 1) "_"))
|
||||
(> (len tokens) 0)
|
||||
(let
|
||||
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
|
||||
(or (= lt "paren-close") (= lt "brace-close") (= lt "bracket-close"))))
|
||||
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
|
||||
(and
|
||||
(= ch ".")
|
||||
(< (+ pos 1) src-len)
|
||||
@@ -500,6 +514,15 @@
|
||||
(hs-advance! 1)
|
||||
(hs-emit! "class" (read-class-name pos) start)
|
||||
(scan!))
|
||||
(and
|
||||
(= ch "#")
|
||||
(< (+ pos 1) src-len)
|
||||
(hs-ident-start? (hs-peek 1))
|
||||
(> (len tokens) 0)
|
||||
(let
|
||||
((lt (dict-get (nth tokens (- (len tokens) 1)) :type)))
|
||||
(or (= lt "paren-close") (= lt "brace-close") (= lt "bracket-close"))))
|
||||
(do (hs-emit! "op" "#" start) (hs-advance! 1) (scan!))
|
||||
(and
|
||||
(= ch "#")
|
||||
(< (+ pos 1) src-len)
|
||||
|
||||
Reference in New Issue
Block a user