Step 18 (part 1): _hyperscript tokenizer — 38 tests
lib/hyperscript/tokenizer.sx — tokenizes real _hyperscript syntax into a typed token stream. Handles: keywords (on, set, add, toggle, if, then, from, etc.); DOM literals (.class, #id, @attr, *style, :local, <sel/>); strings (single/double quoted, with escapes) and template literals; numbers (integers, decimals, time units: 100ms, 2s); operators (==, !=, +, -, 's possessive); punctuation (parens, brackets, braces, commas, dots); line comments (// to EOL). The parser will disambiguate .name as class vs property access from context. Possessive 's is correctly distinguished from single-quote strings. 2952/2952 tests, zero failures. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
515
lib/hyperscript/tokenizer.sx
Normal file
515
lib/hyperscript/tokenizer.sx
Normal file
@@ -0,0 +1,515 @@
|
||||
;; _hyperscript tokenizer — produces token stream from hyperscript source
|
||||
;;
|
||||
;; Tokens: {:type T :value V :pos P}
|
||||
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
|
||||
;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
|
||||
;; "bracket-close" "brace-open" "brace-close" "comma" "colon"
|
||||
;; "template" "local" "eof"
|
||||
|
||||
;; ── Token constructor ─────────────────────────────────────────────
|
||||
|
||||
;; Build one token record from its kind, its value and its source offset.
;; The result is a dict with the keys :type, :value and :pos.
(define
  hs-make-token
  (fn (type value pos) {:type type :value value :pos pos}))
|
||||
|
||||
;; ── Character predicates ──────────────────────────────────────────
|
||||
|
||||
;; True when c compares between "0" and "9" inclusive.
(define hs-digit? (fn (c) (and (<= "0" c) (>= "9" c))))
|
||||
|
||||
;; True when c compares inside "a".."z" or inside "A".."Z".
(define
  hs-letter?
  (fn
    (c)
    (cond
      (and (<= "a" c) (>= "z" c)) true
      :else (and (<= "A" c) (>= "Z" c)))))
|
||||
|
||||
;; True when c may begin an identifier: a letter, "_" or "$".
(define
  hs-ident-start?
  (fn (c) (if (hs-letter? c) true (or (= c "_") (= c "$")))))
|
||||
|
||||
;; True when c may appear inside an identifier: anything that can start
;; one (letter, "_", "$"), plus digits and "-".
(define
  hs-ident-char?
  (fn (c) (or (hs-ident-start? c) (hs-digit? c) (= c "-"))))
|
||||
|
||||
;; True when c is a space, tab, newline or carriage return.
(define
  hs-ws?
  (fn
    (c)
    (cond
      (= c " ") true
      (= c "\t") true
      (= c "\n") true
      :else (= c "\r"))))
|
||||
|
||||
;; ── Keyword set ───────────────────────────────────────────────────
|
||||
|
||||
;; Words that hs-tokenize reports with type "keyword" instead of "ident".
;; Kept as a flat list of strings; several entries share a line purely for
;; readability.
(define
  hs-keywords
  (list
    "on" "end" "set" "to" "put" "into" "before" "after"
    "add" "remove" "toggle" "if" "else" "otherwise" "then" "from"
    "in" "of" "for" "until" "wait" "send" "trigger" "call"
    "get" "take" "log" "hide" "show" "repeat" "while" "times"
    "forever" "break" "continue" "return" "throw" "catch" "finally" "def"
    "tell" "make" "fetch" "as" "with" "every" "or" "and"
    "not" "is" "no" "the" "my" "me" "it" "its"
    "result" "true" "false" "null" "when" "between" "at" "by"
    "queue" "elsewhere" "event" "target" "detail" "sender" "index" "increment"
    "decrement" "append" "settle" "transition" "over" "closest" "next" "previous"
    "first" "last" "random" "empty" "exists" "matches" "contains" "do"
    "unless" "you" "your" "new" "init" "start" "go" "js"
    "less" "than" "greater" "class" "anything"))
|
||||
|
||||
;; Truthy when word equals one of the entries of hs-keywords.
(define
  hs-keyword?
  (fn (word) (some (fn (candidate) (= candidate word)) hs-keywords)))
|
||||
|
||||
;; ── Main tokenizer ────────────────────────────────────────────────
|
||||
|
||||
;; hs-tokenize: turn a _hyperscript source string into a list of tokens.
;;
;; src — the source text; read one character at a time with nth and cut
;;       into lexemes with slice.
;; Returns the list of tokens built with hs-make-token. The last element
;; is always an "eof" token whose value is nil and whose pos is the cursor
;; position at which scanning stopped.
;;
;; All helpers below close over three pieces of state: `tokens` (the output
;; list, appended in place), `pos` (the cursor, mutated with set!) and
;; `src-len`. Characters that match no rule are skipped without a token.
(define
  hs-tokenize
  (fn
    (src)
    (let
      ((tokens (list)) (pos 0) (src-len (len src)))
      ;; Character `offset` places ahead of the cursor, or nil past the end.
      (define
        hs-peek
        (fn
          (offset)
          (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
      (define hs-cur (fn () (hs-peek 0)))
      (define hs-advance! (fn (n) (set! pos (+ pos n))))
      ;; True when the character `offset` places ahead cannot continue a
      ;; word: end of input, or not a letter, digit, "_" or "$". "-" is
      ;; deliberately treated as a break so that "2s-1s" still yields "2s".
      (define
        hs-word-break?
        (fn
          (offset)
          (or
            (>= (+ pos offset) src-len)
            (let
              ((c (hs-peek offset)))
              (not (or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$")))))))
      ;; With the cursor on a single quote: true when it begins a possessive
      ;; 's, i.e. the next char is "s" and the char after that (if any) is
      ;; not an identifier char. The caller checks the quote itself.
      (define
        hs-possessive?
        (fn
          ()
          (and
            (< (+ pos 1) src-len)
            (= (hs-peek 1) "s")
            (or
              (>= (+ pos 2) src-len)
              (not (hs-ident-char? (hs-peek 2)))))))
      ;; Move the cursor past a run of whitespace.
      (define
        skip-ws!
        (fn
          ()
          (when
            (and (< pos src-len) (hs-ws? (hs-cur)))
            (hs-advance! 1)
            (skip-ws!))))
      ;; Move the cursor up to (not past) the next newline or end of input.
      (define
        skip-comment!
        (fn
          ()
          (when
            (and (< pos src-len) (not (= (hs-cur) "\n")))
            (hs-advance! 1)
            (skip-comment!))))
      ;; Move the cursor past a run of identifier characters.
      (define
        skip-ident-chars!
        (fn
          ()
          (when
            (and (< pos src-len) (hs-ident-char? (hs-cur)))
            (hs-advance! 1)
            (skip-ident-chars!))))
      ;; Consume identifier characters and return the text from `start` to
      ;; the cursor. The loop lives in its own helper so the slice happens
      ;; once rather than once per consumed character.
      (define
        read-ident
        (fn (start) (skip-ident-chars!) (slice src start pos)))
      ;; Move the cursor past a run of digits.
      (define
        skip-digits!
        (fn
          ()
          (when
            (and (< pos src-len) (hs-digit? (hs-cur)))
            (hs-advance! 1)
            (skip-digits!))))
      ;; Consume a number: digits, at most one ".digits" fraction, and at
      ;; most one "ms" or "s" unit suffix. Returns the text from `start`.
      ;; The digit loop is a separate helper so that the fraction and unit
      ;; checks run exactly once; "1.2.3" therefore stops after "1.2".
      (define
        read-number
        (fn
          (start)
          (skip-digits!)
          ;; A "." only belongs to the number when a digit follows it.
          (when
            (and
              (< (+ pos 1) src-len)
              (= (hs-cur) ".")
              (hs-digit? (hs-peek 1)))
            (hs-advance! 1)
            (skip-digits!))
          ;; Take the unit only when it is a whole word, so "1size" does not
          ;; lose its "s" to the number.
          (cond
            (and
              (< (+ pos 1) src-len)
              (= (hs-cur) "m")
              (= (hs-peek 1) "s")
              (hs-word-break? 2))
            (hs-advance! 2)
            (and (< pos src-len) (= (hs-cur) "s") (hs-word-break? 1))
            (hs-advance! 1)
            :else nil)
          (slice src start pos)))
      ;; Consume a quoted string whose opening quote is under the cursor and
      ;; return its decoded contents. \n, \t, \\ and an escaped quote-char
      ;; are decoded; any other escape keeps its backslash. An unterminated
      ;; string runs to end of input.
      (define
        read-string
        (fn
          (quote-char)
          (let
            ((chars (list)))
            (hs-advance! 1)
            (define
              loop
              (fn
                ()
                (cond
                  (>= pos src-len)
                  nil
                  (= (hs-cur) "\\")
                  (do
                    (hs-advance! 1)
                    (when
                      (< pos src-len)
                      (let
                        ((ch (hs-cur)))
                        (cond
                          (= ch "n")
                          (append! chars "\n")
                          (= ch "t")
                          (append! chars "\t")
                          (= ch "\\")
                          (append! chars "\\")
                          (= ch quote-char)
                          (append! chars quote-char)
                          :else (do (append! chars "\\") (append! chars ch)))
                        (hs-advance! 1)))
                    (loop))
                  (= (hs-cur) quote-char)
                  (hs-advance! 1)
                  :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
            (loop)
            (join "" chars))))
      ;; Consume a backtick template whose opening backtick is under the
      ;; cursor and return its raw contents. "${ ... }" sections are copied
      ;; verbatim, tracking brace depth so nested braces stay inside.
      (define
        read-template
        (fn
          ()
          (let
            ((chars (list)))
            (hs-advance! 1)
            (define
              loop
              (fn
                ()
                (cond
                  (>= pos src-len)
                  nil
                  (= (hs-cur) "`")
                  (hs-advance! 1)
                  (and
                    (= (hs-cur) "$")
                    (< (+ pos 1) src-len)
                    (= (hs-peek 1) "{"))
                  (do
                    (append! chars "${")
                    (hs-advance! 2)
                    (let
                      ((depth 1))
                      (define
                        inner
                        (fn
                          ()
                          (when
                            (and (< pos src-len) (> depth 0))
                            (cond
                              (= (hs-cur) "{")
                              (do
                                (set! depth (+ depth 1))
                                (append! chars (hs-cur))
                                (hs-advance! 1)
                                (inner))
                              (= (hs-cur) "}")
                              (do
                                (set! depth (- depth 1))
                                ;; The brace that closes the section is
                                ;; appended by the caller, not here.
                                (when (> depth 0) (append! chars (hs-cur)))
                                (hs-advance! 1)
                                (when (> depth 0) (inner)))
                              :else (do
                                (append! chars (hs-cur))
                                (hs-advance! 1)
                                (inner))))))
                      (inner))
                    (append! chars "}")
                    (loop))
                  :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
            (loop)
            (join "" chars))))
      ;; Consume a "<sel/>" literal whose "<" is under the cursor and return
      ;; the text between "<" and "/>". Unterminated input runs to the end.
      (define
        read-selector
        (fn
          ()
          (let
            ((chars (list)))
            (hs-advance! 1)
            (define
              loop
              (fn
                ()
                (cond
                  (>= pos src-len)
                  nil
                  (and
                    (= (hs-cur) "/")
                    (< (+ pos 1) src-len)
                    (= (hs-peek 1) ">"))
                  (hs-advance! 2)
                  :else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
            (loop)
            (join "" chars))))
      ;; Move the cursor past a run of class-name characters: identifier
      ;; chars plus ":", "\", "[", "]", "(" and ")". A backslash also takes
      ;; the character after it, whatever that is.
      (define
        skip-class-chars!
        (fn
          ()
          (when
            (and
              (< pos src-len)
              (or
                (hs-ident-char? (hs-cur))
                (= (hs-cur) ":")
                (= (hs-cur) "\\")
                (= (hs-cur) "[")
                (= (hs-cur) "]")
                (= (hs-cur) "(")
                (= (hs-cur) ")")))
            (when (= (hs-cur) "\\") (hs-advance! 1))
            ;; Guarded so a trailing backslash cannot push pos past src-len.
            (when (< pos src-len) (hs-advance! 1))
            (skip-class-chars!))))
      ;; Consume a class name and return the text from `start` to the cursor.
      (define
        read-class-name
        (fn (start) (skip-class-chars!) (slice src start pos)))
      ;; Append one token to the output list.
      (define
        hs-emit!
        (fn
          (type value start)
          (append! tokens (hs-make-token type value start))))
      ;; Emit a token for the single character under the cursor and step
      ;; past it.
      (define
        hs-emit-char!
        (fn (type value start) (hs-emit! type value start) (hs-advance! 1)))
      ;; Main loop: skip whitespace, classify the next lexeme by its first
      ;; one to three characters, emit it, and repeat until end of input.
      ;; Clause order matters: the prefixed literals (.class #id @attr
      ;; *style :local) are tried before the bare operator/punctuation
      ;; clauses for the same characters.
      (define
        scan!
        (fn
          ()
          (skip-ws!)
          (when
            (< pos src-len)
            (let
              ((ch (hs-cur)) (start pos))
              (cond
                ;; "//" line comment: dropped.
                (and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/"))
                (do (hs-advance! 2) (skip-comment!) (scan!))
                ;; "<" opening a selector literal (never "<=").
                (and
                  (= ch "<")
                  (< (+ pos 1) src-len)
                  (not (= (hs-peek 1) "="))
                  (or
                    (hs-letter? (hs-peek 1))
                    (= (hs-peek 1) ".")
                    (= (hs-peek 1) "#")
                    (= (hs-peek 1) "[")
                    (= (hs-peek 1) "*")
                    (= (hs-peek 1) ":")))
                (do (hs-emit! "selector" (read-selector) start) (scan!))
                ;; ".name" — emitted as "class".
                (and
                  (= ch ".")
                  (< (+ pos 1) src-len)
                  (or
                    (hs-letter? (hs-peek 1))
                    (= (hs-peek 1) "-")
                    (= (hs-peek 1) "_")))
                (do
                  (hs-advance! 1)
                  (hs-emit! "class" (read-class-name pos) start)
                  (scan!))
                ;; "#name"
                (and
                  (= ch "#")
                  (< (+ pos 1) src-len)
                  (hs-ident-start? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "id" (read-ident pos) start)
                  (scan!))
                ;; "@name"
                (and
                  (= ch "@")
                  (< (+ pos 1) src-len)
                  (hs-ident-char? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "attr" (read-ident pos) start)
                  (scan!))
                ;; "*name"
                (and
                  (= ch "*")
                  (< (+ pos 1) src-len)
                  (hs-letter? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "style" (read-ident pos) start)
                  (scan!))
                ;; ":name"
                (and
                  (= ch ":")
                  (< (+ pos 1) src-len)
                  (hs-ident-start? (hs-peek 1)))
                (do
                  (hs-advance! 1)
                  (hs-emit! "local" (read-ident pos) start)
                  (scan!))
                ;; Quoted string; a single quote that starts a possessive 's
                ;; is left for the operator clause further down.
                (or
                  (= ch "\"")
                  (and (= ch "'") (not (hs-possessive?))))
                (do (hs-emit! "string" (read-string ch) start) (scan!))
                (= ch "`")
                (do (hs-emit! "template" (read-template) start) (scan!))
                (hs-digit? ch)
                (do (hs-emit! "number" (read-number start) start) (scan!))
                ;; Word: keyword when listed in hs-keywords, else ident.
                (hs-ident-start? ch)
                (do
                  (let
                    ((word (read-ident start)))
                    (hs-emit!
                      (if (hs-keyword? word) "keyword" "ident")
                      word
                      start))
                  (scan!))
                ;; Two-character comparison operators: == != <= >=
                (and
                  (or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
                  (< (+ pos 1) src-len)
                  (= (hs-peek 1) "="))
                (do
                  (hs-emit! "op" (str ch "=") start)
                  (hs-advance! 2)
                  (scan!))
                ;; Possessive 's
                (and (= ch "'") (hs-possessive?))
                (do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))
                ;; Brackets and other single-character punctuation.
                (= ch "(")
                (do (hs-emit-char! "paren-open" "(" start) (scan!))
                (= ch ")")
                (do (hs-emit-char! "paren-close" ")" start) (scan!))
                (= ch "[")
                (do (hs-emit-char! "bracket-open" "[" start) (scan!))
                (= ch "]")
                (do (hs-emit-char! "bracket-close" "]" start) (scan!))
                (= ch "{")
                (do (hs-emit-char! "brace-open" "{" start) (scan!))
                (= ch "}")
                (do (hs-emit-char! "brace-close" "}" start) (scan!))
                (= ch ",")
                (do (hs-emit-char! "comma" "," start) (scan!))
                ;; Single-character operators.
                (= ch "+")
                (do (hs-emit-char! "op" "+" start) (scan!))
                (= ch "-")
                (do (hs-emit-char! "op" "-" start) (scan!))
                (= ch "/")
                (do (hs-emit-char! "op" "/" start) (scan!))
                ;; Bare "*": reached only when no letter follows it (that
                ;; case is the style literal above).
                (= ch "*")
                (do (hs-emit-char! "op" "*" start) (scan!))
                (= ch "=")
                (do (hs-emit-char! "op" "=" start) (scan!))
                (= ch "<")
                (do (hs-emit-char! "op" "<" start) (scan!))
                (= ch ">")
                (do (hs-emit-char! "op" ">" start) (scan!))
                (= ch "!")
                (do (hs-emit-char! "op" "!" start) (scan!))
                (= ch ".")
                (do (hs-emit-char! "dot" "." start) (scan!))
                ;; Bare ":": reached only when no identifier start follows
                ;; it (that case is the local literal above).
                (= ch ":")
                (do (hs-emit-char! "colon" ":" start) (scan!))
                ;; Anything else is skipped without emitting a token.
                :else (do (hs-advance! 1) (scan!)))))))
      (scan!)
      (hs-emit! "eof" nil pos)
      tokens)))
|
||||
Reference in New Issue
Block a user