Files
rose-ash/lib/hyperscript/tokenizer.sx
giles 802ccd23e8 HS: fix empty/halt/morph/reset/dialog — 17 upstream tests pass
- parser `empty` no-target → (ref "me") (was bogus (sym "me"))
- parser `halt` modes distinguish: "all"/"bubbling"/"default" halt execution
  (raise hs-return), "the-event"/"the event's" only stop propagation/default.
  "'s" now matched as op token, not keyword.
- parser `get` cmd: dispatch + cmd-kw list + parse-get-cmd (parses expr with
  optional `as TYPE`). Required for `get result as JSON` in fetch chains.
- compiler empty-target for (local X): emit (set! X (hs-empty-like X)) so
  arrays/sets/maps clear the variable, not call DOM empty on the value.
- runtime hs-empty-like: container-of-same-type empty value.
- runtime hs-empty-target!: drop dead FORM branch that was short-circuiting
  to innerHTML=""; the querySelectorAll-over-inputs branch now runs.
- runtime hs-halt!: take ev param (was free `event` lookup); raise hs-return
  to stop execution unless mode is "the-event".
- runtime hs-reset!: type-aware — FORM → reset, INPUT/TEXTAREA → value/checked
  from defaults, SELECT → defaultSelected option.
- runtime hs-open!/hs-close!: toggle `open` attribute on details elements
  (not just the prop) so dom-has-attr? assertions work.
- runtime hs-coerce JSON: json-stringify dict/list (was str).
- test-runner mock: host-get on List + "length"/"size" (was only Dict);
  dom-set-attr tracks defaultChecked / defaultSelected / defaultValue;
  mock_query_all supports comma-separated selector groups.
- generator: emit boolean attrs (checked/selected/etc) even with null value;
  drop overcautious "skip HS with bare quotes or embedded HTML" guard so
  morph tests (source contains embedded <div>) emit properly.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 15:36:01 +00:00

619 lines
18 KiB
Plaintext

;; _hyperscript tokenizer — produces token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P}
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
;; "bracket-close" "brace-open" "brace-close" "comma" "colon"
;; "template" "local" "hat" "component" "eof"
;; ── Token constructor ─────────────────────────────────────────────
;; Build a token record.
;;   type  — token kind string (e.g. "keyword", "op")
;;   value — token payload
;;   pos   — stored under :pos; the tokenizer passes the token's start offset
(define hs-make-token
  (fn (type value pos)
    {:pos pos
     :value value
     :type type}))
;; ── Character predicates ──────────────────────────────────────────
;; True when c orders between "0" and "9" (inclusive) under string comparison.
(define hs-digit?
  (fn (c)
    (and (<= "0" c) (>= "9" c))))
;; True when c falls in "a".."z" or "A".."Z" under string comparison.
(define hs-letter?
  (fn (c)
    (let ((lower? (and (<= "a" c) (>= "z" c)))
          (upper? (and (<= "A" c) (>= "Z" c))))
      (or lower? upper?))))
;; True when c may begin an identifier: a letter, "_" or "$".
(define hs-ident-start?
  (fn (c)
    (or (hs-letter? c)
        (= "_" c)
        (= "$" c))))
;; True when c may continue an identifier: anything that can start one
;; (letter, "_", "$"), a digit, or "-".
(define hs-ident-char?
  (fn (c)
    (or (hs-ident-start? c)
        (hs-digit? c)
        (= "-" c))))
;; True when c is a space, tab, newline or carriage return.
(define hs-ws?
  (fn (c)
    (or (= " " c) (= "\t" c) (= "\n" c) (= "\r" c))))
;; ── Keyword set ───────────────────────────────────────────────────
;; Words the tokenizer tags as "keyword" rather than "ident".
;; Entry order is unchanged from the one-per-line layout; only the
;; line packing differs.
(define hs-keywords
  (list
    "on" "end" "set" "to" "put" "into" "before" "after"
    "add" "remove" "toggle" "if" "else" "otherwise" "then" "from"
    "in" "of" "for" "until" "wait" "send" "trigger" "call"
    "get" "take" "log" "hide" "show" "repeat" "while" "times"
    "forever" "break" "continue" "return" "throw" "catch" "finally" "def"
    "tell" "make" "fetch" "as" "with" "every" "or" "and"
    "not" "is" "no" "the" "my" "me" "it" "its"
    "result" "true" "false" "null" "when" "between" "at" "by"
    "queue" "elsewhere" "event" "target" "detail" "sender" "index" "indexed"
    "increment" "decrement" "append" "settle" "transition" "over" "closest" "next"
    "previous" "first" "last" "random" "pick" "empty" "clear" "swap"
    "open" "close" "exists" "matches" "contains" "do" "unless" "you"
    "your" "new" "init" "start" "go" "js" "less" "than"
    "greater" "class" "anything" "install" "measure" "behavior" "called" "render"
    "eval" "I" "am" "does" "some" "mod" "equal" "equals"
    "really" "include" "includes" "contain" "undefined" "exist" "match" "beep"
    "where" "sorted" "mapped" "split" "joined" "descending" "ascending" "scroll"
    "select" "reset" "default" "halt" "precedes" "precede" "follow" "follows"
    "ignoring" "case" "changes" "focus" "blur" "dom" "morph" "using"))
;; Truthy when word equals one of the entries in hs-keywords
;; (linear scan via `some`).
(define hs-keyword?
  (fn (word)
    (some (fn (kw) (= word kw)) hs-keywords)))
;; ── Main tokenizer ────────────────────────────────────────────────
(define
hs-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
;; Character at pos+offset, or nil when that index is not below src-len.
(define hs-peek
  (fn (offset)
    (let ((idx (+ pos offset)))
      (if (< idx src-len) (nth src idx) nil))))
;; Character under the cursor, or nil once pos has reached src-len.
(define hs-cur
  (fn ()
    (if (< pos src-len) (nth src pos) nil)))
;; Move the cursor forward by n characters (no bounds check here).
(define hs-advance!
  (fn (n)
    (let ((next-pos (+ pos n)))
      (set! pos next-pos))))
;; Advance the cursor past any run of whitespace (per hs-ws?).
(define skip-ws!
  (fn ()
    (when (< pos src-len)
      (when (hs-ws? (hs-cur))
        (hs-advance! 1)
        (skip-ws!)))))
;; Advance the cursor to the next "\n" (left unconsumed) or to end of input.
(define skip-comment!
  (fn ()
    (cond
      (>= pos src-len) nil
      (= (hs-cur) "\n") nil
      :else (do (hs-advance! 1) (skip-comment!)))))
;; Consume identifier characters (per hs-ident-char?) from the cursor and
;; return the source text from `start` up to the new cursor position.
;;   start — index where the returned slice begins (callers pass the
;;           cursor position of the first identifier character)
;;
;; The consuming loop is a separate helper so that `slice` runs exactly once.
;; Previously the function recursed into itself in non-tail position, so each
;; recursion frame re-sliced the same text on the way back out.
(define read-ident
  (fn (start)
    (define ident-consume!
      (fn ()
        (when (and (< pos src-len) (hs-ident-char? (hs-cur)))
          (hs-advance! 1)
          (ident-consume!))))
    (ident-consume!)
    (slice src start pos)))
;; Read a numeric literal beginning at `start` and return its source text.
;; Grammar accepted, in order, each part at most once:
;;   digits                      integer part
;;   "." digits                  fraction (only when a digit follows the dot)
;;   ("e"|"E") ["+"|"-"] digits  exponent (only when digits follow)
;;   "ms" | "s"                  unit suffix, kept as part of the returned text
;;
;; Fix: the digit loop used to recurse into read-number itself, so the
;; fraction/exponent/suffix steps ran again in every recursion frame as the
;; stack unwound (once per integer digit). Inputs such as "1.5.5", "12e3e4"
;; or "10ss" were therefore swallowed as a single number. The digit loop is
;; now its own helper and the trailing parts are examined exactly once.
;; The unused `num-end` local was dropped.
(define read-number
  (fn (start)
    (define num-digits!
      (fn ()
        (when (and (< pos src-len) (hs-digit? (hs-cur)))
          (hs-advance! 1)
          (num-digits!))))
    ;; integer part
    (num-digits!)
    ;; fraction: the dot is consumed only when a digit follows it, so "1." and
    ;; "1..5" leave the dot for the main scanner
    (when (and (< pos src-len)
               (= (hs-cur) ".")
               (< (+ pos 1) src-len)
               (hs-digit? (hs-peek 1)))
      (hs-advance! 1)
      (num-digits!))
    ;; exponent: "e"/"E" followed by a digit, or by a sign and then a digit
    (when (and (< pos src-len)
               (or (= (hs-cur) "e") (= (hs-cur) "E"))
               (or (and (< (+ pos 1) src-len) (hs-digit? (hs-peek 1)))
                   (and (< (+ pos 2) src-len)
                        (or (= (hs-peek 1) "+") (= (hs-peek 1) "-"))
                        (hs-digit? (hs-peek 2)))))
      (hs-advance! 1)
      (when (and (< pos src-len)
                 (or (= (hs-cur) "+") (= (hs-cur) "-")))
        (hs-advance! 1))
      (num-digits!))
    ;; unit suffix: "ms" or "s"; a lone "m" is not consumed
    (when (and (< pos src-len)
               (or (= (hs-cur) "m") (= (hs-cur) "s")))
      (if (and (= (hs-cur) "m")
               (< (+ pos 1) src-len)
               (= (hs-peek 1) "s"))
        (hs-advance! 2)
        (when (= (hs-cur) "s") (hs-advance! 1))))
    (slice src start pos)))
;; Read a quoted string whose opening quote is under the cursor.
;;   quote-char — the delimiter that opened the string
;; Returns the decoded contents. Escapes handled: \n, \t, \\ and an escaped
;; delimiter; any other escaped character is kept together with its backslash.
;; If input ends before the closing quote, whatever was collected is returned.
(define read-string
  (fn (quote-char)
    (let ((buf (list)))
      ;; step over the opening quote
      (hs-advance! 1)
      ;; cursor sits on the character right after a backslash
      (define str-escape!
        (fn ()
          (let ((esc (hs-cur)))
            (cond
              (= esc "n") (append! buf "\n")
              (= esc "t") (append! buf "\t")
              (= esc "\\") (append! buf "\\")
              (= esc quote-char) (append! buf quote-char)
              :else (do (append! buf "\\") (append! buf esc)))
            (hs-advance! 1))))
      (define str-body!
        (fn ()
          (when (< pos src-len)
            (let ((c (hs-cur)))
              (cond
                (= c "\\")
                (do
                  (hs-advance! 1)
                  ;; a backslash as the very last character is simply dropped
                  (when (< pos src-len) (str-escape!))
                  (str-body!))
                ;; closing delimiter: consume it and stop
                (= c quote-char)
                (hs-advance! 1)
                :else
                (do (append! buf c) (hs-advance! 1) (str-body!)))))))
      (str-body!)
      (join "" buf))))
;; Read a backtick template whose opening backtick is under the cursor.
;; Returns the raw template text. "${ ... }" interpolations are copied through
;; verbatim, with nested braces tracked so the matching "}" ends the
;; interpolation. If input ends early, whatever was collected is returned
;; (a closing "}" is still appended after an unfinished interpolation).
(define read-template
  (fn ()
    (let ((buf (list)))
      ;; step over the opening backtick
      (hs-advance! 1)
      ;; Copy the inside of a "${...}" until brace depth returns to zero.
      ;; The "}" that brings depth to zero is consumed but not copied; the
      ;; caller appends it.
      (define tpl-expr!
        (fn (depth)
          (when (and (< pos src-len) (> depth 0))
            (let ((c (hs-cur)))
              (cond
                (= c "{")
                (do (append! buf c) (hs-advance! 1) (tpl-expr! (+ depth 1)))
                (= c "}")
                (do
                  (when (> depth 1) (append! buf c))
                  (hs-advance! 1)
                  (tpl-expr! (- depth 1)))
                :else
                (do (append! buf c) (hs-advance! 1) (tpl-expr! depth)))))))
      (define tpl-body!
        (fn ()
          (cond
            (>= pos src-len)
            nil
            ;; closing backtick: consume it and stop
            (= (hs-cur) "`")
            (hs-advance! 1)
            (and (= (hs-cur) "$") (< (+ pos 1) src-len) (= (hs-peek 1) "{"))
            (do
              (append! buf "${")
              (hs-advance! 2)
              (tpl-expr! 1)
              (append! buf "}")
              (tpl-body!))
            :else
            (do (append! buf (hs-cur)) (hs-advance! 1) (tpl-body!)))))
      (tpl-body!)
      (join "" buf))))
;; Read a "<... />" selector literal whose "<" is under the cursor.
;; Returns the text between "<" and the terminating "/>" (both excluded).
;; If no "/>" is found, everything up to end of input is returned.
(define read-selector
  (fn ()
    (let ((buf (list)))
      ;; step over the opening "<"
      (hs-advance! 1)
      (define sel-body!
        (fn ()
          (when (< pos src-len)
            (if (and (= (hs-cur) "/")
                     (< (+ pos 1) src-len)
                     (= (hs-peek 1) ">"))
              ;; terminator "/>": consume both characters and stop
              (hs-advance! 2)
              (do (append! buf (hs-cur)) (hs-advance! 1) (sel-body!))))))
      (sel-body!)
      (join "" buf))))
;; Consume a class name from the cursor and return the source text from
;; `start` up to the new cursor position. Besides identifier characters,
;; ":", "[" and "]" are accepted as part of the name.
;;   start — index where the returned slice begins
;;
;; The consuming loop is a separate helper so that `slice` runs exactly once.
;; Previously the function recursed into itself in non-tail position, so each
;; recursion frame re-sliced the same text on the way back out.
(define read-class-name
  (fn (start)
    (define class-consume!
      (fn ()
        (when (and (< pos src-len)
                   (or (hs-ident-char? (hs-cur))
                       (= (hs-cur) ":")
                       (= (hs-cur) "[")
                       (= (hs-cur) "]")))
          (hs-advance! 1)
          (class-consume!))))
    (class-consume!)
    (slice src start pos)))
;; Append a token built from (type, value, start) to the `tokens` list.
(define hs-emit!
  (fn (type value start)
    (let ((tok (hs-make-token type value start)))
      (append! tokens tok))))
;; Main scanning loop: skip whitespace, classify the character under the
;; cursor, emit one token (or skip a comment / unrecognised character), then
;; recurse until the input is exhausted. Clause order matters: multi-character
;; forms are tested before the single-character fallbacks further down.
;;
;; Fix: "'s" used to be treated as the possessive operator whenever the
;; character after the "s" was not an identifier character. That broke string
;; literals that start with "s", e.g. `put 's' into x` lexed as op "'s"
;; followed by an unterminated string. The possessive is now recognised only
;; when it is attached to what precedes it: the previous source character must
;; be an identifier character, ")", "]" or ">" (end of an identifier/ref, a
;; parenthesised expression, an index or bracketed class name, or a "<.../>"
;; selector). After whitespace, at the start of input, or after any other
;; character, a single quote starts a string.
(define scan!
  (fn ()
    (skip-ws!)
    (when (< pos src-len)
      (let ((ch (hs-cur))
            (start pos)
            (possessive?
              (and
                (= (hs-cur) "'")
                (< (+ pos 1) src-len)
                (= (hs-peek 1) "s")
                (or (>= (+ pos 2) src-len)
                    (not (hs-ident-char? (hs-peek 2))))
                ;; must be glued to the preceding expression
                (> pos 0)
                (let ((prev (nth src (- pos 1))))
                  (or (hs-ident-char? prev)
                      (= prev ")")
                      (= prev "]")
                      (= prev ">"))))))
        (cond
          ;; "//" line comment: skip to end of line, emit nothing
          (and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/"))
          (do (hs-advance! 2) (skip-comment!) (scan!))
          ;; "<sel/>" selector literal: "<" followed by a selector-looking char
          (and (= ch "<")
               (< (+ pos 1) src-len)
               (not (= (hs-peek 1) "="))
               (or (hs-letter? (hs-peek 1))
                   (= (hs-peek 1) ".")
                   (= (hs-peek 1) "#")
                   (= (hs-peek 1) "[")
                   (= (hs-peek 1) "*")
                   (= (hs-peek 1) ":")))
          (do (hs-emit! "selector" (read-selector) start) (scan!))
          ;; ".." operator
          (and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
          (do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
          ;; ".name" class reference
          (and (= ch ".")
               (< (+ pos 1) src-len)
               (or (hs-letter? (hs-peek 1))
                   (= (hs-peek 1) "-")
                   (= (hs-peek 1) "_")))
          (do
            (hs-advance! 1)
            (hs-emit! "class" (read-class-name pos) start)
            (scan!))
          ;; "#name" id reference
          (and (= ch "#")
               (< (+ pos 1) src-len)
               (hs-ident-start? (hs-peek 1)))
          (do
            (hs-advance! 1)
            (hs-emit! "id" (read-ident pos) start)
            (scan!))
          ;; "@name" attribute reference
          (and (= ch "@")
               (< (+ pos 1) src-len)
               (hs-ident-char? (hs-peek 1)))
          (do
            (hs-advance! 1)
            (hs-emit! "attr" (read-ident pos) start)
            (scan!))
          ;; "^name" — emitted with type "hat"
          (and (= ch "^")
               (< (+ pos 1) src-len)
               (hs-ident-char? (hs-peek 1)))
          (do
            (hs-advance! 1)
            (hs-emit! "hat" (read-ident pos) start)
            (scan!))
          ;; "~name" — emitted with type "component"; value keeps the "~"
          (and (= ch "~")
               (< (+ pos 1) src-len)
               (hs-letter? (hs-peek 1)))
          (do
            (hs-advance! 1)
            (hs-emit! "component" (str "~" (read-ident pos)) start)
            (scan!))
          ;; "*name" style reference
          (and (= ch "*")
               (< (+ pos 1) src-len)
               (hs-letter? (hs-peek 1)))
          (do
            (hs-advance! 1)
            (hs-emit! "style" (read-ident pos) start)
            (scan!))
          ;; ":name" local variable
          (and (= ch ":")
               (< (+ pos 1) src-len)
               (hs-ident-start? (hs-peek 1)))
          (do
            (hs-advance! 1)
            (hs-emit! "local" (read-ident pos) start)
            (scan!))
          ;; string literal: any double quote, or a single quote that is not
          ;; an attached possessive
          (or (= ch "\"")
              (and (= ch "'") (not possessive?)))
          (do (hs-emit! "string" (read-string ch) start) (scan!))
          ;; backtick template
          (= ch "`")
          (do (hs-emit! "template" (read-template) start) (scan!))
          ;; number
          (hs-digit? ch)
          (do (hs-emit! "number" (read-number start) start) (scan!))
          ;; identifier or keyword
          (hs-ident-start? ch)
          (do
            (let ((word (read-ident start)))
              (hs-emit!
                (if (hs-keyword? word) "keyword" "ident")
                word
                start))
            (scan!))
          ;; comparison operators: == != <= >= and the three-char === !==
          (and (or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
               (< (+ pos 1) src-len)
               (= (hs-peek 1) "="))
          (do
            (if (and (or (= ch "=") (= ch "!"))
                     (< (+ pos 2) src-len)
                     (= (hs-peek 2) "="))
              (do (hs-emit! "op" (str ch "==") start) (hs-advance! 3))
              (do (hs-emit! "op" (str ch "=") start) (hs-advance! 2)))
            (scan!))
          ;; attached possessive "'s"
          possessive?
          (do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))
          ;; single-character punctuation and operators
          (= ch "(")
          (do (hs-emit! "paren-open" "(" start) (hs-advance! 1) (scan!))
          (= ch ")")
          (do (hs-emit! "paren-close" ")" start) (hs-advance! 1) (scan!))
          (= ch "[")
          (do (hs-emit! "bracket-open" "[" start) (hs-advance! 1) (scan!))
          (= ch "]")
          (do (hs-emit! "bracket-close" "]" start) (hs-advance! 1) (scan!))
          (= ch "{")
          (do (hs-emit! "brace-open" "{" start) (hs-advance! 1) (scan!))
          (= ch "}")
          (do (hs-emit! "brace-close" "}" start) (hs-advance! 1) (scan!))
          (= ch ",")
          (do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!))
          (= ch "+")
          (do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!))
          (= ch "-")
          (do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!))
          (= ch "/")
          (do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!))
          (= ch "=")
          (do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!))
          (= ch "<")
          (do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!))
          (= ch ">")
          (do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!))
          (= ch "!")
          (do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!))
          (= ch "*")
          (do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!))
          (= ch "%")
          (do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!))
          (= ch ".")
          (do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
          (= ch "\\")
          (do (hs-emit! "op" "\\" start) (hs-advance! 1) (scan!))
          (= ch ":")
          (do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!))
          ;; anything else is skipped without emitting a token
          :else (do (hs-advance! 1) (scan!)))))))
(scan!)
(hs-emit! "eof" nil pos)
tokens)))