Files
rose-ash/lib/hyperscript/tokenizer.sx
giles 0515295317 HS: extend parser/runtime + new node test runner; ignore test-results/
- Parser: `--` line comments, `|` op, `result` → `the-result`, query-scoped
  `<sel> in <expr>`, `is a/an <type>` predicate, multi-`as` chaining with `|`,
  `match`/`precede` keyword aliases, `[attr]` add/toggle, between attr forms
- Runtime: per-element listener registry + hs-deactivate!, attr toggle
  variants, set-inner-html boots subtree, hs-append polymorphic on
  string/list/element, default? / array-set! / query-all-in / list-set
  via take+drop, hs-script idempotence guard
- Integration: skip reserved (me/it/event/you/yourself) when collecting vars
- Tokenizer: emit `--` comments and `|` op
- Test framework + conformance runner updates; new tests/hs-run-filtered.js
  (single-process Node runner using OCaml VM step-limit to bound infinite
  loops); generate-sx-conformance-dev.py improvements
- mcp_tree.ml + run_tests.ml: harness extensions
- .gitignore: top-level test-results/ (Playwright artifacts)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 07:11:07 +00:00

623 lines
18 KiB
Plaintext

;; _hyperscript tokenizer — produces token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P}
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
;; "bracket-close" "brace-open" "brace-close" "comma" "colon"
;; "template" "local" "eof"
;; ── Token constructor ─────────────────────────────────────────────
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
(define hs-digit? (fn (c) (and (>= c "0") (<= c "9"))))
(define
hs-letter?
(fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))
(define hs-ident-start? (fn (c) (or (hs-letter? c) (= c "_") (= c "$"))))
(define
hs-ident-char?
(fn
(c)
(or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$") (= c "-"))))
(define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
;; ── Keyword set ───────────────────────────────────────────────────
(define
hs-keywords
(list
"on"
"end"
"set"
"to"
"put"
"into"
"before"
"after"
"add"
"remove"
"toggle"
"if"
"else"
"otherwise"
"then"
"from"
"in"
"of"
"for"
"until"
"wait"
"send"
"trigger"
"call"
"get"
"take"
"log"
"hide"
"show"
"repeat"
"while"
"times"
"forever"
"break"
"continue"
"return"
"throw"
"catch"
"finally"
"def"
"tell"
"make"
"fetch"
"as"
"with"
"every"
"or"
"and"
"not"
"is"
"no"
"the"
"my"
"me"
"it"
"its"
"result"
"true"
"false"
"null"
"when"
"between"
"at"
"by"
"queue"
"elsewhere"
"event"
"target"
"detail"
"sender"
"index"
"indexed"
"increment"
"decrement"
"append"
"settle"
"transition"
"over"
"closest"
"next"
"previous"
"first"
"last"
"random"
"pick"
"empty"
"clear"
"swap"
"open"
"close"
"exists"
"matches"
"contains"
"do"
"unless"
"you"
"your"
"new"
"init"
"start"
"go"
"js"
"less"
"than"
"greater"
"class"
"anything"
"install"
"measure"
"behavior"
"called"
"render"
"eval"
"I"
"am"
"does"
"some"
"mod"
"equal"
"equals"
"really"
"include"
"includes"
"contain"
"undefined"
"exist"
"match"
"beep"
"where"
"sorted"
"mapped"
"split"
"joined"
"descending"
"ascending"
"scroll"
"select"
"reset"
"default"
"halt"
"precedes"
"precede"
"follow"
"follows"
"ignoring"
"case"
"changes"
"focus"
"blur"
"dom"
"morph"
"using"))
(define hs-keyword? (fn (word) (some (fn (k) (= k word)) hs-keywords)))
;; ── Main tokenizer ────────────────────────────────────────────────
(define
hs-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define
hs-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define hs-cur (fn () (hs-peek 0)))
(define hs-advance! (fn (n) (set! pos (+ pos n))))
(define
skip-ws!
(fn
()
(when
(and (< pos src-len) (hs-ws? (hs-cur)))
(hs-advance! 1)
(skip-ws!))))
(define
skip-comment!
(fn
()
(when
(and (< pos src-len) (not (= (hs-cur) "\n")))
(hs-advance! 1)
(skip-comment!))))
(define
read-ident
(fn
(start)
(when
(and (< pos src-len) (hs-ident-char? (hs-cur)))
(hs-advance! 1)
(read-ident start))
(slice src start pos)))
(define
read-number
(fn
(start)
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-number start))
(when
(and
(< pos src-len)
(= (hs-cur) ".")
(< (+ pos 1) src-len)
(hs-digit? (hs-peek 1)))
(hs-advance! 1)
(define
read-frac
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-frac))))
(read-frac))
(do
(when
(and
(< pos src-len)
(or (= (hs-cur) "e") (= (hs-cur) "E"))
(or
(and (< (+ pos 1) src-len) (hs-digit? (hs-peek 1)))
(and
(< (+ pos 2) src-len)
(or (= (hs-peek 1) "+") (= (hs-peek 1) "-"))
(hs-digit? (hs-peek 2)))))
(hs-advance! 1)
(when
(and
(< pos src-len)
(or (= (hs-cur) "+") (= (hs-cur) "-")))
(hs-advance! 1))
(define
read-exp-digits
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-exp-digits))))
(read-exp-digits))
(let
((num-end pos))
(when
(and
(< pos src-len)
(or (= (hs-cur) "m") (= (hs-cur) "s")))
(if
(and
(= (hs-cur) "m")
(< (+ pos 1) src-len)
(= (hs-peek 1) "s"))
(hs-advance! 2)
(when (= (hs-cur) "s") (hs-advance! 1))))
(slice src start pos)))))
(define
read-string
(fn
(quote-char)
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(= (hs-cur) "\\")
(do
(hs-advance! 1)
(when
(< pos src-len)
(let
((ch (hs-cur)))
(cond
(= ch "n")
(append! chars "\n")
(= ch "t")
(append! chars "\t")
(= ch "\\")
(append! chars "\\")
(= ch quote-char)
(append! chars quote-char)
:else (do (append! chars "\\") (append! chars ch)))
(hs-advance! 1)))
(loop))
(= (hs-cur) quote-char)
(hs-advance! 1)
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-template
(fn
()
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(= (hs-cur) "`")
(hs-advance! 1)
(and
(= (hs-cur) "$")
(< (+ pos 1) src-len)
(= (hs-peek 1) "{"))
(do
(append! chars "${")
(hs-advance! 2)
(let
((depth 1))
(define
inner
(fn
()
(when
(and (< pos src-len) (> depth 0))
(cond
(= (hs-cur) "{")
(do
(set! depth (+ depth 1))
(append! chars (hs-cur))
(hs-advance! 1)
(inner))
(= (hs-cur) "}")
(do
(set! depth (- depth 1))
(when (> depth 0) (append! chars (hs-cur)))
(hs-advance! 1)
(when (> depth 0) (inner)))
:else (do
(append! chars (hs-cur))
(hs-advance! 1)
(inner))))))
(inner))
(append! chars "}")
(loop))
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-selector
(fn
()
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(and
(= (hs-cur) "/")
(< (+ pos 1) src-len)
(= (hs-peek 1) ">"))
(hs-advance! 2)
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-class-name
(fn
(start)
(when
(and
(< pos src-len)
(or
(hs-ident-char? (hs-cur))
(= (hs-cur) ":")
(= (hs-cur) "[")
(= (hs-cur) "]")))
(hs-advance! 1)
(read-class-name start))
(slice src start pos)))
(define
hs-emit!
(fn
(type value start)
(append! tokens (hs-make-token type value start))))
(define
scan!
(fn
()
(skip-ws!)
(when
(< pos src-len)
(let
((ch (hs-cur)) (start pos))
(cond
(and (= ch "-") (< (+ pos 1) src-len) (= (hs-peek 1) "-"))
(do (hs-advance! 2) (skip-comment!) (scan!))
(and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/"))
(do (hs-advance! 2) (skip-comment!) (scan!))
(and
(= ch "<")
(< (+ pos 1) src-len)
(not (= (hs-peek 1) "="))
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) ".")
(= (hs-peek 1) "#")
(= (hs-peek 1) "[")
(= (hs-peek 1) "*")
(= (hs-peek 1) ":")))
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and (= ch ".") (< (+ pos 1) src-len) (= (hs-peek 1) "."))
(do (hs-emit! "op" ".." start) (hs-advance! 2) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) "-")
(= (hs-peek 1) "_")))
(do
(hs-advance! 1)
(hs-emit! "class" (read-class-name pos) start)
(scan!))
(and
(= ch "#")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "id" (read-ident pos) start)
(scan!))
(and
(= ch "@")
(< (+ pos 1) src-len)
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "attr" (read-ident pos) start)
(scan!))
(and
(= ch "^")
(< (+ pos 1) src-len)
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "hat" (read-ident pos) start)
(scan!))
(and
(= ch "~")
(< (+ pos 1) src-len)
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "component" (str "~" (read-ident pos)) start)
(scan!))
(and
(= ch "*")
(< (+ pos 1) src-len)
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "style" (read-ident pos) start)
(scan!))
(and
(= ch ":")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "local" (read-ident pos) start)
(scan!))
(or
(= ch "\"")
(and
(= ch "'")
(not
(and
(< (+ pos 1) src-len)
(= (hs-peek 1) "s")
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2))))))))
(do (hs-emit! "string" (read-string ch) start) (scan!))
(= ch "`")
(do (hs-emit! "template" (read-template) start) (scan!))
(hs-digit? ch)
(do (hs-emit! "number" (read-number start) start) (scan!))
(hs-ident-start? ch)
(do
(let
((word (read-ident start)))
(hs-emit!
(if (hs-keyword? word) "keyword" "ident")
word
start))
(scan!))
(and
(or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
(< (+ pos 1) src-len)
(= (hs-peek 1) "="))
(do
(if
(and
(or (= ch "=") (= ch "!"))
(< (+ pos 2) src-len)
(= (hs-peek 2) "="))
(do (hs-emit! "op" (str ch "==") start) (hs-advance! 3))
(do (hs-emit! "op" (str ch "=") start) (hs-advance! 2)))
(scan!))
(and
(= ch "'")
(< (+ pos 1) src-len)
(= (hs-peek 1) "s")
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2)))))
(do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))
(= ch "(")
(do
(hs-emit! "paren-open" "(" start)
(hs-advance! 1)
(scan!))
(= ch ")")
(do
(hs-emit! "paren-close" ")" start)
(hs-advance! 1)
(scan!))
(= ch "[")
(do
(hs-emit! "bracket-open" "[" start)
(hs-advance! 1)
(scan!))
(= ch "]")
(do
(hs-emit! "bracket-close" "]" start)
(hs-advance! 1)
(scan!))
(= ch "{")
(do
(hs-emit! "brace-open" "{" start)
(hs-advance! 1)
(scan!))
(= ch "}")
(do
(hs-emit! "brace-close" "}" start)
(hs-advance! 1)
(scan!))
(= ch ",")
(do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!))
(= ch "+")
(do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!))
(= ch "-")
(do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!))
(= ch "/")
(do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!))
(= ch "=")
(do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!))
(= ch "<")
(do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!))
(= ch ">")
(do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!))
(= ch "!")
(do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!))
(= ch "*")
(do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!))
(= ch "%")
(do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!))
(= ch ".")
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
(= ch "\\")
(do (hs-emit! "op" "\\" start) (hs-advance! 1) (scan!))
(= ch ":")
(do (hs-emit! "colon" ":" start) (hs-advance! 1) (scan!))
(= ch "|")
(do (hs-emit! "op" "|" start) (hs-advance! 1) (scan!))
:else (do (hs-advance! 1) (scan!)))))))
(scan!)
(hs-emit! "eof" nil pos)
tokens)))