Files
rose-ash/lib/hyperscript/tokenizer.sx
giles 2278443182 Hyperscript conformance: 222 test fixtures from _hyperscript 0.9.14
Extract pure expression tests from the official _hyperscript test suite
and implement parser/compiler/runtime extensions to pass them.

Test infrastructure:
- 222 fixtures extracted from evalHyperScript calls (no DOM dependency)
- SX data format with eval-hs bridge and run-hs-fixture runner
- 24 suites covering expressions, comparisons, coercion, logic, etc.

Parser extensions (parser.sx):
- mod as infix arithmetic operator
- English comparison phrases (is less than, is greater than or equal to)
- is a/an Type typecheck syntax
- === / !== strict equality operators
- I as me synonym, am as is for comparisons
- does not exist/match/contain postfix
- some/every ... with quantifier expressions
- undefined keyword → nil

Compiler updates (compiler.sx):
- + emits hs-add (type-dispatching: string concat or numeric add)
- no emits hs-falsy? (HS truthiness: empty string is falsy)
- matches? emits hs-matches? (string regex in non-DOM context)
- New cases: not-in?, in?, type-check, strict-eq, some, every

Runtime additions (runtime.sx):
- hs-coerce: Int/Integer truncation via floor
- hs-add: string concat when either operand is string
- hs-falsy?: HS-compatible truthiness (nil, false, "" are falsy)
- hs-matches?: string pattern matching
- hs-type-check/hs-type-check!: lenient/strict type checking
- hs-strict-eq: type + value equality

Tokenizer (tokenizer.sx):
- Added keywords: I, am, does, some, mod, equal, equals, really,
  include, includes, contain, undefined, exist

Scorecard: 47/112 test groups passing. 0 non-HS regressions.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 18:53:50 +00:00

546 lines
16 KiB
Plaintext

;; _hyperscript tokenizer — produces token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P}
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
;; "bracket-close" "brace-open" "brace-close" "comma" "colon"
;; "template" "local" "eof"
;; ── Token constructor ─────────────────────────────────────────────
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
(define hs-digit? (fn (c) (and (>= c "0") (<= c "9"))))
(define
hs-letter?
(fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))
(define hs-ident-start? (fn (c) (or (hs-letter? c) (= c "_") (= c "$"))))
(define
hs-ident-char?
(fn
(c)
(or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$") (= c "-"))))
(define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
;; ── Keyword set ───────────────────────────────────────────────────
(define
hs-keywords
(list
"on"
"end"
"set"
"to"
"put"
"into"
"before"
"after"
"add"
"remove"
"toggle"
"if"
"else"
"otherwise"
"then"
"from"
"in"
"of"
"for"
"until"
"wait"
"send"
"trigger"
"call"
"get"
"take"
"log"
"hide"
"show"
"repeat"
"while"
"times"
"forever"
"break"
"continue"
"return"
"throw"
"catch"
"finally"
"def"
"tell"
"make"
"fetch"
"as"
"with"
"every"
"or"
"and"
"not"
"is"
"no"
"the"
"my"
"me"
"it"
"its"
"result"
"true"
"false"
"null"
"when"
"between"
"at"
"by"
"queue"
"elsewhere"
"event"
"target"
"detail"
"sender"
"index"
"increment"
"decrement"
"append"
"settle"
"transition"
"over"
"closest"
"next"
"previous"
"first"
"last"
"random"
"empty"
"exists"
"matches"
"contains"
"do"
"unless"
"you"
"your"
"new"
"init"
"start"
"go"
"js"
"less"
"than"
"greater"
"class"
"anything"
"install"
"measure"
"behavior"
"called"
"render"
"eval"
"I"
"am"
"does"
"some"
"mod"
"equal"
"equals"
"really"
"include"
"includes"
"contain"
"undefined"
"exist"))
(define hs-keyword? (fn (word) (some (fn (k) (= k word)) hs-keywords)))
;; ── Main tokenizer ────────────────────────────────────────────────
(define
hs-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define
hs-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define hs-cur (fn () (hs-peek 0)))
(define hs-advance! (fn (n) (set! pos (+ pos n))))
(define
skip-ws!
(fn
()
(when
(and (< pos src-len) (hs-ws? (hs-cur)))
(hs-advance! 1)
(skip-ws!))))
(define
skip-comment!
(fn
()
(when
(and (< pos src-len) (not (= (hs-cur) "\n")))
(hs-advance! 1)
(skip-comment!))))
(define
read-ident
(fn
(start)
(when
(and (< pos src-len) (hs-ident-char? (hs-cur)))
(hs-advance! 1)
(read-ident start))
(slice src start pos)))
(define
read-number
(fn
(start)
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-number start))
(when
(and
(< pos src-len)
(= (hs-cur) ".")
(< (+ pos 1) src-len)
(hs-digit? (hs-peek 1)))
(hs-advance! 1)
(define
read-frac
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-frac))))
(read-frac))
(let
((num-end pos))
(when
(and
(< pos src-len)
(or (= (hs-cur) "m") (= (hs-cur) "s")))
(if
(and
(= (hs-cur) "m")
(< (+ pos 1) src-len)
(= (hs-peek 1) "s"))
(hs-advance! 2)
(when (= (hs-cur) "s") (hs-advance! 1))))
(slice src start pos))))
(define
read-string
(fn
(quote-char)
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(= (hs-cur) "\\")
(do
(hs-advance! 1)
(when
(< pos src-len)
(let
((ch (hs-cur)))
(cond
(= ch "n")
(append! chars "\n")
(= ch "t")
(append! chars "\t")
(= ch "\\")
(append! chars "\\")
(= ch quote-char)
(append! chars quote-char)
:else (do (append! chars "\\") (append! chars ch)))
(hs-advance! 1)))
(loop))
(= (hs-cur) quote-char)
(hs-advance! 1)
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-template
(fn
()
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(= (hs-cur) "`")
(hs-advance! 1)
(and
(= (hs-cur) "$")
(< (+ pos 1) src-len)
(= (hs-peek 1) "{"))
(do
(append! chars "${")
(hs-advance! 2)
(let
((depth 1))
(define
inner
(fn
()
(when
(and (< pos src-len) (> depth 0))
(cond
(= (hs-cur) "{")
(do
(set! depth (+ depth 1))
(append! chars (hs-cur))
(hs-advance! 1)
(inner))
(= (hs-cur) "}")
(do
(set! depth (- depth 1))
(when (> depth 0) (append! chars (hs-cur)))
(hs-advance! 1)
(when (> depth 0) (inner)))
:else (do
(append! chars (hs-cur))
(hs-advance! 1)
(inner))))))
(inner))
(append! chars "}")
(loop))
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-selector
(fn
()
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(and
(= (hs-cur) "/")
(< (+ pos 1) src-len)
(= (hs-peek 1) ">"))
(hs-advance! 2)
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-class-name
(fn
(start)
(when
(and
(< pos src-len)
(or
(hs-ident-char? (hs-cur))
(= (hs-cur) ":")
(= (hs-cur) "\\")
(= (hs-cur) "[")
(= (hs-cur) "]")
(= (hs-cur) "(")
(= (hs-cur) ")")))
(when (= (hs-cur) "\\") (hs-advance! 1))
(hs-advance! 1)
(read-class-name start))
(slice src start pos)))
(define
hs-emit!
(fn
(type value start)
(append! tokens (hs-make-token type value start))))
(define
scan!
(fn
()
(skip-ws!)
(when
(< pos src-len)
(let
((ch (hs-cur)) (start pos))
(cond
(and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/"))
(do (hs-advance! 2) (skip-comment!) (scan!))
(and
(= ch "<")
(< (+ pos 1) src-len)
(not (= (hs-peek 1) "="))
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) ".")
(= (hs-peek 1) "#")
(= (hs-peek 1) "[")
(= (hs-peek 1) "*")
(= (hs-peek 1) ":")))
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) "-")
(= (hs-peek 1) "_")))
(do
(hs-advance! 1)
(hs-emit! "class" (read-class-name pos) start)
(scan!))
(and
(= ch "#")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "id" (read-ident pos) start)
(scan!))
(and
(= ch "@")
(< (+ pos 1) src-len)
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "attr" (read-ident pos) start)
(scan!))
(and
(= ch "~")
(< (+ pos 1) src-len)
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "component" (str "~" (read-ident pos)) start)
(scan!))
(and
(= ch "*")
(< (+ pos 1) src-len)
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "style" (read-ident pos) start)
(scan!))
(and
(= ch ":")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "local" (read-ident pos) start)
(scan!))
(or
(= ch "\"")
(and
(= ch "'")
(not
(and
(< (+ pos 1) src-len)
(= (hs-peek 1) "s")
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2))))))))
(do (hs-emit! "string" (read-string ch) start) (scan!))
(= ch "`")
(do (hs-emit! "template" (read-template) start) (scan!))
(hs-digit? ch)
(do (hs-emit! "number" (read-number start) start) (scan!))
(hs-ident-start? ch)
(do
(let
((word (read-ident start)))
(hs-emit!
(if (hs-keyword? word) "keyword" "ident")
word
start))
(scan!))
(and
(or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
(< (+ pos 1) src-len)
(= (hs-peek 1) "="))
(do
(hs-emit! "op" (str ch "=") start)
(hs-advance! 2)
(scan!))
(and
(= ch "'")
(< (+ pos 1) src-len)
(= (hs-peek 1) "s")
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2)))))
(do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))
(= ch "(")
(do
(hs-emit! "paren-open" "(" start)
(hs-advance! 1)
(scan!))
(= ch ")")
(do
(hs-emit! "paren-close" ")" start)
(hs-advance! 1)
(scan!))
(= ch "[")
(do
(hs-emit! "bracket-open" "[" start)
(hs-advance! 1)
(scan!))
(= ch "]")
(do
(hs-emit! "bracket-close" "]" start)
(hs-advance! 1)
(scan!))
(= ch "{")
(do
(hs-emit! "brace-open" "{" start)
(hs-advance! 1)
(scan!))
(= ch "}")
(do
(hs-emit! "brace-close" "}" start)
(hs-advance! 1)
(scan!))
(= ch ",")
(do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!))
(= ch "+")
(do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!))
(= ch "-")
(do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!))
(= ch "/")
(do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!))
(= ch "=")
(do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!))
(= ch "<")
(do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!))
(= ch ">")
(do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!))
(= ch "!")
(do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!))
(= ch "*")
(do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!))
(= ch "%")
(do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!))
(= ch ".")
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
:else (do (hs-advance! 1) (scan!)))))))
(scan!)
(hs-emit! "eof" nil pos)
tokens)))