Files
rose-ash/lib/hyperscript/tokenizer.sx
giles 1f7f47b4c1 Fix hyperscript conformance: 54/112 passing (was 31/81 baseline)
Runtime visibility fix:
- eval-hs now injects runtime helpers (hs-add, hs-falsy?, hs-strict-eq,
  hs-type-check, hs-matches?, hs-contains?, hs-coerce) via outer let
  binding so the tree-walker evaluator can resolve them

Parser fixes:
- null/undefined: return (null-literal) AST node instead of bare nil
  (nil was indistinguishable from "no parse result" sentinel)
- === / !== tokenized as single 3-char operators
- mod operator: emit (modulo) instead of (%) — modulo is a real primitive

Compiler fixes:
- null-literal → nil
- % → modulo
- contains? → hs-contains? (avoids tree-walker primitive arity conflict)

Runtime additions:
- hs-contains?: wraps list membership + string containment

Tokenizer:
- Added keywords: a, an (removed — broke all tokenization), exist
- Triple operators: === and !== now tokenized correctly

Scorecard: 54/112 test groups passing, +23 from baseline.
Unlocked: really-equals, english comparisons, is-in, null is empty,
null exists, type checks, strict equality, mod.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-08 19:46:42 +00:00

551 lines
16 KiB
Plaintext

;; _hyperscript tokenizer — produces token stream from hyperscript source
;;
;; Tokens: {:type T :value V :pos P}
;; Types: "keyword" "ident" "number" "string" "class" "id" "attr" "style"
;; "selector" "op" "dot" "paren-open" "paren-close" "bracket-open"
;; "bracket-close" "brace-open" "brace-close" "comma" "colon"
;; "template" "local" "eof"
;; ── Token constructor ─────────────────────────────────────────────
(define hs-make-token (fn (type value pos) {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
(define hs-digit? (fn (c) (and (>= c "0") (<= c "9"))))
(define
hs-letter?
(fn (c) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))
(define hs-ident-start? (fn (c) (or (hs-letter? c) (= c "_") (= c "$"))))
(define
hs-ident-char?
(fn
(c)
(or (hs-letter? c) (hs-digit? c) (= c "_") (= c "$") (= c "-"))))
(define hs-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
;; ── Keyword set ───────────────────────────────────────────────────
(define
hs-keywords
(list
"on"
"end"
"set"
"to"
"put"
"into"
"before"
"after"
"add"
"remove"
"toggle"
"if"
"else"
"otherwise"
"then"
"from"
"in"
"of"
"for"
"until"
"wait"
"send"
"trigger"
"call"
"get"
"take"
"log"
"hide"
"show"
"repeat"
"while"
"times"
"forever"
"break"
"continue"
"return"
"throw"
"catch"
"finally"
"def"
"tell"
"make"
"fetch"
"as"
"with"
"every"
"or"
"and"
"not"
"is"
"no"
"the"
"my"
"me"
"it"
"its"
"result"
"true"
"false"
"null"
"when"
"between"
"at"
"by"
"queue"
"elsewhere"
"event"
"target"
"detail"
"sender"
"index"
"increment"
"decrement"
"append"
"settle"
"transition"
"over"
"closest"
"next"
"previous"
"first"
"last"
"random"
"empty"
"exists"
"matches"
"contains"
"do"
"unless"
"you"
"your"
"new"
"init"
"start"
"go"
"js"
"less"
"than"
"greater"
"class"
"anything"
"install"
"measure"
"behavior"
"called"
"render"
"eval"
"I"
"am"
"does"
"some"
"mod"
"equal"
"equals"
"really"
"include"
"includes"
"contain"
"undefined"
"exist"))
(define hs-keyword? (fn (word) (some (fn (k) (= k word)) hs-keywords)))
;; ── Main tokenizer ────────────────────────────────────────────────
(define
hs-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define
hs-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define hs-cur (fn () (hs-peek 0)))
(define hs-advance! (fn (n) (set! pos (+ pos n))))
(define
skip-ws!
(fn
()
(when
(and (< pos src-len) (hs-ws? (hs-cur)))
(hs-advance! 1)
(skip-ws!))))
(define
skip-comment!
(fn
()
(when
(and (< pos src-len) (not (= (hs-cur) "\n")))
(hs-advance! 1)
(skip-comment!))))
(define
read-ident
(fn
(start)
(when
(and (< pos src-len) (hs-ident-char? (hs-cur)))
(hs-advance! 1)
(read-ident start))
(slice src start pos)))
(define
read-number
(fn
(start)
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-number start))
(when
(and
(< pos src-len)
(= (hs-cur) ".")
(< (+ pos 1) src-len)
(hs-digit? (hs-peek 1)))
(hs-advance! 1)
(define
read-frac
(fn
()
(when
(and (< pos src-len) (hs-digit? (hs-cur)))
(hs-advance! 1)
(read-frac))))
(read-frac))
(let
((num-end pos))
(when
(and
(< pos src-len)
(or (= (hs-cur) "m") (= (hs-cur) "s")))
(if
(and
(= (hs-cur) "m")
(< (+ pos 1) src-len)
(= (hs-peek 1) "s"))
(hs-advance! 2)
(when (= (hs-cur) "s") (hs-advance! 1))))
(slice src start pos))))
(define
read-string
(fn
(quote-char)
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(= (hs-cur) "\\")
(do
(hs-advance! 1)
(when
(< pos src-len)
(let
((ch (hs-cur)))
(cond
(= ch "n")
(append! chars "\n")
(= ch "t")
(append! chars "\t")
(= ch "\\")
(append! chars "\\")
(= ch quote-char)
(append! chars quote-char)
:else (do (append! chars "\\") (append! chars ch)))
(hs-advance! 1)))
(loop))
(= (hs-cur) quote-char)
(hs-advance! 1)
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-template
(fn
()
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(= (hs-cur) "`")
(hs-advance! 1)
(and
(= (hs-cur) "$")
(< (+ pos 1) src-len)
(= (hs-peek 1) "{"))
(do
(append! chars "${")
(hs-advance! 2)
(let
((depth 1))
(define
inner
(fn
()
(when
(and (< pos src-len) (> depth 0))
(cond
(= (hs-cur) "{")
(do
(set! depth (+ depth 1))
(append! chars (hs-cur))
(hs-advance! 1)
(inner))
(= (hs-cur) "}")
(do
(set! depth (- depth 1))
(when (> depth 0) (append! chars (hs-cur)))
(hs-advance! 1)
(when (> depth 0) (inner)))
:else (do
(append! chars (hs-cur))
(hs-advance! 1)
(inner))))))
(inner))
(append! chars "}")
(loop))
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-selector
(fn
()
(let
((chars (list)))
(hs-advance! 1)
(define
loop
(fn
()
(cond
(>= pos src-len)
nil
(and
(= (hs-cur) "/")
(< (+ pos 1) src-len)
(= (hs-peek 1) ">"))
(hs-advance! 2)
:else (do (append! chars (hs-cur)) (hs-advance! 1) (loop)))))
(loop)
(join "" chars))))
(define
read-class-name
(fn
(start)
(when
(and
(< pos src-len)
(or
(hs-ident-char? (hs-cur))
(= (hs-cur) ":")
(= (hs-cur) "\\")
(= (hs-cur) "[")
(= (hs-cur) "]")
(= (hs-cur) "(")
(= (hs-cur) ")")))
(when (= (hs-cur) "\\") (hs-advance! 1))
(hs-advance! 1)
(read-class-name start))
(slice src start pos)))
(define
hs-emit!
(fn
(type value start)
(append! tokens (hs-make-token type value start))))
(define
scan!
(fn
()
(skip-ws!)
(when
(< pos src-len)
(let
((ch (hs-cur)) (start pos))
(cond
(and (= ch "/") (< (+ pos 1) src-len) (= (hs-peek 1) "/"))
(do (hs-advance! 2) (skip-comment!) (scan!))
(and
(= ch "<")
(< (+ pos 1) src-len)
(not (= (hs-peek 1) "="))
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) ".")
(= (hs-peek 1) "#")
(= (hs-peek 1) "[")
(= (hs-peek 1) "*")
(= (hs-peek 1) ":")))
(do (hs-emit! "selector" (read-selector) start) (scan!))
(and
(= ch ".")
(< (+ pos 1) src-len)
(or
(hs-letter? (hs-peek 1))
(= (hs-peek 1) "-")
(= (hs-peek 1) "_")))
(do
(hs-advance! 1)
(hs-emit! "class" (read-class-name pos) start)
(scan!))
(and
(= ch "#")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "id" (read-ident pos) start)
(scan!))
(and
(= ch "@")
(< (+ pos 1) src-len)
(hs-ident-char? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "attr" (read-ident pos) start)
(scan!))
(and
(= ch "~")
(< (+ pos 1) src-len)
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "component" (str "~" (read-ident pos)) start)
(scan!))
(and
(= ch "*")
(< (+ pos 1) src-len)
(hs-letter? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "style" (read-ident pos) start)
(scan!))
(and
(= ch ":")
(< (+ pos 1) src-len)
(hs-ident-start? (hs-peek 1)))
(do
(hs-advance! 1)
(hs-emit! "local" (read-ident pos) start)
(scan!))
(or
(= ch "\"")
(and
(= ch "'")
(not
(and
(< (+ pos 1) src-len)
(= (hs-peek 1) "s")
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2))))))))
(do (hs-emit! "string" (read-string ch) start) (scan!))
(= ch "`")
(do (hs-emit! "template" (read-template) start) (scan!))
(hs-digit? ch)
(do (hs-emit! "number" (read-number start) start) (scan!))
(hs-ident-start? ch)
(do
(let
((word (read-ident start)))
(hs-emit!
(if (hs-keyword? word) "keyword" "ident")
word
start))
(scan!))
(and
(or (= ch "=") (= ch "!") (= ch "<") (= ch ">"))
(< (+ pos 1) src-len)
(= (hs-peek 1) "="))
(do
(if
(and
(or (= ch "=") (= ch "!"))
(< (+ pos 2) src-len)
(= (hs-peek 2) "="))
(do (hs-emit! "op" (str ch "==") start) (hs-advance! 3))
(do (hs-emit! "op" (str ch "=") start) (hs-advance! 2)))
(scan!))
(and
(= ch "'")
(< (+ pos 1) src-len)
(= (hs-peek 1) "s")
(or
(>= (+ pos 2) src-len)
(not (hs-ident-char? (hs-peek 2)))))
(do (hs-emit! "op" "'s" start) (hs-advance! 2) (scan!))
(= ch "(")
(do
(hs-emit! "paren-open" "(" start)
(hs-advance! 1)
(scan!))
(= ch ")")
(do
(hs-emit! "paren-close" ")" start)
(hs-advance! 1)
(scan!))
(= ch "[")
(do
(hs-emit! "bracket-open" "[" start)
(hs-advance! 1)
(scan!))
(= ch "]")
(do
(hs-emit! "bracket-close" "]" start)
(hs-advance! 1)
(scan!))
(= ch "{")
(do
(hs-emit! "brace-open" "{" start)
(hs-advance! 1)
(scan!))
(= ch "}")
(do
(hs-emit! "brace-close" "}" start)
(hs-advance! 1)
(scan!))
(= ch ",")
(do (hs-emit! "comma" "," start) (hs-advance! 1) (scan!))
(= ch "+")
(do (hs-emit! "op" "+" start) (hs-advance! 1) (scan!))
(= ch "-")
(do (hs-emit! "op" "-" start) (hs-advance! 1) (scan!))
(= ch "/")
(do (hs-emit! "op" "/" start) (hs-advance! 1) (scan!))
(= ch "=")
(do (hs-emit! "op" "=" start) (hs-advance! 1) (scan!))
(= ch "<")
(do (hs-emit! "op" "<" start) (hs-advance! 1) (scan!))
(= ch ">")
(do (hs-emit! "op" ">" start) (hs-advance! 1) (scan!))
(= ch "!")
(do (hs-emit! "op" "!" start) (hs-advance! 1) (scan!))
(= ch "*")
(do (hs-emit! "op" "*" start) (hs-advance! 1) (scan!))
(= ch "%")
(do (hs-emit! "op" "%" start) (hs-advance! 1) (scan!))
(= ch ".")
(do (hs-emit! "dot" "." start) (hs-advance! 1) (scan!))
:else (do (hs-advance! 1) (scan!)))))))
(scan!)
(hs-emit! "eof" nil pos)
tokens)))