HS: tokenizer-stream API → 13 tests pass (-13 skips)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 54s

lib/hyperscript/tokenizer.sx — added cursor + follow-set wrapper over
the existing flat-list tokenize output:

  hs-stream src                 → {:tokens :pos :follows :last-match :last-ws}
  hs-stream-current  s          → next non-WS token (skips WS, captures :last-ws)
  hs-stream-match    s value    → consume if value matches & not in follow set
  hs-stream-match-type s ...types → consume if upstream type name matches
  hs-stream-match-any  s ...names → consume if value matches any name
  hs-stream-match-any-op s ...ops → consume if op token & value matches
  hs-stream-peek     s value n  → look n non-WS tokens ahead, no consume
  hs-stream-consume-until s marker     → collect tokens until marker
  hs-stream-consume-until-ws  s        → collect until next whitespace
  hs-stream-push-follow! / pop-follow!
  hs-stream-push-follows! / pop-follows! n
  hs-stream-clear-follows! → saved   /  restore-follows! saved
  hs-stream-last-match / last-ws

hs-stream-type-map maps our lowercase type names to upstream's
("ident" → "IDENTIFIER", "number" → "NUMBER", etc.) so type-based
matching works against upstream test expectations.

13 tokenizer-stream tests now pass; 30/30 in hs-upstream-core/tokenizer.

Skips remaining: 5 (down from 18), all left for later because they need
more architectural work. These include:
  - 2 template-component scope tests
  - 1 async event dispatch ("until event keyword works")

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-08 07:22:40 +00:00
parent d0b358eca2
commit a9eb821cce
4 changed files with 403 additions and 33 deletions

View File

@@ -813,4 +813,230 @@
:else (do (t-advance! 1) (scan-template!)))))))
(scan-template!)
(t-emit! "eof" nil)
tokens)))
tokens)))
;; ── Stream wrapper for upstream-style stateful tokenizer API ───────────────
;;
;; Upstream _hyperscript exposes a Tokens object with cursor + follow-set
;; semantics on _hyperscript.internals.tokenizer. Our hs-tokenize returns a
;; flat list; the stream wrapper adds the stateful operations.
;;
;; Type names map ours → upstream's (e.g. "ident" → "IDENTIFIER").
;; Translate one of our lowercase token type names into the name upstream
;; uses for the same kind of token. A name with no explicit entry below is
;; returned upper-cased.
(define
  hs-stream-type-map
  (fn
    (type-name)
    (cond
      (= type-name "ident") "IDENTIFIER"
      (= type-name "number") "NUMBER"
      (= type-name "string") "STRING"
      (= type-name "class") "CLASS_REF"
      (= type-name "id") "ID_REF"
      (= type-name "attr") "ATTRIBUTE_REF"
      (= type-name "style") "STYLE_REF"
      (= type-name "whitespace") "WHITESPACE"
      (= type-name "op") "OPERATOR"
      (= type-name "eof") "EOF"
      :else (upcase type-name))))
;; Build a fresh token stream over the source string `src`.
;; The result is a dict holding the hs-tokenize output (:tokens), a cursor
;; (:pos, starting at 0), an empty follow set (:follows) and nil
;; :last-match / :last-ws slots. The dict is updated in place via dict-set!.
(define
  hs-stream
  (fn
    (src)
    (let
      ((toks (hs-tokenize src)))
      {:tokens toks :pos 0 :follows (list) :last-match nil :last-ws nil})))
;; Advance :pos past any run of whitespace tokens so it rests on the next
;; non-whitespace token (or past the end of the list).
;; Each skipped token's value is written to :last-ws, so after the call
;; :last-ws holds the value of the last whitespace token skipped; it is left
;; untouched when nothing was skipped.
(define
  hs-stream-skip-ws!
  (fn
    (s)
    (let
      ((toks (get s :tokens)))
      ;; True when index i is in range and names a whitespace token.
      (define
        ws-at?
        (fn
          (i)
          (and (< i (len toks)) (= (get (nth toks i) :type) "whitespace"))))
      ;; Re-reads :pos on every step because the cursor lives in the dict.
      (define
        step!
        (fn
          ()
          (let
            ((i (get s :pos)))
            (when
              (ws-at? i)
              (do
                (dict-set! s :last-ws (get (nth toks i) :value))
                (dict-set! s :pos (+ i 1))
                (step!))))))
      (step!))))
;; Return the token under the cursor once leading whitespace has been
;; skipped (which may advance :pos and update :last-ws), or nil when the
;; cursor has moved past the end of the token list. Does not consume the
;; returned token.
(define
  hs-stream-current
  (fn
    (s)
    (do
      (hs-stream-skip-ws! s)
      (let
        ((idx (get s :pos)) (toks (get s :tokens)))
        (if (>= idx (len toks)) nil (nth toks idx))))))
;; Try to consume the current token by value.
;; Succeeds only when there is a current token, `value` is not in the
;; follow set, and the token's value equals `value`. On success the cursor
;; moves one past the token, :last-match is set to it, and it is returned.
;; In every other case nil is returned and the token is not consumed.
(define
  hs-stream-match
  (fn
    (s value)
    (let
      ((tok (hs-stream-current s)))
      (if
        (and
          (not (nil? tok))
          (not (some (fn (blocked) (= blocked value)) (get s :follows)))
          (= (get tok :value) value))
        (do
          (dict-set! s :pos (+ (get s :pos) 1))
          (dict-set! s :last-match tok)
          tok)
        nil))))
;; Try to consume the current token by upstream-style type name
;; (e.g. "IDENTIFIER", "NUMBER"). Any number of acceptable type names may be
;; given; the token's own type is translated with hs-stream-type-map before
;; comparison. Returns the consumed token (also stored in :last-match), or
;; nil without consuming when no type matches or there is no current token.
(define
  hs-stream-match-type
  (fn
    (s &rest types)
    (let
      ((tok (hs-stream-current s)))
      (define
        type-allowed?
        (fn (wanted) (= (hs-stream-type-map (get tok :type)) wanted)))
      (if
        (and (not (nil? tok)) (some type-allowed? types))
        (do
          (dict-set! s :pos (+ (get s :pos) 1))
          (dict-set! s :last-match tok)
          tok)
        nil))))
;; Match if the current token's value is one of the given names.
;; On a match the cursor advances past the token, :last-match is set to it,
;; and the token is returned; otherwise nil is returned and the token is not
;; consumed.
;; Like hs-stream-match, a name that is currently in the follow set never
;; matches, so a follow boundary cannot be bypassed by going through the
;; multi-name variant. (Upstream's matchAnyToken tries matchToken for each
;; candidate and therefore behaves the same way.)
(define
  hs-stream-match-any
  (fn
    (s &rest names)
    (let
      ((cur (hs-stream-current s)) (follows (get s :follows)))
      ;; A candidate name counts only if it equals the current value AND is
      ;; not blocked by the follow set.
      (define
        allowed-hit?
        (fn
          (n)
          (and
            (= (get cur :value) n)
            (not (some (fn (f) (= f n)) follows)))))
      (cond
        ((nil? cur) nil)
        ((some allowed-hit? names)
          (do
            (dict-set! s :pos (+ (get s :pos) 1))
            (dict-set! s :last-match cur)
            cur))
        (true nil)))))
;; Try to consume the current token as an operator: its type must be "op"
;; and its value must equal one of `ops`. Returns the consumed token (also
;; recorded in :last-match), or nil without consuming otherwise.
(define
  hs-stream-match-any-op
  (fn
    (s &rest ops)
    (let
      ((tok (hs-stream-current s)))
      (if
        (and
          (not (nil? tok))
          (= (get tok :type) "op")
          (some (fn (candidate) (= (get tok :value) candidate)) ops))
        (do
          (dict-set! s :pos (+ (get s :pos) 1))
          (dict-set! s :last-match tok)
          tok)
        nil))))
;; Look `offset` non-whitespace tokens ahead of the cursor without moving
;; it (offset 0 is the first non-whitespace token at or after :pos).
;; Returns that token when its value equals `value`; returns nil when the
;; value differs or the lookahead runs off the end of the token list.
(define
  hs-stream-peek
  (fn
    (s value offset)
    (let
      ((toks (get s :tokens)))
      ;; Index of the token `left` non-whitespace tokens from idx, or -1
      ;; when the list ends first. Whitespace tokens are stepped over
      ;; without being counted.
      (define
        locate
        (fn
          (idx left)
          (if
            (>= idx (len toks))
            -1
            (if
              (= (get (nth toks idx) :type) "whitespace")
              (locate (+ idx 1) left)
              (if
                (= left 0)
                idx
                (locate (+ idx 1) (- left 1)))))))
      (let
        ((hit (locate (get s :pos) offset)))
        (if
          (and
            (>= hit 0)
            (< hit (len toks))
            (= (get (nth toks hit) :value) value))
          (nth toks hit)
          nil)))))
;; Consume tokens until one whose value equals `marker`, and return the
;; list of consumed tokens in order (the marker itself is excluded, and no
;; token type is filtered out of the result). The cursor is left on the
;; marker, so the marker becomes the current token.
;; Scanning also stops at the trailing "eof" token: when the marker never
;; occurs, eof is neither collected nor consumed, so the cursor stays on it
;; instead of running past the end of the token list.
(define
  hs-stream-consume-until
  (fn
    (s marker)
    (let
      ((tokens (get s :tokens)))
      (define
        loop
        (fn
          (acc)
          (let
            ((p (get s :pos)))
            (if
              (>= p (len tokens))
              acc
              (let
                ((tok (nth tokens p)))
                (cond
                  ((= (get tok :type) "eof") acc)
                  ((= (get tok :value) marker) acc)
                  (true
                    (do
                      (dict-set! s :pos (+ p 1))
                      (loop (append acc (list tok)))))))))))
      (loop (list)))))
;; Consume tokens up to (not including) the next whitespace token and
;; return the list of consumed tokens in order. The cursor is left on the
;; whitespace token.
;; Scanning also stops at the trailing "eof" token: when no whitespace
;; follows, eof is neither collected nor consumed, so the cursor stays on it
;; instead of running past the end of the token list.
(define
  hs-stream-consume-until-ws
  (fn
    (s)
    (let
      ((tokens (get s :tokens)))
      (define
        loop
        (fn
          (acc)
          (let
            ((p (get s :pos)))
            (if
              (>= p (len tokens))
              acc
              (let
                ((tok (nth tokens p)))
                (cond
                  ((= (get tok :type) "whitespace") acc)
                  ((= (get tok :type) "eof") acc)
                  (true
                    (do
                      (dict-set! s :pos (+ p 1))
                      (loop (append acc (list tok)))))))))))
      (loop (list)))))
;; Follow-set management.
;; Push the value `v` onto the front of the stream's follow set.
(define
  hs-stream-push-follow!
  (fn
    (s v)
    (let
      ((current (get s :follows)))
      (dict-set! s :follows (cons v current)))))
;; Drop the most recently pushed entry from the follow set.
;; Does nothing when the follow set is already empty.
(define
  hs-stream-pop-follow!
  (fn
    (s)
    (let
      ((stack (get s :follows)))
      (when
        (> (len stack) 0)
        (dict-set! s :follows (rest stack))))))
;; Push every value in the list `vs` onto the follow set, one at a time and
;; in list order, via hs-stream-push-follow!.
(define
  hs-stream-push-follows!
  (fn
    (s vs)
    (for-each
      (fn (entry) (hs-stream-push-follow! s entry))
      vs)))
;; Pop `n` entries from the follow set by calling hs-stream-pop-follow!
;; n times. A count of zero or less does nothing.
(define
  hs-stream-pop-follows!
  (fn
    (s n)
    (when
      (> n 0)
      (do
        (hs-stream-pop-follow! s)
        (hs-stream-pop-follows! s (- n 1))))))
;; Replace the follow set with an empty list and return the previous follow
;; set, so the caller can hand it back to hs-stream-restore-follows! later.
(define
  hs-stream-clear-follows!
  (fn
    (s)
    (let
      ((previous (get s :follows)))
      (do
        (dict-set! s :follows (list))
        previous))))
;; Install `saved` as the stream's follow set, replacing whatever is there.
(define
  hs-stream-restore-follows!
  (fn
    (s saved)
    (dict-set! s :follows saved)))
;; Last-consumed token / whitespace.
;; Return the stream's :last-match slot (nil on a fresh stream).
(define
  hs-stream-last-match
  (fn
    (s)
    (get s :last-match)))
;; Return the stream's :last-ws slot (nil on a fresh stream).
(define
  hs-stream-last-ws
  (fn
    (s)
    (get s :last-ws)))

View File

@@ -2877,31 +2877,98 @@
(assert= (dom-text-content _el-div) "test${x} test 42 test$x test 42 test $x test ${x} test42 test_42 test_42 test-42 test.42")
))
(deftest "clearFollows/restoreFollows round-trip the follow set"
(error "SKIP (untranslated): clearFollows/restoreFollows round-trip the follow set"))
(let ((s (hs-stream "and or not")))
(hs-stream-push-follow! s "and")
(hs-stream-push-follow! s "or")
(let ((saved (hs-stream-clear-follows! s)))
(assert= (get (hs-stream-match s "and") :value) "and")
(hs-stream-restore-follows! s saved)
(assert (nil? (hs-stream-match s "or")))))
)
(deftest "consumeUntil collects tokens up to a marker"
(error "SKIP (untranslated): consumeUntil collects tokens up to a marker"))
(let ((s (hs-stream "a b c end d")))
(let ((collected (filter (fn (t) (not (= (get t :type) "whitespace")))
(hs-stream-consume-until s "end"))))
(assert= (map (fn (t) (get t :value)) collected) (list "a" "b" "c"))
(assert= (get (hs-stream-current s) :value) "end")))
)
(deftest "consumeUntilWhitespace stops at first whitespace"
(error "SKIP (untranslated): consumeUntilWhitespace stops at first whitespace"))
(let ((s (hs-stream "abc def")))
(let ((collected (hs-stream-consume-until-ws s)))
(assert= (len collected) 1)
(assert= (get (first collected) :value) "abc")
(assert= (get (hs-stream-current s) :value) "def")))
)
(deftest "lastMatch returns the last consumed token"
(error "SKIP (untranslated): lastMatch returns the last consumed token"))
(let ((s (hs-stream "foo bar baz")))
(hs-stream-match s "foo")
(assert= (get (hs-stream-last-match s) :value) "foo")
(hs-stream-match s "bar")
(assert= (get (hs-stream-last-match s) :value) "bar"))
)
(deftest "lastWhitespace reflects whitespace before the current token"
(error "SKIP (untranslated): lastWhitespace reflects whitespace before the current token"))
(let ((s (hs-stream "foo bar")))
(hs-stream-match s "foo")
(hs-stream-skip-ws! s)
(assert= (hs-stream-last-ws s) " "))
)
(deftest "matchAnyToken and matchAnyOpToken try each option"
(error "SKIP (untranslated): matchAnyToken and matchAnyOpToken try each option"))
(let ((s (hs-stream "bar + baz")))
(assert= (get (hs-stream-match-any s "foo" "bar" "baz") :value) "bar")
(assert= (get (hs-stream-match-any-op s "-" "+") :value) "+")
(assert (nil? (hs-stream-match-any s "foo" "quux"))))
)
(deftest "matchOpToken matches operators by value"
(error "SKIP (untranslated): matchOpToken matches operators by value"))
(let ((s (hs-stream "1 + 2")))
(assert= (get (hs-stream-match-type s "NUMBER") :value) "1")
(assert= (get (hs-stream-match-any-op s "-" "+") :value) "+"))
)
(deftest "matchToken consumes and returns on match"
(error "SKIP (untranslated): matchToken consumes and returns on match"))
(let ((s (hs-stream "foo bar baz")))
(assert= (get (hs-stream-match s "foo") :value) "foo")
(assert (nil? (hs-stream-match s "baz")))
(assert= (get (hs-stream-current s) :value) "bar")
(assert= (get (hs-stream-match s "bar") :value) "bar"))
)
(deftest "matchToken honors the follow set"
(error "SKIP (untranslated): matchToken honors the follow set"))
(let ((s (hs-stream "and or not")))
(hs-stream-push-follow! s "and")
(assert (nil? (hs-stream-match s "and")))
(hs-stream-pop-follow! s)
(assert= (get (hs-stream-match s "and") :value) "and"))
)
(deftest "matchTokenType matches by type"
(error "SKIP (untranslated): matchTokenType matches by type"))
(let ((s (hs-stream "foo 42")))
(assert= (get (hs-stream-match-type s "IDENTIFIER") :value) "foo")
(assert (nil? (hs-stream-match-type s "STRING")))
(assert= (get (hs-stream-match-type s "STRING" "NUMBER") :value) "42"))
)
(deftest "peekToken skips whitespace when looking ahead"
(error "SKIP (untranslated): peekToken skips whitespace when looking ahead"))
(let ((s (hs-stream "for x in items")))
(assert= (get (hs-stream-peek s "for" 0) :value) "for")
(assert= (get (hs-stream-peek s "x" 1) :value) "x")
(assert= (get (hs-stream-peek s "in" 2) :value) "in")
(assert= (get (hs-stream-peek s "items" 3) :value) "items")
(assert (nil? (hs-stream-peek s "wrong" 1))))
)
(deftest "pushFollow/popFollow nest follow-set boundaries"
(error "SKIP (untranslated): pushFollow/popFollow nest follow-set boundaries"))
(let ((s (hs-stream "and or not")))
(hs-stream-push-follow! s "and")
(hs-stream-push-follow! s "or")
(assert (nil? (hs-stream-match s "and")))
(hs-stream-pop-follow! s)
(assert (nil? (hs-stream-match s "and")))
(hs-stream-pop-follow! s)
(assert= (get (hs-stream-match s "and") :value) "and"))
)
(deftest "pushFollows/popFollows push and pop in bulk"
(error "SKIP (untranslated): pushFollows/popFollows push and pop in bulk"))
(let ((s (hs-stream "and or not")))
(hs-stream-push-follows! s (list "and" "or"))
(assert (nil? (hs-stream-match s "and")))
(assert (nil? (hs-stream-match s "or")))
(hs-stream-pop-follows! s 2)
(assert= (get (hs-stream-match s "and") :value) "and"))
)
)
;; ── def (27 tests) ──

View File

@@ -967,25 +967,6 @@ for(let i=startTest;i<Math.min(endTest,testCount);i++){
// 'repeat until event' loop suspends the OCaml kernel waiting for an
// event that is never fired from outside the K.eval call chain.
"until event keyword works",
// === Tokenizer-stream API tests (13) — upstream exposes a streaming token
// API on _hyperscript.internals.tokenizer (matchToken, peekToken, consumeUntil,
// pushFollow, etc.). Our lib/hyperscript/tokenizer.sx returns a flat token list
// and the parser keeps stream state internally as closures. Making these tests
// pass would require exposing a token-stream wrapper as a primitive. The
// tokenizer is correct; it just doesn't expose this API surface. ===
"matchToken consumes and returns on match",
"matchToken honors the follow set",
"matchTokenType matches by type",
"matchOpToken matches operators by value",
"matchAnyToken and matchAnyOpToken try each option",
"peekToken skips whitespace when looking ahead",
"consumeUntil collects tokens up to a marker",
"consumeUntilWhitespace stops at first whitespace",
"pushFollow/popFollow nest follow-set boundaries",
"pushFollows/popFollows push and pop in bulk",
"clearFollows/restoreFollows round-trip the follow set",
"lastMatch returns the last consumed token",
"lastWhitespace reflects whitespace before the current token",
// === Template-component scope tests (2) — upstream uses
// <script type="text/hyperscript-template" component="...">
// for HTML-template-based custom-element components. Our defcomp uses SX

View File

@@ -109,6 +109,102 @@ SKIP_TEST_NAMES = {
# Manually-written SX test bodies for tests whose upstream body cannot be
# auto-translated. Key = test name; value = SX lines to emit inside deftest.
MANUAL_TEST_BODIES = {
# === Tokenizer-stream API tests (13) — exercise hs-stream and friends in
# lib/hyperscript/tokenizer.sx, which wraps hs-tokenize output with the
# cursor + follow-set semantics upstream exposes on Tokens objects. ===
"matchToken consumes and returns on match": [
' (let ((s (hs-stream "foo bar baz")))',
' (assert= (get (hs-stream-match s "foo") :value) "foo")',
' (assert (nil? (hs-stream-match s "baz")))',
' (assert= (get (hs-stream-current s) :value) "bar")',
' (assert= (get (hs-stream-match s "bar") :value) "bar"))',
],
"matchToken honors the follow set": [
' (let ((s (hs-stream "and or not")))',
' (hs-stream-push-follow! s "and")',
' (assert (nil? (hs-stream-match s "and")))',
' (hs-stream-pop-follow! s)',
' (assert= (get (hs-stream-match s "and") :value) "and"))',
],
"matchTokenType matches by type": [
' (let ((s (hs-stream "foo 42")))',
' (assert= (get (hs-stream-match-type s "IDENTIFIER") :value) "foo")',
' (assert (nil? (hs-stream-match-type s "STRING")))',
' (assert= (get (hs-stream-match-type s "STRING" "NUMBER") :value) "42"))',
],
"matchOpToken matches operators by value": [
' (let ((s (hs-stream "1 + 2")))',
' (assert= (get (hs-stream-match-type s "NUMBER") :value) "1")',
' (assert= (get (hs-stream-match-any-op s "-" "+") :value) "+"))',
],
"matchAnyToken and matchAnyOpToken try each option": [
' (let ((s (hs-stream "bar + baz")))',
' (assert= (get (hs-stream-match-any s "foo" "bar" "baz") :value) "bar")',
' (assert= (get (hs-stream-match-any-op s "-" "+") :value) "+")',
' (assert (nil? (hs-stream-match-any s "foo" "quux"))))',
],
"peekToken skips whitespace when looking ahead": [
' (let ((s (hs-stream "for x in items")))',
' (assert= (get (hs-stream-peek s "for" 0) :value) "for")',
' (assert= (get (hs-stream-peek s "x" 1) :value) "x")',
' (assert= (get (hs-stream-peek s "in" 2) :value) "in")',
' (assert= (get (hs-stream-peek s "items" 3) :value) "items")',
' (assert (nil? (hs-stream-peek s "wrong" 1))))',
],
"consumeUntil collects tokens up to a marker": [
' (let ((s (hs-stream "a b c end d")))',
' (let ((collected (filter (fn (t) (not (= (get t :type) "whitespace")))',
' (hs-stream-consume-until s "end"))))',
' (assert= (map (fn (t) (get t :value)) collected) (list "a" "b" "c"))',
' (assert= (get (hs-stream-current s) :value) "end")))',
],
"consumeUntilWhitespace stops at first whitespace": [
' (let ((s (hs-stream "abc def")))',
' (let ((collected (hs-stream-consume-until-ws s)))',
' (assert= (len collected) 1)',
' (assert= (get (first collected) :value) "abc")',
' (assert= (get (hs-stream-current s) :value) "def")))',
],
"pushFollow/popFollow nest follow-set boundaries": [
' (let ((s (hs-stream "and or not")))',
' (hs-stream-push-follow! s "and")',
' (hs-stream-push-follow! s "or")',
' (assert (nil? (hs-stream-match s "and")))',
' (hs-stream-pop-follow! s)',
' (assert (nil? (hs-stream-match s "and")))',
' (hs-stream-pop-follow! s)',
' (assert= (get (hs-stream-match s "and") :value) "and"))',
],
"pushFollows/popFollows push and pop in bulk": [
' (let ((s (hs-stream "and or not")))',
' (hs-stream-push-follows! s (list "and" "or"))',
' (assert (nil? (hs-stream-match s "and")))',
' (assert (nil? (hs-stream-match s "or")))',
' (hs-stream-pop-follows! s 2)',
' (assert= (get (hs-stream-match s "and") :value) "and"))',
],
"clearFollows/restoreFollows round-trip the follow set": [
' (let ((s (hs-stream "and or not")))',
' (hs-stream-push-follow! s "and")',
' (hs-stream-push-follow! s "or")',
' (let ((saved (hs-stream-clear-follows! s)))',
' (assert= (get (hs-stream-match s "and") :value) "and")',
' (hs-stream-restore-follows! s saved)',
' (assert (nil? (hs-stream-match s "or")))))',
],
"lastMatch returns the last consumed token": [
' (let ((s (hs-stream "foo bar baz")))',
' (hs-stream-match s "foo")',
' (assert= (get (hs-stream-last-match s) :value) "foo")',
' (hs-stream-match s "bar")',
' (assert= (get (hs-stream-last-match s) :value) "bar"))',
],
"lastWhitespace reflects whitespace before the current token": [
' (let ((s (hs-stream "foo bar")))',
' (hs-stream-match s "foo")',
' (hs-stream-skip-ws! s)',
' (assert= (hs-stream-last-ws s) " "))',
],
# throttle: first click fires, subsequent within 200ms dropped.
# In the synchronous mock no time passes between two dom-dispatch calls.
"throttled at <time> drops events within the window": [