HS: tokenizer-stream API → 13 tests pass (-13 skips)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 54s

lib/hyperscript/tokenizer.sx — added cursor + follow-set wrapper over
the existing flat-list tokenize output:

  hs-stream src                 → {:tokens :pos :follows :last-match :last-ws}
  hs-stream-current  s          → next non-WS token (skips WS, captures :last-ws)
  hs-stream-match    s value    → consume if value matches & not in follow set
  hs-stream-match-type s ...types → consume if upstream type name matches
  hs-stream-match-any  s ...names → consume if value matches any name
  hs-stream-match-any-op s ...ops → consume if op token & value matches
  hs-stream-peek     s value n  → look n non-WS tokens ahead, no consume
  hs-stream-consume-until s marker     → collect tokens until marker
  hs-stream-consume-until-ws  s        → collect until next whitespace
  hs-stream-push-follow! / pop-follow!
  hs-stream-push-follows! / pop-follows! n
  hs-stream-clear-follows! → saved   /  restore-follows! saved
  hs-stream-last-match / last-ws

hs-stream-type-map maps our lowercase type names to upstream's
("ident" → "IDENTIFIER", "number" → "NUMBER", etc.) so type-based
matching works against upstream test expectations.

13 tokenizer-stream tests now pass; 30/30 in hs-upstream-core/tokenizer.

Skips remaining: 5 (down from 18).
  - 2 template-component scope tests
  - 1 async event dispatch (until event keyword works)
  - 2 others left for later: they need more architectural work

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-08 07:22:40 +00:00
parent d0b358eca2
commit a9eb821cce
4 changed files with 403 additions and 33 deletions

View File

@@ -813,4 +813,230 @@
:else (do (t-advance! 1) (scan-template!)))))))
(scan-template!)
(t-emit! "eof" nil)
tokens)))
tokens)))
;; ── Stream wrapper for upstream-style stateful tokenizer API ───────────────
;;
;; Upstream _hyperscript exposes a Tokens object with cursor + follow-set
;; semantics on _hyperscript.internals.tokenizer. Our hs-tokenize returns a
;; flat list; the stream wrapper adds the stateful operations.
;;
;; Type names map ours → upstream's (e.g. "ident" → "IDENTIFIER").
;; Translate one of our lowercase token type names into the upper-case
;; name used by upstream _hyperscript (e.g. "ident" → "IDENTIFIER").
;; A type without a dedicated entry is returned upper-cased.
(define hs-stream-type-map
  (fn (t)
    (let ((upstream-name
            (get
              {:ident "IDENTIFIER"
               :number "NUMBER"
               :string "STRING"
               :class "CLASS_REF"
               :id "ID_REF"
               :attr "ATTRIBUTE_REF"
               :style "STYLE_REF"
               :whitespace "WHITESPACE"
               :op "OPERATOR"
               :eof "EOF"}
              t)))
      ;; No table entry: fall back to the plain upper-cased name.
      (if (nil? upstream-name) (upcase t) upstream-name))))
;; Build a fresh token stream for the source string `src`.
;; The result is a dict holding the token list produced by hs-tokenize,
;; a cursor (:pos, starting at 0), an empty follow set (:follows) and
;; empty :last-match / :last-ws slots. The hs-stream-* functions update
;; it in place with dict-set!.
(define hs-stream
  (fn (src)
    (let ((toks (hs-tokenize src)))
      {:tokens toks
       :pos 0
       :follows (list)
       :last-match nil
       :last-ws nil})))
;; Move the cursor (:pos) forward past any run of "whitespace" tokens so
;; that it rests on the next non-whitespace token (or at the end of the
;; token list). Each skipped token's value is written to :last-ws, so the
;; slot ends up holding the last whitespace skipped; it is left untouched
;; when nothing is skipped.
(define hs-stream-skip-ws!
  (fn (s)
    (let ((toks (get s :tokens)))
      (define skip-from
        (fn (idx)
          ;; Stop at the end of the list or at the first non-WS token.
          (when (< idx (len toks))
            (let ((tok (nth toks idx)))
              (when (= (get tok :type) "whitespace")
                (do
                  (dict-set! s :last-ws (get tok :value))
                  (dict-set! s :pos (+ idx 1))
                  (skip-from (+ idx 1))))))))
      (skip-from (get s :pos)))))
;; Return the token under the cursor after skipping leading whitespace
;; (via hs-stream-skip-ws!, which may advance :pos and update :last-ws).
;; Returns nil when the cursor is at or past the end of the token list.
(define hs-stream-current
  (fn (s)
    (do
      (hs-stream-skip-ws! s)
      (let ((idx (get s :pos))
            (toks (get s :tokens)))
        (if (>= idx (len toks))
          nil
          (nth toks idx))))))
;; Try to consume the current non-whitespace token when its value equals
;; `value`. On success the cursor moves past the token, :last-match is
;; set to it and the token is returned.
;; Returns nil without consuming a token when the stream is exhausted,
;; when `value` is listed in the follow set (:follows), or when the
;; current token has a different value. Whitespace skipped while looking
;; up the current token stays skipped either way.
(define hs-stream-match
  (fn (s value)
    (let ((tok (hs-stream-current s)))
      (if (nil? tok)
        nil
        ;; A value in the follow set must not be matched here.
        (if (some (fn (blocked) (= blocked value)) (get s :follows))
          nil
          (if (= (get tok :value) value)
            (do
              (dict-set! s :pos (+ (get s :pos) 1))
              (dict-set! s :last-match tok)
              tok)
            nil))))))
;; Consume the current non-whitespace token when its type, translated to
;; the upstream name by hs-stream-type-map, equals any of `types`
;; (e.g. "IDENTIFIER", "NUMBER"). On success the cursor advances,
;; :last-match is set and the token is returned; otherwise nil is
;; returned and no token is consumed. The follow set is not consulted.
(define hs-stream-match-type
  (fn (s &rest types)
    (let ((tok (hs-stream-current s)))
      (if (nil? tok)
        nil
        (if (some
              (fn (wanted) (= (hs-stream-type-map (get tok :type)) wanted))
              types)
          (do
            (dict-set! s :pos (+ (get s :pos) 1))
            (dict-set! s :last-match tok)
            tok)
          nil)))))
;; Consume the current non-whitespace token when its value equals any of
;; `names`. On success the cursor advances, :last-match is set and the
;; token is returned; otherwise nil is returned and no token is consumed.
;;
;; Honors the follow set exactly like hs-stream-match: a value listed in
;; :follows never matches. Upstream's matchAnyToken has the same rule
;; because it delegates to matchToken for every candidate name.
(define hs-stream-match-any
  (fn (s &rest names)
    (let ((cur (hs-stream-current s)))
      (cond
        ((nil? cur) nil)
        ;; Any name that could match equals the current token's value, so
        ;; a single follow-set check on that value covers every candidate.
        ((some (fn (f) (= f (get cur :value))) (get s :follows)) nil)
        ((some (fn (n) (= (get cur :value) n)) names)
          (do
            (dict-set! s :pos (+ (get s :pos) 1))
            (dict-set! s :last-match cur)
            cur))
        (true nil)))))
;; Consume the current non-whitespace token when it is an "op" token
;; whose value equals any of `ops`. On success the cursor advances,
;; :last-match is set and the token is returned; otherwise nil is
;; returned and no token is consumed. The follow set is not consulted.
(define hs-stream-match-any-op
  (fn (s &rest ops)
    (let ((tok (hs-stream-current s)))
      (if (nil? tok)
        nil
        (if (= (get tok :type) "op")
          (if (some (fn (candidate) (= (get tok :value) candidate)) ops)
            (do
              (dict-set! s :pos (+ (get s :pos) 1))
              (dict-set! s :last-match tok)
              tok)
            nil)
          nil)))))
;; Look ahead without consuming anything. `offset` counts non-whitespace
;; tokens starting at the cursor: 0 is the next non-whitespace token,
;; 1 the one after it, and so on. Returns that token when its value
;; equals `value`; returns nil when the value differs or the token list
;; ends first. The stream itself is never modified.
(define hs-stream-peek
  (fn (s value offset)
    (let ((toks (get s :tokens)))
      (let ((total (len toks)))
        ;; Index of the token `left` non-WS tokens ahead of `idx`,
        ;; or -1 when the list runs out before reaching it.
        (define locate
          (fn (idx left)
            (if (>= idx total)
              -1
              (if (= (get (nth toks idx) :type) "whitespace")
                (locate (+ idx 1) left)
                (if (= left 0)
                  idx
                  (locate (+ idx 1) (- left 1)))))))
        (let ((hit (locate (get s :pos) offset)))
          (if (< hit 0)
            nil
            (if (>= hit total)
              nil
              (let ((candidate (nth toks hit)))
                (if (= (get candidate :value) value)
                  candidate
                  nil)))))))))
;; Consume tokens from the cursor up to, but not including, the first
;; token whose value equals `marker`. Returns the consumed tokens in
;; order; the marker token (when found) is left as the token under the
;; cursor.
;;
;; Works on the raw token list: whitespace tokens are compared against
;; the marker and collected like any other token.
;;
;; Consumption also stops at the "eof" token, which is never consumed.
;; When the marker is absent the cursor therefore rests on "eof" instead
;; of running past the end of the token list, matching upstream's
;; consumeUntil, which stops at EOF as well.
(define hs-stream-consume-until
  (fn (s marker)
    (let ((tokens (get s :tokens)))
      (define loop
        (fn (acc)
          (let ((p (get s :pos)))
            (if (>= p (len tokens))
              acc
              (let ((tok (nth tokens p)))
                (cond
                  ;; Keep the end-of-input sentinel in the stream.
                  ((= (get tok :type) "eof") acc)
                  ((= (get tok :value) marker) acc)
                  (true
                    (do
                      (dict-set! s :pos (+ p 1))
                      (loop (append acc (list tok)))))))))))
      (loop (list)))))
;; Consume tokens from the cursor up to, but not including, the next
;; "whitespace" token. Returns the consumed tokens in order; the
;; whitespace token (when found) is left under the cursor.
;;
;; Consumption also stops at the "eof" token, which is never consumed.
;; When no whitespace remains the cursor therefore rests on "eof" instead
;; of running past the end of the token list, matching upstream's
;; consumeUntilWhitespace, which stops at EOF as well.
(define hs-stream-consume-until-ws
  (fn (s)
    (let ((tokens (get s :tokens)))
      (define loop
        (fn (acc)
          (let ((p (get s :pos)))
            (if (>= p (len tokens))
              acc
              (let ((tok (nth tokens p)))
                (cond
                  ((= (get tok :type) "whitespace") acc)
                  ;; Keep the end-of-input sentinel in the stream.
                  ((= (get tok :type) "eof") acc)
                  (true
                    (do
                      (dict-set! s :pos (+ p 1))
                      (loop (append acc (list tok)))))))))))
      (loop (list)))))
;; Follow-set management.
;; Add `v` to the front of the stream's follow set (:follows).
(define hs-stream-push-follow!
  (fn (s v)
    (let ((existing (get s :follows)))
      (dict-set! s :follows (cons v existing)))))
;; Remove the front (most recently pushed) entry of the follow set.
;; Does nothing when the follow set is already empty.
(define hs-stream-pop-follow!
  (fn (s)
    (let ((stack (get s :follows)))
      (if (< 0 (len stack))
        (dict-set! s :follows (rest stack))
        nil))))
;; Push every value in `vs` onto the follow set, one hs-stream-push-follow!
;; call per element.
(define hs-stream-push-follows!
  (fn (s vs)
    (for-each
      (fn (item) (hs-stream-push-follow! s item))
      vs)))
;; Pop `n` entries off the follow set by calling hs-stream-pop-follow!
;; `n` times. Does nothing when `n` is zero or negative.
(define hs-stream-pop-follows!
  (fn (s n)
    (if (> n 0)
      (do
        (hs-stream-pop-follow! s)
        (hs-stream-pop-follows! s (- n 1)))
      nil)))
;; Empty the follow set and return what it held before, so the caller
;; can hand it back to hs-stream-restore-follows! later.
(define hs-stream-clear-follows!
  (fn (s)
    (let ((previous (get s :follows)))
      (dict-set! s :follows (list))
      previous)))
;; Replace the follow set with `saved`, typically the value returned
;; earlier by hs-stream-clear-follows!.
(define hs-stream-restore-follows!
  (fn (s saved)
    (dict-set! s :follows saved)))
;; Last-consumed token / whitespace.
;; The token most recently consumed by one of the hs-stream-match*
;; functions, or nil when nothing has been matched yet.
(define hs-stream-last-match
  (fn (s)
    (get s :last-match)))
;; The value of the whitespace token most recently skipped by
;; hs-stream-skip-ws!, or nil when no whitespace has been skipped yet.
(define hs-stream-last-ws
  (fn (s)
    (get s :last-ws)))