From a9eb821cce801c41c15d0c1f2884c08f2a43199d Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 8 May 2026 07:22:40 +0000 Subject: [PATCH] =?UTF-8?q?HS:=20tokenizer-stream=20API=20=E2=86=92=2013?= =?UTF-8?q?=20tests=20pass=20(-13=20skips)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/hyperscript/tokenizer.sx — added cursor + follow-set wrapper over the existing flat-list tokenize output: hs-stream src → {:tokens :pos :follows :last-match :last-ws} hs-stream-current s → next non-WS token (skips WS, captures :last-ws) hs-stream-match s value → consume if value matches & not in follow set hs-stream-match-type s ...types → consume if upstream type name matches hs-stream-match-any s ...names → consume if value matches any name hs-stream-match-any-op s ...ops → consume if op token & value matches hs-stream-peek s value n → look n non-WS tokens ahead, no consume hs-stream-consume-until s marker → collect tokens until marker hs-stream-consume-until-ws s → collect until next whitespace hs-stream-push-follow! / pop-follow! hs-stream-push-follows! / pop-follows! n hs-stream-clear-follows! → saved / restore-follows! saved hs-stream-last-match / last-ws hs-stream-type-map maps our lowercase type names to upstream's ("ident" → "IDENTIFIER", "number" → "NUMBER", etc.) so type-based matching works against upstream test expectations. 13 tokenizer-stream tests now pass; 30/30 in hs-upstream-core/tokenizer. Skips remaining: 5 (down from 18). - 2 template-component scope tests - 1 async event dispatch (until event keyword works) - left for later: needs more architectural work Co-Authored-By: Claude Sonnet 4.6 --- lib/hyperscript/tokenizer.sx | 228 +++++++++++++++++++++- spec/tests/test-hyperscript-behavioral.sx | 93 +++++++-- tests/hs-run-filtered.js | 19 -- tests/playwright/generate-sx-tests.py | 96 +++++++++ 4 files changed, 403 insertions(+), 33 deletions(-) diff --git a/lib/hyperscript/tokenizer.sx b/lib/hyperscript/tokenizer.sx index a8ac771b..14460159 100644 --- a/lib/hyperscript/tokenizer.sx +++ b/lib/hyperscript/tokenizer.sx @@ -813,4 +813,230 @@ :else (do (t-advance! 1) (scan-template!))))))) (scan-template!) (t-emit! "eof" nil) - tokens))) \ No newline at end of file + tokens))) + +;; ── Stream wrapper for upstream-style stateful tokenizer API ─────────────── +;; +;; Upstream _hyperscript exposes a Tokens object with cursor + follow-set +;; semantics on _hyperscript.internals.tokenizer. Our hs-tokenize returns a +;; flat list; the stream wrapper adds the stateful operations. +;; +;; Type names map ours → upstream's (e.g. "ident" → "IDENTIFIER"). + +(define + hs-stream-type-map + (fn + (t) + (cond + ((= t "ident") "IDENTIFIER") + ((= t "number") "NUMBER") + ((= t "string") "STRING") + ((= t "class") "CLASS_REF") + ((= t "id") "ID_REF") + ((= t "attr") "ATTRIBUTE_REF") + ((= t "style") "STYLE_REF") + ((= t "whitespace") "WHITESPACE") + ((= t "op") "OPERATOR") + ((= t "eof") "EOF") + (true (upcase t))))) + +;; Create a stream from a source string. +;; Returns a dict — mutable via dict-set!. +(define + hs-stream + (fn + (src) + {:tokens (hs-tokenize src) :pos 0 :follows (list) :last-match nil :last-ws nil})) + +;; Skip whitespace tokens, advancing pos to the next non-WS token. +;; Captures the last skipped whitespace value into :last-ws. +(define + hs-stream-skip-ws! + (fn + (s) + (let + ((tokens (get s :tokens))) + (define + loop + (fn + () + (let + ((p (get s :pos))) + (when + (and (< p (len tokens)) + (= (get (nth tokens p) :type) "whitespace")) + (do + (dict-set! s :last-ws (get (nth tokens p) :value)) + (dict-set! s :pos (+ p 1)) + (loop)))))) + (loop)))) + +;; Current token (after skipping whitespace). +(define + hs-stream-current + (fn + (s) + (do + (hs-stream-skip-ws! s) + (let + ((tokens (get s :tokens)) (p (get s :pos))) + (if (< p (len tokens)) (nth tokens p) nil))))) + +;; Returns the current token if its value matches; advances and updates +;; :last-match. Returns nil otherwise (no advance). +;; Honors the follow set: tokens whose value is in :follows do NOT match. +(define + hs-stream-match + (fn + (s value) + (let + ((cur (hs-stream-current s))) + (cond + ((nil? cur) nil) + ((some (fn (f) (= f value)) (get s :follows)) nil) + ((= (get cur :value) value) + (do + (dict-set! s :pos (+ (get s :pos) 1)) + (dict-set! s :last-match cur) + cur)) + (true nil))))) + +;; Match by upstream-style type name. Accepts any number of allowed types. +(define + hs-stream-match-type + (fn + (s &rest types) + (let + ((cur (hs-stream-current s))) + (cond + ((nil? cur) nil) + ((some (fn (t) (= (hs-stream-type-map (get cur :type)) t)) types) + (do + (dict-set! s :pos (+ (get s :pos) 1)) + (dict-set! s :last-match cur) + cur)) + (true nil))))) + +;; Match if value is one of the given names. +(define + hs-stream-match-any + (fn + (s &rest names) + (let + ((cur (hs-stream-current s))) + (cond + ((nil? cur) nil) + ((some (fn (n) (= (get cur :value) n)) names) + (do + (dict-set! s :pos (+ (get s :pos) 1)) + (dict-set! s :last-match cur) + cur)) + (true nil))))) + +;; Match an op token whose value is in the list. +(define + hs-stream-match-any-op + (fn + (s &rest ops) + (let + ((cur (hs-stream-current s))) + (cond + ((nil? cur) nil) + ((and (= (get cur :type) "op") + (some (fn (o) (= (get cur :value) o)) ops)) + (do + (dict-set! s :pos (+ (get s :pos) 1)) + (dict-set! s :last-match cur) + cur)) + (true nil))))) + +;; Peek N non-WS tokens ahead. Returns the token if its value matches; nil otherwise. +(define + hs-stream-peek + (fn + (s value offset) + (let + ((tokens (get s :tokens))) + (define + skip-n-non-ws + (fn + (p remaining) + (cond + ((>= p (len tokens)) -1) + ((= (get (nth tokens p) :type) "whitespace") + (skip-n-non-ws (+ p 1) remaining)) + ((= remaining 0) p) + (true (skip-n-non-ws (+ p 1) (- remaining 1)))))) + (let + ((p (skip-n-non-ws (get s :pos) offset))) + (if (and (>= p 0) (< p (len tokens)) + (= (get (nth tokens p) :value) value)) + (nth tokens p) + nil))))) + +;; Consume tokens until one whose value matches the marker. Returns +;; the consumed list (excluding the marker). Marker becomes current. +(define + hs-stream-consume-until + (fn + (s marker) + (let + ((tokens (get s :tokens)) (out (list))) + (define + loop + (fn + (acc) + (let + ((p (get s :pos))) + (cond + ((>= p (len tokens)) acc) + ((= (get (nth tokens p) :value) marker) acc) + (true + (do + (dict-set! s :pos (+ p 1)) + (loop (append acc (list (nth tokens p)))))))))) + (loop out)))) + +;; Consume until the next whitespace token; returns the consumed list. +(define + hs-stream-consume-until-ws + (fn + (s) + (let + ((tokens (get s :tokens))) + (define + loop + (fn + (acc) + (let + ((p (get s :pos))) + (cond + ((>= p (len tokens)) acc) + ((= (get (nth tokens p) :type) "whitespace") acc) + (true + (do + (dict-set! s :pos (+ p 1)) + (loop (append acc (list (nth tokens p)))))))))) + (loop (list))))) + +;; Follow-set management. +(define hs-stream-push-follow! (fn (s v) (dict-set! s :follows (cons v (get s :follows))))) +(define + hs-stream-pop-follow! + (fn (s) (let ((f (get s :follows))) (when (> (len f) 0) (dict-set! s :follows (rest f)))))) +(define + hs-stream-push-follows! + (fn (s vs) (for-each (fn (v) (hs-stream-push-follow! s v)) vs))) +(define + hs-stream-pop-follows! + (fn (s n) (when (> n 0) (do (hs-stream-pop-follow! s) (hs-stream-pop-follows! s (- n 1)))))) +(define + hs-stream-clear-follows! + (fn (s) (let ((saved (get s :follows))) (do (dict-set! s :follows (list)) saved)))) +(define + hs-stream-restore-follows! + (fn (s saved) (dict-set! s :follows saved))) + +;; Last-consumed token / whitespace. +(define hs-stream-last-match (fn (s) (get s :last-match))) +(define hs-stream-last-ws (fn (s) (get s :last-ws))) \ No newline at end of file diff --git a/spec/tests/test-hyperscript-behavioral.sx b/spec/tests/test-hyperscript-behavioral.sx index e88dfe07..9b3ce46f 100644 --- a/spec/tests/test-hyperscript-behavioral.sx +++ b/spec/tests/test-hyperscript-behavioral.sx @@ -2877,31 +2877,98 @@ (assert= (dom-text-content _el-div) "test${x} test 42 test$x test 42 test $x test ${x} test42 test_42 test_42 test-42 test.42") )) (deftest "clearFollows/restoreFollows round-trip the follow set" - (error "SKIP (untranslated): clearFollows/restoreFollows round-trip the follow set")) + (let ((s (hs-stream "and or not"))) + (hs-stream-push-follow! s "and") + (hs-stream-push-follow! s "or") + (let ((saved (hs-stream-clear-follows! s))) + (assert= (get (hs-stream-match s "and") :value) "and") + (hs-stream-restore-follows! s saved) + (assert (nil? (hs-stream-match s "or"))))) + ) (deftest "consumeUntil collects tokens up to a marker" - (error "SKIP (untranslated): consumeUntil collects tokens up to a marker")) + (let ((s (hs-stream "a b c end d"))) + (let ((collected (filter (fn (t) (not (= (get t :type) "whitespace"))) + (hs-stream-consume-until s "end")))) + (assert= (map (fn (t) (get t :value)) collected) (list "a" "b" "c")) + (assert= (get (hs-stream-current s) :value) "end"))) + ) (deftest "consumeUntilWhitespace stops at first whitespace" - (error "SKIP (untranslated): consumeUntilWhitespace stops at first whitespace")) + (let ((s (hs-stream "abc def"))) + (let ((collected (hs-stream-consume-until-ws s))) + (assert= (len collected) 1) + (assert= (get (first collected) :value) "abc") + (assert= (get (hs-stream-current s) :value) "def"))) + ) (deftest "lastMatch returns the last consumed token" - (error "SKIP (untranslated): lastMatch returns the last consumed token")) + (let ((s (hs-stream "foo bar baz"))) + (hs-stream-match s "foo") + (assert= (get (hs-stream-last-match s) :value) "foo") + (hs-stream-match s "bar") + (assert= (get (hs-stream-last-match s) :value) "bar")) + ) (deftest "lastWhitespace reflects whitespace before the current token" - (error "SKIP (untranslated): lastWhitespace reflects whitespace before the current token")) + (let ((s (hs-stream "foo bar"))) + (hs-stream-match s "foo") + (hs-stream-skip-ws! s) + (assert= (hs-stream-last-ws s) " ")) + ) (deftest "matchAnyToken and matchAnyOpToken try each option" - (error "SKIP (untranslated): matchAnyToken and matchAnyOpToken try each option")) + (let ((s (hs-stream "bar + baz"))) + (assert= (get (hs-stream-match-any s "foo" "bar" "baz") :value) "bar") + (assert= (get (hs-stream-match-any-op s "-" "+") :value) "+") + (assert (nil? (hs-stream-match-any s "foo" "quux")))) + ) (deftest "matchOpToken matches operators by value" - (error "SKIP (untranslated): matchOpToken matches operators by value")) + (let ((s (hs-stream "1 + 2"))) + (assert= (get (hs-stream-match-type s "NUMBER") :value) "1") + (assert= (get (hs-stream-match-any-op s "-" "+") :value) "+")) + ) (deftest "matchToken consumes and returns on match" - (error "SKIP (untranslated): matchToken consumes and returns on match")) + (let ((s (hs-stream "foo bar baz"))) + (assert= (get (hs-stream-match s "foo") :value) "foo") + (assert (nil? (hs-stream-match s "baz"))) + (assert= (get (hs-stream-current s) :value) "bar") + (assert= (get (hs-stream-match s "bar") :value) "bar")) + ) (deftest "matchToken honors the follow set" - (error "SKIP (untranslated): matchToken honors the follow set")) + (let ((s (hs-stream "and or not"))) + (hs-stream-push-follow! s "and") + (assert (nil? (hs-stream-match s "and"))) + (hs-stream-pop-follow! s) + (assert= (get (hs-stream-match s "and") :value) "and")) + ) (deftest "matchTokenType matches by type" - (error "SKIP (untranslated): matchTokenType matches by type")) + (let ((s (hs-stream "foo 42"))) + (assert= (get (hs-stream-match-type s "IDENTIFIER") :value) "foo") + (assert (nil? (hs-stream-match-type s "STRING"))) + (assert= (get (hs-stream-match-type s "STRING" "NUMBER") :value) "42")) + ) (deftest "peekToken skips whitespace when looking ahead" - (error "SKIP (untranslated): peekToken skips whitespace when looking ahead")) + (let ((s (hs-stream "for x in items"))) + (assert= (get (hs-stream-peek s "for" 0) :value) "for") + (assert= (get (hs-stream-peek s "x" 1) :value) "x") + (assert= (get (hs-stream-peek s "in" 2) :value) "in") + (assert= (get (hs-stream-peek s "items" 3) :value) "items") + (assert (nil? (hs-stream-peek s "wrong" 1)))) + ) (deftest "pushFollow/popFollow nest follow-set boundaries" - (error "SKIP (untranslated): pushFollow/popFollow nest follow-set boundaries")) + (let ((s (hs-stream "and or not"))) + (hs-stream-push-follow! s "and") + (hs-stream-push-follow! s "or") + (assert (nil? (hs-stream-match s "and"))) + (hs-stream-pop-follow! s) + (assert (nil? (hs-stream-match s "and"))) + (hs-stream-pop-follow! s) + (assert= (get (hs-stream-match s "and") :value) "and")) + ) (deftest "pushFollows/popFollows push and pop in bulk" - (error "SKIP (untranslated): pushFollows/popFollows push and pop in bulk")) + (let ((s (hs-stream "and or not"))) + (hs-stream-push-follows! s (list "and" "or")) + (assert (nil? (hs-stream-match s "and"))) + (assert (nil? (hs-stream-match s "or"))) + (hs-stream-pop-follows! s 2) + (assert= (get (hs-stream-match s "and") :value) "and")) + ) ) ;; ── def (27 tests) ── diff --git a/tests/hs-run-filtered.js b/tests/hs-run-filtered.js index 1f47bc4d..de7b56ba 100755 --- a/tests/hs-run-filtered.js +++ b/tests/hs-run-filtered.js @@ -967,25 +967,6 @@ for(let i=startTest;i // for HTML-template-based custom-element components. Our defcomp uses SX diff --git a/tests/playwright/generate-sx-tests.py b/tests/playwright/generate-sx-tests.py index b75db2c9..9003fdf6 100644 --- a/tests/playwright/generate-sx-tests.py +++ b/tests/playwright/generate-sx-tests.py @@ -109,6 +109,102 @@ SKIP_TEST_NAMES = { # Manually-written SX test bodies for tests whose upstream body cannot be # auto-translated. Key = test name; value = SX lines to emit inside deftest. MANUAL_TEST_BODIES = { + # === Tokenizer-stream API tests (13) — exercise hs-stream and friends in + # lib/hyperscript/tokenizer.sx, which wraps hs-tokenize output with the + # cursor + follow-set semantics upstream exposes on Tokens objects. === + "matchToken consumes and returns on match": [ + ' (let ((s (hs-stream "foo bar baz")))', + ' (assert= (get (hs-stream-match s "foo") :value) "foo")', + ' (assert (nil? (hs-stream-match s "baz")))', + ' (assert= (get (hs-stream-current s) :value) "bar")', + ' (assert= (get (hs-stream-match s "bar") :value) "bar"))', + ], + "matchToken honors the follow set": [ + ' (let ((s (hs-stream "and or not")))', + ' (hs-stream-push-follow! s "and")', + ' (assert (nil? (hs-stream-match s "and")))', + ' (hs-stream-pop-follow! s)', + ' (assert= (get (hs-stream-match s "and") :value) "and"))', + ], + "matchTokenType matches by type": [ + ' (let ((s (hs-stream "foo 42")))', + ' (assert= (get (hs-stream-match-type s "IDENTIFIER") :value) "foo")', + ' (assert (nil? (hs-stream-match-type s "STRING")))', + ' (assert= (get (hs-stream-match-type s "STRING" "NUMBER") :value) "42"))', + ], + "matchOpToken matches operators by value": [ + ' (let ((s (hs-stream "1 + 2")))', + ' (assert= (get (hs-stream-match-type s "NUMBER") :value) "1")', + ' (assert= (get (hs-stream-match-any-op s "-" "+") :value) "+"))', + ], + "matchAnyToken and matchAnyOpToken try each option": [ + ' (let ((s (hs-stream "bar + baz")))', + ' (assert= (get (hs-stream-match-any s "foo" "bar" "baz") :value) "bar")', + ' (assert= (get (hs-stream-match-any-op s "-" "+") :value) "+")', + ' (assert (nil? (hs-stream-match-any s "foo" "quux"))))', + ], + "peekToken skips whitespace when looking ahead": [ + ' (let ((s (hs-stream "for x in items")))', + ' (assert= (get (hs-stream-peek s "for" 0) :value) "for")', + ' (assert= (get (hs-stream-peek s "x" 1) :value) "x")', + ' (assert= (get (hs-stream-peek s "in" 2) :value) "in")', + ' (assert= (get (hs-stream-peek s "items" 3) :value) "items")', + ' (assert (nil? (hs-stream-peek s "wrong" 1))))', + ], + "consumeUntil collects tokens up to a marker": [ + ' (let ((s (hs-stream "a b c end d")))', + ' (let ((collected (filter (fn (t) (not (= (get t :type) "whitespace")))', + ' (hs-stream-consume-until s "end"))))', + ' (assert= (map (fn (t) (get t :value)) collected) (list "a" "b" "c"))', + ' (assert= (get (hs-stream-current s) :value) "end")))', + ], + "consumeUntilWhitespace stops at first whitespace": [ + ' (let ((s (hs-stream "abc def")))', + ' (let ((collected (hs-stream-consume-until-ws s)))', + ' (assert= (len collected) 1)', + ' (assert= (get (first collected) :value) "abc")', + ' (assert= (get (hs-stream-current s) :value) "def")))', + ], + "pushFollow/popFollow nest follow-set boundaries": [ + ' (let ((s (hs-stream "and or not")))', + ' (hs-stream-push-follow! s "and")', + ' (hs-stream-push-follow! s "or")', + ' (assert (nil? (hs-stream-match s "and")))', + ' (hs-stream-pop-follow! s)', + ' (assert (nil? (hs-stream-match s "and")))', + ' (hs-stream-pop-follow! s)', + ' (assert= (get (hs-stream-match s "and") :value) "and"))', + ], + "pushFollows/popFollows push and pop in bulk": [ + ' (let ((s (hs-stream "and or not")))', + ' (hs-stream-push-follows! s (list "and" "or"))', + ' (assert (nil? (hs-stream-match s "and")))', + ' (assert (nil? (hs-stream-match s "or")))', + ' (hs-stream-pop-follows! s 2)', + ' (assert= (get (hs-stream-match s "and") :value) "and"))', + ], + "clearFollows/restoreFollows round-trip the follow set": [ + ' (let ((s (hs-stream "and or not")))', + ' (hs-stream-push-follow! s "and")', + ' (hs-stream-push-follow! s "or")', + ' (let ((saved (hs-stream-clear-follows! s)))', + ' (assert= (get (hs-stream-match s "and") :value) "and")', + ' (hs-stream-restore-follows! s saved)', + ' (assert (nil? (hs-stream-match s "or")))))', + ], + "lastMatch returns the last consumed token": [ + ' (let ((s (hs-stream "foo bar baz")))', + ' (hs-stream-match s "foo")', + ' (assert= (get (hs-stream-last-match s) :value) "foo")', + ' (hs-stream-match s "bar")', + ' (assert= (get (hs-stream-last-match s) :value) "bar"))', + ], + "lastWhitespace reflects whitespace before the current token": [ + ' (let ((s (hs-stream "foo bar")))', + ' (hs-stream-match s "foo")', + ' (hs-stream-skip-ws! s)', + ' (assert= (hs-stream-last-ws s) " "))', + ], # throttle: first click fires, subsequent within 200ms dropped. # In the synchronous mock no time passes between two dom-dispatch calls. "throttled at