Merge branch 'loops/hs' into hs-f (E37 tokenizer, E40 fetch, DOM ref-eq, DOM tree fixes)

This commit is contained in:
2026-04-26 17:57:37 +00:00
10 changed files with 3556 additions and 3057 deletions

View File

@@ -125,19 +125,9 @@ SKIP_TEST_NAMES = {
"can ignore when target doesn't exist",
"can ignore when target doesn\\'t exist",
"can handle an or after a from clause",
# upstream 'fetch' category — depend on per-test sinon stubs for 404 / thrown errors,
# or on real DocumentFragment semantics (`its childElementCount` after `as html`).
# Our generic test-runner mock returns a fixed 200 response, so these cases
# (non-2xx handling, error path, before-fetch event, real DOM fragment) can't be
# exercised here.
# upstream 'fetch' category — real DocumentFragment semantics (`its childElementCount`
# after `as html`) not exercisable with our DOM mock.
"can do a simple fetch w/ html",
"triggers an event just before fetching",
"can catch an error that occurs when using fetch",
"throws on non-2xx response by default",
"do not throw passes through 404 response",
"don't throw passes through 404 response",
"as response does not throw on 404",
"Response can be converted to JSON via as JSON",
}
# Manually-written SX test bodies for tests whose upstream body cannot be
@@ -249,11 +239,18 @@ def parse_html(html):
# button HTML in `properly processes hyperscript X` tests). HTMLParser handles
# backslashes in attribute values as literal characters, so we leave them.
# HTML5 void elements — never have children, auto-pop from stack immediately.
VOID_TAGS = {'area','base','br','col','embed','hr','img','input','link',
'meta','param','source','track','wbr'}
elements = []
stack = []
class Parser(HTMLParser):
def handle_starttag(self, tag, attrs):
# Pop any void elements left on the stack (they have no close tag).
while stack and stack[-1]['tag'] in VOID_TAGS:
stack.pop()
el = {
'tag': tag, 'id': None, 'classes': [], 'hs': None,
'attrs': {}, 'inner': '', 'depth': len(stack),
@@ -283,6 +280,9 @@ def parse_html(html):
elements.append(el)
def handle_endtag(self, tag):
# Pop void elements first (they don't have close tags but may linger).
while stack and stack[-1]['tag'] in VOID_TAGS:
stack.pop()
if stack and stack[-1]['tag'] == tag:
stack.pop()
@@ -1002,6 +1002,24 @@ def parse_dev_body(body, elements, var_names):
else:
pre_setups.append(('__hs_config__', op_expr))
continue
# window.addEventListener(EVT, (param) => { param.target.PROP = 'VAL'; })
wa = re.search(
r"window\.addEventListener\(\s*(['\"])([^'\"]+)\1\s*,\s*"
r"\((\w+)\)\s*=>\s*\{\s*\3\.target\.(\w+)\s*=\s*['\"]([^'\"]+)['\"]\s*;?\s*\}",
m.group(1),
)
if wa:
ev_name = wa.group(2)
prop = wa.group(4)
val = wa.group(5)
attr = 'class' if prop == 'className' else prop
sx = (f'(host-call (host-global "window") "addEventListener" "{ev_name}" '
f'(fn (_event) (dom-set-attr (host-get _event "target") "{attr}" "{val}")))')
if seen_html:
ops.append(sx)
else:
pre_setups.append(('__hs_config__', sx))
continue
# fall through
# evaluate(() => _hyperscript.config.X = ...) single-line variant.
@@ -1293,7 +1311,9 @@ def process_hs_val(hs_val):
hs_val = hs_val.replace('\\n', '\n').replace('\\t', ' ')
# Preserve escaped quotes (\" → placeholder), strip remaining backslashes, restore
hs_val = hs_val.replace('\\"', '\x00QUOT\x00')
hs_val = hs_val.replace('\\$', '\x00DOLLAR\x00') # preserve \$ template escape
hs_val = hs_val.replace('\\', '')
hs_val = hs_val.replace('\x00DOLLAR\x00', '\\$') # restore \$
hs_val = hs_val.replace('\x00QUOT\x00', '\\"')
# Strip line comments BEFORE newline collapse — once newlines become `then`,
# an unterminated `//` / ` --` comment would consume the rest of the input.
@@ -1705,6 +1725,13 @@ def js_expr_to_sx(expr):
if s is None:
return None
arg_sx.append(s)
# Translate common array HO methods to SX primitives so SX lists work.
if method == 'reduce' and len(arg_sx) == 2:
return f'(reduce {arg_sx[0]} {arg_sx[1]} {obj})'
if method == 'map' and len(arg_sx) == 1:
return f'(map {arg_sx[0]} {obj})'
if method == 'filter' and len(arg_sx) == 1:
return f'(filter {arg_sx[0]} {obj})'
return f'(host-call {obj} "{method}" {" ".join(arg_sx)})'.strip()
# Property access: o.prop
@@ -1877,6 +1904,272 @@ def extract_hs_expr(raw):
return expr
def generate_tokenizer_test(test, safe_name):
    """Hardcoded SX translation for _hyperscript.internals.tokenizer tests (E37).

    Upstream tokenizer tests are matched by their exact name and re-emitted as
    an SX `(deftest ...)` form built from the hs-tokens-of / hs-stream-* /
    hs-token-* primitives.

    Args:
        test: dict with at least a 'name' key (the upstream test name).
        safe_name: sanitized name used as the deftest label.

    Returns:
        The deftest source as a single string, or None when `test` is not one
        of the tokenizer tests handled here.

    NOTE: relies on the module-level `sx_str` helper (defined elsewhere in
    this file) — presumably it renders a Python string as an SX string
    literal; confirm against its definition.
    """
    name = test['name']

    # --- tiny emitters for SX sub-expressions --------------------------------
    def to_(src, tmpl=False):
        """Return (hs-tokens-of <sx-str> [:template]) for HS source string src."""
        # Escape src into a double-quoted SX string literal.
        escaped = (src
                   .replace('\\', '\\\\')
                   .replace('"', '\\"')
                   .replace('\n', '\\n')
                   .replace('\r', '\\r')
                   .replace('\t', '\\t'))
        q = '"' + escaped + '"'
        suffix = ' :template' if tmpl else ''
        return f'(hs-tokens-of {q}{suffix})'

    def consume(s):
        # Consume and yield the next token of stream s.
        return f'(hs-stream-consume {s})'

    def tok_i(s, i):
        # Non-consuming look-ahead at token i of stream s.
        return f'(hs-stream-token {s} {i})'

    def has_more(s):
        # NOTE(review): currently unused — the one call site inlines this form.
        return f'(hs-stream-has-more {s})'

    def t_type(t):
        return f'(hs-token-type {t})'

    def t_val(t):
        return f'(hs-token-value {t})'

    def t_op(t):
        return f'(hs-token-op? {t})'

    def nth_list(s, i):
        # Token i of the stream's raw "list" field.
        return f'(nth (get {s} "list") {i})'

    def list_len(s):
        return f'(len (get {s} "list"))'

    def ae(actual, expected):
        # One assert= line inside the deftest body.
        return f' (assert= {actual} {expected})'

    def throws(expr):
        # Expect expr to raise: guard it and assert the handler ran.
        return (
            f' (let ((threw false))\n'
            f' (guard (e (true (set! threw true))) {expr})\n'
            f' (assert threw))'
        )

    lines = [f' (deftest "{safe_name}"']
    if name == 'handles $ in template properly':
        # In template mode a lone double quote is a plain token.
        s = to_('"', tmpl=True)
        lines.append(ae(t_val(tok_i(s, 0)), sx_str('"')))
    elif name == 'handles all special escapes properly':
        # Control-char escapes become char-from-code; \n/\r/\t stay literal.
        for src, exp in [
            ('"\\b"', '(char-from-code 8)'),
            ('"\\f"', '(char-from-code 12)'),
            ('"\\n"', '"\\n"'),
            ('"\\r"', '"\\r"'),
            ('"\\t"', '"\\t"'),
            ('"\\v"', '(char-from-code 11)'),
        ]:
            lines.append(ae(t_val(consume(to_(src))), exp))
    elif name == 'handles basic token types':
        lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
        lines.append(ae(t_type(consume(to_('1'))), '"NUMBER"'))
        # Each numeric form must tokenize as ONE NUMBER token that exhausts
        # the stream (no trailing tokens from the exponent / decimal point).
        for src in ['1.1', '1e6', '1e-6', '1.1e6', '1.1e-6']:
            sq = to_(src)
            lines.append(f' (let ((s {sq}))')
            lines.append(f' (let ((tok (hs-stream-consume s)))')
            lines.append(f' (assert= (hs-token-type tok) "NUMBER")')
            lines.append(f' (assert= (hs-stream-has-more s) false)))')
        lines.append(ae(t_type(consume(to_('.a'))), '"CLASS_REF"'))
        lines.append(ae(t_type(consume(to_('#a'))), '"ID_REF"'))
        lines.append(ae(t_type(consume(to_('"asdf"'))), '"STRING"'))
    elif name == 'handles class identifiers properly':
        # idx None → consume first token; otherwise index into the raw list
        # (these cases check .a after brackets/parens within an expression).
        for src, idx, exp_type, exp_val in [
            ('.a', None, 'CLASS_REF', '.a'),
            (' .a', None, 'CLASS_REF', '.a'),
            ('a.a', None, 'IDENTIFIER', 'a'),
            ('(a).a', 4, 'IDENTIFIER', 'a'),
            ('{a}.a', 4, 'IDENTIFIER', 'a'),
            ('[a].a', 4, 'IDENTIFIER', 'a'),
            ('(a(.a', 3, 'CLASS_REF', '.a'),
            ('{a{.a', 3, 'CLASS_REF', '.a'),
            ('[a[.a', 3, 'CLASS_REF', '.a'),
        ]:
            if idx is None:
                tok_expr = consume(to_(src))
            else:
                tok_expr = nth_list(to_(src), idx)
            lines.append(ae(t_type(tok_expr), f'"{exp_type}"'))
            lines.append(ae(t_val(tok_expr), sx_str(exp_val)))
    elif name == 'handles comments properly':
        # Expected = number of surviving tokens after comment stripping
        # (`--` line comments, `//` line comments, `---`/`----` edge cases).
        for src, expected in [
            ('--', 0),
            ('asdf--', 1),
            ('-- asdf', 0),
            ('--\nasdf', 1),
            ('--\nasdf--', 1),
            ('---asdf', 0),
            ('----\n---asdf', 0),
            ('----asdf----', 0),
            ('---\nasdf---', 1),
            ('// asdf', 0),
            ('///asdf', 0),
            ('asdf//', 1),
            ('asdf\n//', 2),
        ]:
            lines.append(ae(list_len(to_(src)), str(expected)))
    elif name == 'handles hex escapes properly':
        lines.append(ae(t_val(consume(to_('"\\x1f"'))), '(char-from-code 31)'))
        lines.append(ae(t_val(consume(to_('"\\x41"'))), '"A"'))
        lines.append(ae(t_val(consume(to_('"\\x41\\x61"'))), '"Aa"'))
        # Malformed \x escapes (missing / non-hex / short digits) must throw.
        for bad in ['"\\x"', '"\\xGG"', '"\\x4"']:
            lines.append(throws(consume(to_(bad))))
    elif name == 'handles id references properly':
        # Mirrors the CLASS_REF table above, for #id references.
        for src, idx, exp_type, exp_val in [
            ('#a', None, 'ID_REF', '#a'),
            (' #a', None, 'ID_REF', '#a'),
            ('a#a', None, 'IDENTIFIER', 'a'),
            ('(a)#a', 4, 'IDENTIFIER', 'a'),
            ('{a}#a', 4, 'IDENTIFIER', 'a'),
            ('[a]#a', 4, 'IDENTIFIER', 'a'),
            ('(a(#a', 3, 'ID_REF', '#a'),
            ('{a{#a', 3, 'ID_REF', '#a'),
            ('[a[#a', 3, 'ID_REF', '#a'),
        ]:
            if idx is None:
                tok_expr = consume(to_(src))
            else:
                tok_expr = nth_list(to_(src), idx)
            lines.append(ae(t_type(tok_expr), f'"{exp_type}"'))
            lines.append(ae(t_val(tok_expr), sx_str(exp_val)))
    elif name == 'handles identifiers properly':
        lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
        lines.append(ae(t_val(consume(to_('foo'))), '"foo"'))
        lines.append(ae(t_type(consume(to_(' foo '))), '"IDENTIFIER"'))
        lines.append(ae(t_val(consume(to_(' foo '))), '"foo"'))
        # Two successive consumes from ONE stream must skip whitespace/comments.
        for src, v1, v2 in [
            (' foo bar', 'foo', 'bar'),
            (' foo\n-- a comment\n bar', 'foo', 'bar'),
        ]:
            sq = to_(src)
            lines.append(f' (let ((s {sq}))')
            lines.append(f' (let ((tok1 (hs-stream-consume s)))')
            lines.append(f' (assert= (hs-token-type tok1) "IDENTIFIER")')
            lines.append(f' (assert= (hs-token-value tok1) {sx_str(v1)})')
            lines.append(f' (let ((tok2 (hs-stream-consume s)))')
            lines.append(f' (assert= (hs-token-type tok2) "IDENTIFIER")')
            lines.append(f' (assert= (hs-token-value tok2) {sx_str(v2)}))))')
    elif name == 'handles identifiers with numbers properly':
        # Digits are legal anywhere after the first identifier character.
        for src in ['f1oo', 'fo1o', 'foo1']:
            lines.append(ae(t_type(consume(to_(src))), '"IDENTIFIER"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(src)))
    elif name == 'handles look ahead property':
        # Indexed look-ahead past the end yields the EOF sentinel token.
        s = to_('a 1 + 1')
        for i, v in [(0, 'a'), (1, '1'), (2, '+'), (3, '1'), (4, '<<<EOF>>>')]:
            lines.append(ae(t_val(tok_i(s, i)), sx_str(v)))
    elif name == 'handles numbers properly':
        for src, v in [
            ('1', '1'),
            ('1.1', '1.1'),
            ('1234567890.1234567890', '1234567890.1234567890'),
            ('1e6', '1e6'),
            ('1e-6', '1e-6'),
            ('1.1e6', '1.1e6'),
            ('1.1e-6', '1.1e-6'),
        ]:
            lines.append(ae(t_type(consume(to_(src))), '"NUMBER"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
        # '1.1.1' is NOT one number: NUMBER, PERIOD, NUMBER — exactly 3 tokens.
        s = to_('1.1.1')
        toks = f'(get {s} "list")'
        lines.append(ae(f'(hs-token-type (nth {toks} 0))', '"NUMBER"'))
        lines.append(ae(f'(hs-token-type (nth {toks} 1))', '"PERIOD"'))
        lines.append(ae(f'(hs-token-type (nth {toks} 2))', '"NUMBER"'))
        lines.append(ae(f'(len {toks})', '3'))
    elif name == 'handles operators properly':
        # (char(s), token name) pairs; only op-ness and value are asserted
        # below, the name column documents the upstream token type.
        optable = [
            ('+', 'PLUS'), ('-', 'MINUS'), ('*', 'MULTIPLY'),
            ('.', 'PERIOD'), ('\\', 'BACKSLASH'), (':', 'COLON'),
            ('%', 'PERCENT'), ('|', 'PIPE'), ('!', 'EXCLAMATION'),
            ('?', 'QUESTION'), ('#', 'POUND'), ('&', 'AMPERSAND'),
            (';', 'SEMI'), (',', 'COMMA'), ('(', 'L_PAREN'),
            (')', 'R_PAREN'), ('<', 'L_ANG'), ('>', 'R_ANG'),
            ('{', 'L_BRACE'), ('}', 'R_BRACE'), ('[', 'L_BRACKET'),
            (']', 'R_BRACKET'), ('=', 'EQUALS'),
            ('<=', 'LTE_ANG'), ('>=', 'GTE_ANG'),
            ('==', 'EQ'), ('===', 'EQQ'),
        ]
        for op_char, _op_name in optable:
            tok_expr = consume(to_(op_char))
            lines.append(ae(t_op(tok_expr), 'true'))
            lines.append(ae(t_val(tok_expr), sx_str(op_char)))
    elif name == 'handles strings properly':
        # Both quote styles, embedded other-quote, and escaped same-quote.
        for src, v in [
            ('"foo"', 'foo'),
            ('"fo\'o"', "fo'o"),
            ('"fo\\"o"', 'fo"o'),
            ("'foo'", 'foo'),
            ("'fo\"o'", 'fo"o'),
            ("'fo\\'o'", "fo'o"),
        ]:
            lines.append(ae(t_type(consume(to_(src))), '"STRING"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
        # Unterminated strings must throw.
        lines.append(throws(consume(to_("'"))))
        lines.append(throws(consume(to_('"'))))
    elif name == 'handles strings properly 2':
        tok_expr = consume(to_("'foo'"))
        lines.append(ae(t_type(tok_expr), '"STRING"'))
        lines.append(ae(t_val(tok_expr), '"foo"'))
    elif name == 'handles template bootstrap properly':
        # Template mode emits ", $, {, inner tokens, }, " step by step as the
        # template source grows.
        s1 = to_('"', tmpl=True)
        lines.append(ae(t_val(tok_i(s1, 0)), sx_str('"')))
        s2 = to_('"$', tmpl=True)
        lines.append(ae(t_val(tok_i(s2, 0)), sx_str('"')))
        lines.append(ae(t_val(tok_i(s2, 1)), '"$"'))
        s3 = to_('"${', tmpl=True)
        lines.append(ae(t_val(tok_i(s3, 0)), sx_str('"')))
        lines.append(ae(t_val(tok_i(s3, 1)), '"$"'))
        lines.append(ae(t_val(tok_i(s3, 2)), '"{"'))
        s4 = to_('"${"asdf"', tmpl=True)
        lines.append(ae(t_val(tok_i(s4, 0)), sx_str('"')))
        lines.append(ae(t_val(tok_i(s4, 1)), '"$"'))
        lines.append(ae(t_val(tok_i(s4, 2)), '"{"'))
        lines.append(ae(t_val(tok_i(s4, 3)), '"asdf"'))
        s5 = to_('"${"asdf"}"', tmpl=True)
        lines.append(ae(t_val(tok_i(s5, 0)), sx_str('"')))
        lines.append(ae(t_val(tok_i(s5, 1)), '"$"'))
        lines.append(ae(t_val(tok_i(s5, 2)), '"{"'))
        lines.append(ae(t_val(tok_i(s5, 3)), '"asdf"'))
        lines.append(ae(t_val(tok_i(s5, 4)), '"}"'))
        lines.append(ae(t_val(tok_i(s5, 5)), sx_str('"')))
    elif name == 'handles whitespace properly':
        # Expected = surviving token count after whitespace is skipped.
        for src, expected in [
            (' ', 0), (' asdf', 1), (' asdf ', 2), ('asdf ', 2),
            ('\n', 0), ('\nasdf', 1), ('\nasdf\n', 2), ('asdf\n', 2),
            ('\r', 0), ('\rasdf', 1), ('\rasdf\r', 2), ('asdf\r', 2),
            ('\t', 0), ('\tasdf', 1), ('\tasdf\t', 2), ('asdf\t', 2),
        ]:
            lines.append(ae(list_len(to_(src)), str(expected)))
    else:
        return None  # not a tokenizer test we handle
    lines.append(' )')
    return '\n'.join(lines)
def generate_eval_only_test(test, idx):
"""Generate SX deftest for no-HTML tests using eval-hs.
Handles patterns:
@@ -2095,6 +2388,9 @@ def generate_eval_only_test(test, idx):
f" (assert= (hs-line-at \"{src}\" (list :true-branch :next)) \" log 'it was true'\"))"
)
if '_hyperscript.internals.tokenizer' in body:
return generate_tokenizer_test(test, safe_name)
lines.append(f' (deftest "{safe_name}"')
assertions = []
@@ -2106,13 +2402,20 @@ def generate_eval_only_test(test, idx):
def emit_eval(hs_expr, expected_sx, extra_locals=None):
    """Render one SX assertion that evaluates *hs_expr*.

    Merges the surrounding window setups with any per-call *extra_locals*
    into an eval-hs-locals binding list (falling back to plain eval-hs when
    there are no bindings), and chooses assert-equal (deep equal?) whenever
    the expected SX contains a dict literal, assert= otherwise.
    """
    bindings = list(window_setups) + list(extra_locals or [])
    # assert= compares dicts by reference; dict literals need deep equality.
    deep = '{' in expected_sx
    if bindings:
        rendered = [f'(list (quote {n}) {v})' for n, v in bindings]
        locals_sx = '(list ' + ' '.join(rendered) + ')'
        call = f'(eval-hs-locals "{hs_expr}" {locals_sx})'
    else:
        call = f'(eval-hs "{hs_expr}")'
    if deep:
        return f' (assert-equal {expected_sx} {call})'
    return f' (assert= {call} {expected_sx})'
# Shared sub-pattern for run() call with optional String.raw and extra args: