HS E37: tokenizer-as-API 17/17 (+fixes)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 16s

- runtime.sx: fix extra ) in hs-tokens-of (parse error); add hs-eof-sentinel,
  hs-raw->api-token, hs-normalize-raw-tokens, hs-tokens-of, stream helpers,
  hs-token-type/value/op?; add \$ escape to hs-template
- tokenizer.sx: fix read-number double-dot bug (1.1.1 → 3 tokens); fix t-emit!
  eof call (3→2 args); add bare $ case to scan-template!
- compiler.sx: add \$ escape to tpl-collect template interpolation
- generate-sx-tests.py: preserve \$ in process_hs_val; add generate_tokenizer_test
- regen spec/tests/test-hyperscript-behavioral.sx: 17 tokenizer tests generated
- plans/hs-conformance-to-100.md: row 37 marked done +17

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-26 09:54:59 +00:00
parent 3003c8a069
commit 880503e2b6
9 changed files with 974 additions and 77 deletions

View File

@@ -1254,7 +1254,9 @@ def process_hs_val(hs_val):
hs_val = hs_val.replace('\\n', '\n').replace('\\t', ' ')
# Preserve escaped quotes (\" → placeholder), strip remaining backslashes, restore
hs_val = hs_val.replace('\\"', '\x00QUOT\x00')
hs_val = hs_val.replace('\\$', '\x00DOLLAR\x00') # preserve \$ template escape
hs_val = hs_val.replace('\\', '')
hs_val = hs_val.replace('\x00DOLLAR\x00', '\\$') # restore \$
hs_val = hs_val.replace('\x00QUOT\x00', '\\"')
# Strip line comments BEFORE newline collapse — once newlines become `then`,
# an unterminated `//` / ` --` comment would consume the rest of the input.
@@ -1838,6 +1840,272 @@ def extract_hs_expr(raw):
return expr
def generate_tokenizer_test(test, safe_name):
    """Hardcoded SX translation for _hyperscript.internals.tokenizer tests (E37).

    Each supported upstream test is recognised by its exact ``test['name']``
    and translated by hand into a list of SX assertion lines wrapped in a
    ``(deftest ...)`` form.

    Args:
        test: Test record; only ``test['name']`` is read here.
        safe_name: Name interpolated verbatim into the ``(deftest "...")``
            header line.

    Returns:
        The deftest form as one newline-joined string, or ``None`` when
        ``test['name']`` is not one of the tokenizer tests handled below.
    """
    name = test['name']

    # --- Small builders for the SX expressions used by the assertions. ---

    def to_(src, tmpl=False):
        """Return (hs-tokens-of <sx-str> [:template]) for HS source string src."""
        # Backslashes must be doubled first so the escapes added afterwards
        # are not themselves re-escaped.
        escaped = (src
                   .replace('\\', '\\\\')
                   .replace('"', '\\"')
                   .replace('\n', '\\n')
                   .replace('\r', '\\r')
                   .replace('\t', '\\t'))
        q = '"' + escaped + '"'
        suffix = ' :template' if tmpl else ''
        return f'(hs-tokens-of {q}{suffix})'

    def consume(s):
        return f'(hs-stream-consume {s})'

    def tok_i(s, i):
        return f'(hs-stream-token {s} {i})'

    def t_type(t):
        return f'(hs-token-type {t})'

    def t_val(t):
        return f'(hs-token-value {t})'

    def t_op(t):
        return f'(hs-token-op? {t})'

    def nth_list(s, i):
        return f'(nth (get {s} "list") {i})'

    def list_len(s):
        return f'(len (get {s} "list"))'

    def ae(actual, expected):
        """Return one ``assert=`` line comparing two SX expressions."""
        return f' (assert= {actual} {expected})'

    def throws(expr):
        """Return a three-line form asserting that evaluating expr raises."""
        return (
            f' (let ((threw false))\n'
            f' (guard (e (true (set! threw true))) {expr})\n'
            f' (assert threw))'
        )

    def ref_assertions(cases):
        """Return type+value assertions for (src, idx, type, value) cases.

        ``idx`` of ``None`` checks the first consumed token; otherwise the
        token at position ``idx`` of the stream's "list" entry is checked.
        """
        out = []
        for src, idx, exp_type, exp_val in cases:
            if idx is None:
                tok_expr = consume(to_(src))
            else:
                tok_expr = nth_list(to_(src), idx)
            out.append(ae(t_type(tok_expr), f'"{exp_type}"'))
            out.append(ae(t_val(tok_expr), sx_str(exp_val)))
        return out

    lines = [f' (deftest "{safe_name}"']

    if name == 'handles $ in template properly':
        s = to_('"', tmpl=True)
        lines.append(ae(t_val(tok_i(s, 0)), sx_str('"')))

    elif name == 'handles all special escapes properly':
        # Expected values are already SX expressions, not Python strings.
        for src, exp in [
            ('"\\b"', '(char-from-code 8)'),
            ('"\\f"', '(char-from-code 12)'),
            ('"\\n"', '"\\n"'),
            ('"\\r"', '"\\r"'),
            ('"\\t"', '"\\t"'),
            ('"\\v"', '(char-from-code 11)'),
        ]:
            lines.append(ae(t_val(consume(to_(src))), exp))

    elif name == 'handles basic token types':
        lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
        lines.append(ae(t_type(consume(to_('1'))), '"NUMBER"'))
        for src in ['1.1', '1e6', '1e-6', '1.1e6', '1.1e-6']:
            sq = to_(src)
            # Bind the stream once so the has-more check sees the state
            # left behind by the consume.
            lines.append(f' (let ((s {sq}))')
            lines.append(f' (let ((tok (hs-stream-consume s)))')
            lines.append(f' (assert= (hs-token-type tok) "NUMBER")')
            lines.append(f' (assert= (hs-stream-has-more s) false)))')
        lines.append(ae(t_type(consume(to_('.a'))), '"CLASS_REF"'))
        lines.append(ae(t_type(consume(to_('#a'))), '"ID_REF"'))
        lines.append(ae(t_type(consume(to_('"asdf"'))), '"STRING"'))

    elif name == 'handles class identifiers properly':
        lines.extend(ref_assertions([
            ('.a', None, 'CLASS_REF', '.a'),
            (' .a', None, 'CLASS_REF', '.a'),
            ('a.a', None, 'IDENTIFIER', 'a'),
            ('(a).a', 4, 'IDENTIFIER', 'a'),
            ('{a}.a', 4, 'IDENTIFIER', 'a'),
            ('[a].a', 4, 'IDENTIFIER', 'a'),
            ('(a(.a', 3, 'CLASS_REF', '.a'),
            ('{a{.a', 3, 'CLASS_REF', '.a'),
            ('[a[.a', 3, 'CLASS_REF', '.a'),
        ]))

    elif name == 'handles comments properly':
        # (source, expected length of the stream's "list" entry)
        for src, expected in [
            ('--', 0),
            ('asdf--', 1),
            ('-- asdf', 0),
            ('--\nasdf', 1),
            ('--\nasdf--', 1),
            ('---asdf', 0),
            ('----\n---asdf', 0),
            ('----asdf----', 0),
            ('---\nasdf---', 1),
            ('// asdf', 0),
            ('///asdf', 0),
            ('asdf//', 1),
            ('asdf\n//', 2),
        ]:
            lines.append(ae(list_len(to_(src)), str(expected)))

    elif name == 'handles hex escapes properly':
        lines.append(ae(t_val(consume(to_('"\\x1f"'))), '(char-from-code 31)'))
        lines.append(ae(t_val(consume(to_('"\\x41"'))), '"A"'))
        lines.append(ae(t_val(consume(to_('"\\x41\\x61"'))), '"Aa"'))
        for bad in ['"\\x"', '"\\xGG"', '"\\x4"']:
            lines.append(throws(consume(to_(bad))))

    elif name == 'handles id references properly':
        lines.extend(ref_assertions([
            ('#a', None, 'ID_REF', '#a'),
            (' #a', None, 'ID_REF', '#a'),
            ('a#a', None, 'IDENTIFIER', 'a'),
            ('(a)#a', 4, 'IDENTIFIER', 'a'),
            ('{a}#a', 4, 'IDENTIFIER', 'a'),
            ('[a]#a', 4, 'IDENTIFIER', 'a'),
            ('(a(#a', 3, 'ID_REF', '#a'),
            ('{a{#a', 3, 'ID_REF', '#a'),
            ('[a[#a', 3, 'ID_REF', '#a'),
        ]))

    elif name == 'handles identifiers properly':
        lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
        lines.append(ae(t_val(consume(to_('foo'))), '"foo"'))
        lines.append(ae(t_type(consume(to_(' foo '))), '"IDENTIFIER"'))
        lines.append(ae(t_val(consume(to_(' foo '))), '"foo"'))
        for src, v1, v2 in [
            (' foo bar', 'foo', 'bar'),
            (' foo\n-- a comment\n bar', 'foo', 'bar'),
        ]:
            sq = to_(src)
            # Two consecutive consumes on the same bound stream.
            lines.append(f' (let ((s {sq}))')
            lines.append(f' (let ((tok1 (hs-stream-consume s)))')
            lines.append(f' (assert= (hs-token-type tok1) "IDENTIFIER")')
            lines.append(f' (assert= (hs-token-value tok1) {sx_str(v1)})')
            lines.append(f' (let ((tok2 (hs-stream-consume s)))')
            lines.append(f' (assert= (hs-token-type tok2) "IDENTIFIER")')
            lines.append(f' (assert= (hs-token-value tok2) {sx_str(v2)}))))')

    elif name == 'handles identifiers with numbers properly':
        for src in ['f1oo', 'fo1o', 'foo1']:
            lines.append(ae(t_type(consume(to_(src))), '"IDENTIFIER"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(src)))

    elif name == 'handles look ahead property':
        s = to_('a 1 + 1')
        for i, v in [(0, 'a'), (1, '1'), (2, '+'), (3, '1'), (4, '<<<EOF>>>')]:
            lines.append(ae(t_val(tok_i(s, i)), sx_str(v)))

    elif name == 'handles numbers properly':
        for src, v in [
            ('1', '1'),
            ('1.1', '1.1'),
            ('1234567890.1234567890', '1234567890.1234567890'),
            ('1e6', '1e6'),
            ('1e-6', '1e-6'),
            ('1.1e6', '1.1e6'),
            ('1.1e-6', '1.1e-6'),
        ]:
            lines.append(ae(t_type(consume(to_(src))), '"NUMBER"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
        # A second dot must end the number: NUMBER PERIOD NUMBER.
        s = to_('1.1.1')
        toks = f'(get {s} "list")'
        lines.append(ae(f'(hs-token-type (nth {toks} 0))', '"NUMBER"'))
        lines.append(ae(f'(hs-token-type (nth {toks} 1))', '"PERIOD"'))
        lines.append(ae(f'(hs-token-type (nth {toks} 2))', '"NUMBER"'))
        lines.append(ae(f'(len {toks})', '3'))

    elif name == 'handles operators properly':
        # The second column names each operator for the reader; only the
        # operator text itself is asserted on.
        optable = [
            ('+', 'PLUS'), ('-', 'MINUS'), ('*', 'MULTIPLY'),
            ('.', 'PERIOD'), ('\\', 'BACKSLASH'), (':', 'COLON'),
            ('%', 'PERCENT'), ('|', 'PIPE'), ('!', 'EXCLAMATION'),
            ('?', 'QUESTION'), ('#', 'POUND'), ('&', 'AMPERSAND'),
            (';', 'SEMI'), (',', 'COMMA'), ('(', 'L_PAREN'),
            (')', 'R_PAREN'), ('<', 'L_ANG'), ('>', 'R_ANG'),
            ('{', 'L_BRACE'), ('}', 'R_BRACE'), ('[', 'L_BRACKET'),
            (']', 'R_BRACKET'), ('=', 'EQUALS'),
            ('<=', 'LTE_ANG'), ('>=', 'GTE_ANG'),
            ('==', 'EQ'), ('===', 'EQQ'),
        ]
        for op_char, _op_name in optable:
            tok_expr = consume(to_(op_char))
            lines.append(ae(t_op(tok_expr), 'true'))
            lines.append(ae(t_val(tok_expr), sx_str(op_char)))

    elif name == 'handles strings properly':
        for src, v in [
            ('"foo"', 'foo'),
            ('"fo\'o"', "fo'o"),
            ('"fo\\"o"', 'fo"o'),
            ("'foo'", 'foo'),
            ("'fo\"o'", 'fo"o'),
            ("'fo\\'o'", "fo'o"),
        ]:
            lines.append(ae(t_type(consume(to_(src))), '"STRING"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
        # A lone opening quote is an unterminated string and must raise.
        lines.append(throws(consume(to_("'"))))
        lines.append(throws(consume(to_('"'))))

    elif name == 'handles strings properly 2':
        tok_expr = consume(to_("'foo'"))
        lines.append(ae(t_type(tok_expr), '"STRING"'))
        lines.append(ae(t_val(tok_expr), '"foo"'))

    elif name == 'handles template bootstrap properly':
        # Progressively longer template prefixes, each with the expected
        # value of every token from index 0 upward (already SX literals).
        quote = sx_str('"')
        for src, expected_vals in [
            ('"', [quote]),
            ('"$', [quote, '"$"']),
            ('"${', [quote, '"$"', '"{"']),
            ('"${"asdf"', [quote, '"$"', '"{"', '"asdf"']),
            ('"${"asdf"}"', [quote, '"$"', '"{"', '"asdf"', '"}"', quote]),
        ]:
            s = to_(src, tmpl=True)
            for i, exp in enumerate(expected_vals):
                lines.append(ae(t_val(tok_i(s, i)), exp))

    elif name == 'handles whitespace properly':
        # (source, expected length of the stream's "list" entry)
        for src, expected in [
            (' ', 0), (' asdf', 1), (' asdf ', 2), ('asdf ', 2),
            ('\n', 0), ('\nasdf', 1), ('\nasdf\n', 2), ('asdf\n', 2),
            ('\r', 0), ('\rasdf', 1), ('\rasdf\r', 2), ('asdf\r', 2),
            ('\t', 0), ('\tasdf', 1), ('\tasdf\t', 2), ('asdf\t', 2),
        ]:
            lines.append(ae(list_len(to_(src)), str(expected)))

    else:
        return None  # not a tokenizer test we handle

    lines.append(' )')
    return '\n'.join(lines)
def generate_eval_only_test(test, idx):
"""Generate SX deftest for no-HTML tests using eval-hs.
Handles patterns:
@@ -2015,6 +2283,9 @@ def generate_eval_only_test(test, idx):
f' )'
)
if '_hyperscript.internals.tokenizer' in body:
return generate_tokenizer_test(test, safe_name)
lines.append(f' (deftest "{safe_name}"')
assertions = []