HS E37: tokenizer-as-API 17/17 (+fixes)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 16s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 16s
- runtime.sx: fix extra ) in hs-tokens-of (parse error); add hs-eof-sentinel, hs-raw->api-token, hs-normalize-raw-tokens, hs-tokens-of, stream helpers, hs-token-type/value/op?; add \$ escape to hs-template - tokenizer.sx: fix read-number double-dot bug (1.1.1 → 3 tokens); fix t-emit! eof call (3→2 args); add bare $ case to scan-template! - compiler.sx: add \$ escape to tpl-collect template interpolation - generate-sx-tests.py: preserve \$ in process_hs_val; add generate_tokenizer_test - regen spec/tests/test-hyperscript-behavioral.sx: 17 tokenizer tests generated - plans/hs-conformance-to-100.md: row 37 marked done +17 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1254,7 +1254,9 @@ def process_hs_val(hs_val):
|
||||
hs_val = hs_val.replace('\\n', '\n').replace('\\t', ' ')
|
||||
# Preserve escaped quotes (\" → placeholder), strip remaining backslashes, restore
|
||||
hs_val = hs_val.replace('\\"', '\x00QUOT\x00')
|
||||
hs_val = hs_val.replace('\\$', '\x00DOLLAR\x00') # preserve \$ template escape
|
||||
hs_val = hs_val.replace('\\', '')
|
||||
hs_val = hs_val.replace('\x00DOLLAR\x00', '\\$') # restore \$
|
||||
hs_val = hs_val.replace('\x00QUOT\x00', '\\"')
|
||||
# Strip line comments BEFORE newline collapse — once newlines become `then`,
|
||||
# an unterminated `//` / ` --` comment would consume the rest of the input.
|
||||
@@ -1838,6 +1840,272 @@ def extract_hs_expr(raw):
|
||||
return expr
|
||||
|
||||
|
||||
def generate_tokenizer_test(test, safe_name):
    """Hardcoded SX translation for _hyperscript.internals.tokenizer tests (E37).

    Dispatches on the upstream test's name and emits the body of an SX
    ``(deftest ...)`` form as a string.  Each recognized test name maps to a
    hand-written sequence of assertions against the SX-side tokenizer API
    (hs-tokens-of / hs-stream-* / hs-token-*).

    Args:
        test: dict describing the upstream test; only ``test['name']`` is read.
        safe_name: sanitized test name used as the deftest label.

    Returns:
        The generated SX deftest source as a string, or ``None`` when the
        test name is not one of the tokenizer tests handled here.
    """
    name = test['name']

    # --- helpers: each returns a fragment of SX source as a Python string ---

    def to_(src, tmpl=False):
        """Return (hs-tokens-of <sx-str> [:template]) for HS source string src."""
        # Escape src so it survives as an SX double-quoted string literal.
        escaped = (src
            .replace('\\', '\\\\')
            .replace('"', '\\"')
            .replace('\n', '\\n')
            .replace('\r', '\\r')
            .replace('\t', '\\t'))
        q = '"' + escaped + '"'
        suffix = ' :template' if tmpl else ''
        return f'(hs-tokens-of {q}{suffix})'

    def consume(s):
        # Pop the next token from stream s.
        return f'(hs-stream-consume {s})'

    def tok_i(s, i):
        # Look-ahead: i-th token of stream s without consuming.
        return f'(hs-stream-token {s} {i})'

    def has_more(s):
        return f'(hs-stream-has-more {s})'

    def t_type(t):
        return f'(hs-token-type {t})'

    def t_val(t):
        return f'(hs-token-value {t})'

    def t_op(t):
        return f'(hs-token-op? {t})'

    def nth_list(s, i):
        # Index directly into the raw token list of stream s.
        return f'(nth (get {s} "list") {i})'

    def list_len(s):
        return f'(len (get {s} "list"))'

    def ae(actual, expected):
        # One indented (assert= ...) line for the deftest body.
        return f' (assert= {actual} {expected})'

    def throws(expr):
        # Assert that evaluating expr raises: guard sets a flag, then assert it.
        return (
            f' (let ((threw false))\n'
            f' (guard (e (true (set! threw true))) {expr})\n'
            f' (assert threw))'
        )

    lines = [f' (deftest "{safe_name}"']

    # --- one branch per upstream tokenizer test name ---

    if name == 'handles $ in template properly':
        s = to_('"', tmpl=True)
        lines.append(ae(t_val(tok_i(s, 0)), sx_str('"')))

    elif name == 'handles all special escapes properly':
        # String escapes: \b \f \v compare against char codes; \n \r \t have
        # direct SX string-literal equivalents.
        for src, exp in [
            ('"\\b"', '(char-from-code 8)'),
            ('"\\f"', '(char-from-code 12)'),
            ('"\\n"', '"\\n"'),
            ('"\\r"', '"\\r"'),
            ('"\\t"', '"\\t"'),
            ('"\\v"', '(char-from-code 11)'),
        ]:
            lines.append(ae(t_val(consume(to_(src))), exp))

    elif name == 'handles basic token types':
        lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
        lines.append(ae(t_type(consume(to_('1'))), '"NUMBER"'))
        # Each numeric form must tokenize to exactly one NUMBER token
        # (stream exhausted after a single consume).
        for src in ['1.1', '1e6', '1e-6', '1.1e6', '1.1e-6']:
            sq = to_(src)
            lines.append(f' (let ((s {sq}))')
            lines.append(f' (let ((tok (hs-stream-consume s)))')
            lines.append(f' (assert= (hs-token-type tok) "NUMBER")')
            lines.append(f' (assert= (hs-stream-has-more s) false)))')
        lines.append(ae(t_type(consume(to_('.a'))), '"CLASS_REF"'))
        lines.append(ae(t_type(consume(to_('#a'))), '"ID_REF"'))
        lines.append(ae(t_type(consume(to_('"asdf"'))), '"STRING"'))

    elif name == 'handles class identifiers properly':
        # idx None: check the first consumed token; idx int: check the raw
        # token at that list position (e.g. after brackets/braces/parens).
        for src, idx, exp_type, exp_val in [
            ('.a', None, 'CLASS_REF', '.a'),
            (' .a', None, 'CLASS_REF', '.a'),
            ('a.a', None, 'IDENTIFIER', 'a'),
            ('(a).a', 4, 'IDENTIFIER', 'a'),
            ('{a}.a', 4, 'IDENTIFIER', 'a'),
            ('[a].a', 4, 'IDENTIFIER', 'a'),
            ('(a(.a', 3, 'CLASS_REF', '.a'),
            ('{a{.a', 3, 'CLASS_REF', '.a'),
            ('[a[.a', 3, 'CLASS_REF', '.a'),
        ]:
            if idx is None:
                tok_expr = consume(to_(src))
            else:
                tok_expr = nth_list(to_(src), idx)
            lines.append(ae(t_type(tok_expr), f'"{exp_type}"'))
            lines.append(ae(t_val(tok_expr), sx_str(exp_val)))

    elif name == 'handles comments properly':
        # Expected value is the surviving token count after comment stripping.
        for src, expected in [
            ('--', 0),
            ('asdf--', 1),
            ('-- asdf', 0),
            ('--\nasdf', 1),
            ('--\nasdf--', 1),
            ('---asdf', 0),
            ('----\n---asdf', 0),
            ('----asdf----', 0),
            ('---\nasdf---', 1),
            ('// asdf', 0),
            ('///asdf', 0),
            ('asdf//', 1),
            ('asdf\n//', 2),
        ]:
            lines.append(ae(list_len(to_(src)), str(expected)))

    elif name == 'handles hex escapes properly':
        lines.append(ae(t_val(consume(to_('"\\x1f"'))), '(char-from-code 31)'))
        lines.append(ae(t_val(consume(to_('"\\x41"'))), '"A"'))
        lines.append(ae(t_val(consume(to_('"\\x41\\x61"'))), '"Aa"'))
        # Malformed hex escapes must raise.
        for bad in ['"\\x"', '"\\xGG"', '"\\x4"']:
            lines.append(throws(consume(to_(bad))))

    elif name == 'handles id references properly':
        # Mirrors the class-identifier cases above, for #id references.
        for src, idx, exp_type, exp_val in [
            ('#a', None, 'ID_REF', '#a'),
            (' #a', None, 'ID_REF', '#a'),
            ('a#a', None, 'IDENTIFIER', 'a'),
            ('(a)#a', 4, 'IDENTIFIER', 'a'),
            ('{a}#a', 4, 'IDENTIFIER', 'a'),
            ('[a]#a', 4, 'IDENTIFIER', 'a'),
            ('(a(#a', 3, 'ID_REF', '#a'),
            ('{a{#a', 3, 'ID_REF', '#a'),
            ('[a[#a', 3, 'ID_REF', '#a'),
        ]:
            if idx is None:
                tok_expr = consume(to_(src))
            else:
                tok_expr = nth_list(to_(src), idx)
            lines.append(ae(t_type(tok_expr), f'"{exp_type}"'))
            lines.append(ae(t_val(tok_expr), sx_str(exp_val)))

    elif name == 'handles identifiers properly':
        lines.append(ae(t_type(consume(to_('foo'))), '"IDENTIFIER"'))
        lines.append(ae(t_val(consume(to_('foo'))), '"foo"'))
        lines.append(ae(t_type(consume(to_(' foo '))), '"IDENTIFIER"'))
        lines.append(ae(t_val(consume(to_(' foo '))), '"foo"'))
        # Two sequential consumes from the same stream must yield v1 then v2.
        for src, v1, v2 in [
            (' foo bar', 'foo', 'bar'),
            (' foo\n-- a comment\n bar', 'foo', 'bar'),
        ]:
            sq = to_(src)
            lines.append(f' (let ((s {sq}))')
            lines.append(f' (let ((tok1 (hs-stream-consume s)))')
            lines.append(f' (assert= (hs-token-type tok1) "IDENTIFIER")')
            lines.append(f' (assert= (hs-token-value tok1) {sx_str(v1)})')
            lines.append(f' (let ((tok2 (hs-stream-consume s)))')
            lines.append(f' (assert= (hs-token-type tok2) "IDENTIFIER")')
            lines.append(f' (assert= (hs-token-value tok2) {sx_str(v2)}))))')

    elif name == 'handles identifiers with numbers properly':
        for src in ['f1oo', 'fo1o', 'foo1']:
            lines.append(ae(t_type(consume(to_(src))), '"IDENTIFIER"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(src)))

    elif name == 'handles look ahead property':
        # Look-ahead past the end of input yields the EOF sentinel value.
        s = to_('a 1 + 1')
        for i, v in [(0, 'a'), (1, '1'), (2, '+'), (3, '1'), (4, '<<<EOF>>>')]:
            lines.append(ae(t_val(tok_i(s, i)), sx_str(v)))

    elif name == 'handles numbers properly':
        for src, v in [
            ('1', '1'),
            ('1.1', '1.1'),
            ('1234567890.1234567890', '1234567890.1234567890'),
            ('1e6', '1e6'),
            ('1e-6', '1e-6'),
            ('1.1e6', '1.1e6'),
            ('1.1e-6', '1.1e-6'),
        ]:
            lines.append(ae(t_type(consume(to_(src))), '"NUMBER"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
        # Double-dot regression: '1.1.1' must split into NUMBER PERIOD NUMBER
        # (three tokens), not a single malformed number.
        s = to_('1.1.1')
        toks = f'(get {s} "list")'
        lines.append(ae(f'(hs-token-type (nth {toks} 0))', '"NUMBER"'))
        lines.append(ae(f'(hs-token-type (nth {toks} 1))', '"PERIOD"'))
        lines.append(ae(f'(hs-token-type (nth {toks} 2))', '"NUMBER"'))
        lines.append(ae(f'(len {toks})', '3'))

    elif name == 'handles operators properly':
        # (operator-source, token-name) pairs; only the source is asserted on
        # here (op? flag and round-tripped value), the name is documentation.
        optable = [
            ('+', 'PLUS'), ('-', 'MINUS'), ('*', 'MULTIPLY'),
            ('.', 'PERIOD'), ('\\', 'BACKSLASH'), (':', 'COLON'),
            ('%', 'PERCENT'), ('|', 'PIPE'), ('!', 'EXCLAMATION'),
            ('?', 'QUESTION'), ('#', 'POUND'), ('&', 'AMPERSAND'),
            (';', 'SEMI'), (',', 'COMMA'), ('(', 'L_PAREN'),
            (')', 'R_PAREN'), ('<', 'L_ANG'), ('>', 'R_ANG'),
            ('{', 'L_BRACE'), ('}', 'R_BRACE'), ('[', 'L_BRACKET'),
            (']', 'R_BRACKET'), ('=', 'EQUALS'),
            ('<=', 'LTE_ANG'), ('>=', 'GTE_ANG'),
            ('==', 'EQ'), ('===', 'EQQ'),
        ]
        for op_char, _op_name in optable:
            tok_expr = consume(to_(op_char))
            lines.append(ae(t_op(tok_expr), 'true'))
            lines.append(ae(t_val(tok_expr), sx_str(op_char)))

    elif name == 'handles strings properly':
        # Quote styles and embedded/escaped quotes.
        for src, v in [
            ('"foo"', 'foo'),
            ('"fo\'o"', "fo'o"),
            ('"fo\\"o"', 'fo"o'),
            ("'foo'", 'foo'),
            ("'fo\"o'", 'fo"o'),
            ("'fo\\'o'", "fo'o"),
        ]:
            lines.append(ae(t_type(consume(to_(src))), '"STRING"'))
            lines.append(ae(t_val(consume(to_(src))), sx_str(v)))
        # Unterminated strings must raise.
        lines.append(throws(consume(to_("'"))))
        lines.append(throws(consume(to_('"'))))

    elif name == 'handles strings properly 2':
        tok_expr = consume(to_("'foo'"))
        lines.append(ae(t_type(tok_expr), '"STRING"'))
        lines.append(ae(t_val(tok_expr), '"foo"'))

    elif name == 'handles template bootstrap properly':
        # Incrementally longer template prefixes: each must tokenize its
        # visible pieces (", $, {, inner string, }, closing ").
        s1 = to_('"', tmpl=True)
        lines.append(ae(t_val(tok_i(s1, 0)), sx_str('"')))
        s2 = to_('"$', tmpl=True)
        lines.append(ae(t_val(tok_i(s2, 0)), sx_str('"')))
        lines.append(ae(t_val(tok_i(s2, 1)), '"$"'))
        s3 = to_('"${', tmpl=True)
        lines.append(ae(t_val(tok_i(s3, 0)), sx_str('"')))
        lines.append(ae(t_val(tok_i(s3, 1)), '"$"'))
        lines.append(ae(t_val(tok_i(s3, 2)), '"{"'))
        s4 = to_('"${"asdf"', tmpl=True)
        lines.append(ae(t_val(tok_i(s4, 0)), sx_str('"')))
        lines.append(ae(t_val(tok_i(s4, 1)), '"$"'))
        lines.append(ae(t_val(tok_i(s4, 2)), '"{"'))
        lines.append(ae(t_val(tok_i(s4, 3)), '"asdf"'))
        s5 = to_('"${"asdf"}"', tmpl=True)
        lines.append(ae(t_val(tok_i(s5, 0)), sx_str('"')))
        lines.append(ae(t_val(tok_i(s5, 1)), '"$"'))
        lines.append(ae(t_val(tok_i(s5, 2)), '"{"'))
        lines.append(ae(t_val(tok_i(s5, 3)), '"asdf"'))
        lines.append(ae(t_val(tok_i(s5, 4)), '"}"'))
        lines.append(ae(t_val(tok_i(s5, 5)), sx_str('"')))

    elif name == 'handles whitespace properly':
        # Whitespace-only input yields zero tokens; surrounding whitespace
        # never changes the token count of the payload.
        for src, expected in [
            (' ', 0), (' asdf', 1), (' asdf ', 2), ('asdf ', 2),
            ('\n', 0), ('\nasdf', 1), ('\nasdf\n', 2), ('asdf\n', 2),
            ('\r', 0), ('\rasdf', 1), ('\rasdf\r', 2), ('asdf\r', 2),
            ('\t', 0), ('\tasdf', 1), ('\tasdf\t', 2), ('asdf\t', 2),
        ]:
            lines.append(ae(list_len(to_(src)), str(expected)))

    else:
        return None  # not a tokenizer test we handle

    lines.append(' )')
    return '\n'.join(lines)
|
||||
|
||||
|
||||
def generate_eval_only_test(test, idx):
|
||||
"""Generate SX deftest for no-HTML tests using eval-hs.
|
||||
Handles patterns:
|
||||
@@ -2015,6 +2283,9 @@ def generate_eval_only_test(test, idx):
|
||||
f' )'
|
||||
)
|
||||
|
||||
if '_hyperscript.internals.tokenizer' in body:
|
||||
return generate_tokenizer_test(test, safe_name)
|
||||
|
||||
lines.append(f' (deftest "{safe_name}"')
|
||||
|
||||
assertions = []
|
||||
|
||||
Reference in New Issue
Block a user