Test generator: nested HTML elements, three-phase element setup

- parse_html now captures ALL elements (not just top-level) with
  parent-child relationships
- emit_element_setup uses three phases: attributes, DOM tree, activation
- ref() maps positional names (d1, d2) to top-level elements only
- dom-scope: 9→14 (+5), reset: 3→6 (+3), take: 2→3, parser: 2→3

Net change is 0: the gains above are offset by regressions in dialog/halt/closest (needs investigation).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-13 01:20:53 +00:00
parent e5293e4e03
commit da0da1472d
2 changed files with 1500 additions and 668 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -26,7 +26,7 @@ with open(INPUT) as f:
# ── HTML parsing ──────────────────────────────────────────────────
def parse_html(html):
"""Parse HTML into list of element dicts.
"""Parse HTML into list of element dicts with parent-child relationships.
Uses Python's html.parser for reliability with same-tag siblings."""
from html.parser import HTMLParser
@@ -40,7 +40,8 @@ def parse_html(html):
def handle_starttag(self, tag, attrs):
el = {
'tag': tag, 'id': None, 'classes': [], 'hs': None,
'attrs': {}, 'inner': '', 'depth': len(stack)
'attrs': {}, 'inner': '', 'depth': len(stack),
'children': [], 'parent_idx': None
}
for name, val in attrs:
if name == 'id': el['id'] = val
@@ -48,18 +49,27 @@ def parse_html(html):
elif name == '_': el['hs'] = val
elif name == 'style': el['attrs']['style'] = val or ''
elif val is not None: el['attrs'][name] = val
# Track parent-child relationship
if stack:
parent = stack[-1]
# Find parent's index in elements list
parent_idx = None
for i, e in enumerate(elements):
if e is parent:
parent_idx = i
break
el['parent_idx'] = parent_idx
parent['children'].append(len(elements))
stack.append(el)
# Only collect top-level elements
if el['depth'] == 0:
elements.append(el)
elements.append(el)
def handle_endtag(self, tag):
if stack and stack[-1]['tag'] == tag:
stack.pop()
def handle_data(self, data):
# Only capture text for top-level elements with no children
if stack and stack[-1]['depth'] == 0 and len(stack) == 1:
# Only capture text for elements with no children
if stack and len(stack[-1]['children']) == 0:
stack[-1]['inner'] += data.strip()
Parser().feed(html)
@@ -234,22 +244,27 @@ def make_ref_fn(elements, var_names):
- div1, div2, div3 — divs by position among same tag (1-indexed)
- bar, btn, A, B — elements by ID
"""
# Map tag → first UNNAMED element of that tag (no id)
# Map tag → first UNNAMED top-level element of that tag (no id)
tag_to_unnamed = {}
# Map tag → list of vars for elements of that tag (ordered)
# Map tag → list of vars for top-level elements of that tag (ordered)
tag_to_all = {}
id_to_var = {}
# Top-level element vars for positional refs (d1, d2, ...)
top_level_vars = []
first_var = var_names[0] if var_names else '_el-div'
for i, el in enumerate(elements):
tag = el['tag']
if tag not in tag_to_unnamed and not el['id']:
tag_to_unnamed[tag] = var_names[i]
if tag not in tag_to_all:
tag_to_all[tag] = []
tag_to_all[tag].append(var_names[i])
if el['id']:
id_to_var[el['id']] = var_names[i]
# Only use top-level elements for tag/positional mapping
if el.get('depth', 0) == 0:
top_level_vars.append(var_names[i])
if tag not in tag_to_unnamed and not el['id']:
tag_to_unnamed[tag] = var_names[i]
if tag not in tag_to_all:
tag_to_all[tag] = []
tag_to_all[tag].append(var_names[i])
tags = {'div', 'form', 'button', 'input', 'span', 'p', 'a', 'section',
'ul', 'li', 'select', 'textarea', 'details', 'dialog', 'template',
@@ -277,12 +292,12 @@ def make_ref_fn(elements, var_names):
if 0 <= idx < len(tag_to_all[tag_part]):
return tag_to_all[tag_part][idx]
# Positional: d1→1st element, d2→2nd, d3→3rd, etc.
# Positional: d1→1st top-level element, d2→2nd, d3→3rd, etc.
m = re.match(r'^d(\d+)$', name)
if m:
idx = int(m.group(1)) - 1 # 1-indexed
if 0 <= idx < len(var_names):
return var_names[idx]
if 0 <= idx < len(top_level_vars):
return top_level_vars[idx]
# Short aliases: btn → look up as ID
if name == 'btn':
@@ -483,44 +498,47 @@ def parse_dev_body(body, elements, var_names):
# ── Test generation ───────────────────────────────────────────────
def process_hs_val(hs_val):
    """Normalise a raw ``_`` (hyperscript) attribute value onto one line.

    Backslashes are stripped, whitespace that separates commands is turned
    into explicit ``then`` separators, and remaining whitespace runs are
    collapsed to single spaces.

    Args:
        hs_val: Raw attribute text as captured from the HTML source.

    Returns:
        The processed source with leading/trailing whitespace removed.
        Where a spurious ``then`` is dropped again (after an event name, a
        ``for ... in [...]`` header or ``times``) a double space may be
        left in its place.
    """
    # Word-like command keywords need a trailing \b so that e.g. 'set' does
    # not match the start of 'setting'.  'beep!' ends in punctuation, where
    # \b would demand a word character right after the '!', so it is matched
    # without one.
    word_kws = (
        r'set|put|get|add|remove|toggle|hide|show|if|repeat|for|wait|send'
        r'|trigger|log|call|take|throw|return|append|tell|go|halt|settle'
        r'|increment|decrement|fetch|make|install|measure|empty|reset|swap'
        r'|default|morph|render|scroll|focus|select|pick'
    )
    cmd_kws = r'(?:(?:' + word_kws + r')\b|beep!)'

    hs_val = hs_val.replace('\\', '')
    # Multi-whitespace directly before a command keyword is an implicit
    # 'then'; whitespace inside expressions is left alone.
    hs_val = re.sub(r'\s{2,}(?=' + cmd_kws + r')', ' then ', hs_val)
    # Any remaining newline also separates commands.
    hs_val = re.sub(r'\s*[\n\r]\s*', ' then ', hs_val)
    # Collapse whitespace, then merge runs of 'then'.  Word boundaries keep
    # identifiers that merely end in 'then' (e.g. 'lengthen') intact.
    hs_val = re.sub(r'\s+', ' ', hs_val)
    hs_val = re.sub(r'\b(?:then\s+)+then\b', 'then', hs_val)
    # No 'then' between the event name and the first command of a handler.
    hs_val = re.sub(r'\bon (\w[\w.:+-]*) then\b', r'on \1 ', hs_val)
    # No 'then' between a for-in collection literal and the loop body.
    hs_val = re.sub(r'(\bin \[.*?\]) then\b', r'\1 ', hs_val)
    # No 'then' after 'times' in 'repeat N times' loops.
    hs_val = re.sub(r'\btimes then\b', 'times ', hs_val)
    return hs_val.strip()
def emit_element_setup(lines, elements, var_names):
"""Emit SX for creating elements, setting attributes, appending to body, and activating."""
"""Emit SX for creating elements, setting attributes, appending to DOM, and activating.
Three phases to ensure correct ordering:
1. Set attributes/content on all elements
2. Append elements to their parents (children first, then parents to body)
3. Activate HS handlers (all elements in DOM)
"""
hs_elements = [] # indices of elements with valid HS
# Phase 1: Set attributes, classes, HS, inner text
for i, el in enumerate(elements):
var = var_names[i]
if el['id']:
lines.append(f' (dom-set-attr {var} "id" "{el["id"]}")')
for cls in el['classes']:
lines.append(f' (dom-add-class {var} "{cls}")')
if el['hs']:
hs_val = el['hs']
hs_val = hs_val.replace('\\', '')
# Newlines/tabs in _hyperscript act as implicit 'then' separators.
# Only insert 'then' before command keywords, not within expressions.
cmd_kws = r'(?:set|put|get|add|remove|toggle|hide|show|if|repeat|for|wait|send|trigger|log|call|take|throw|return|append|tell|go|halt|settle|increment|decrement|fetch|make|install|measure|empty|reset|swap|default|morph|render|scroll|focus|select|pick|beep!)'
# Replace multi-whitespace before a command keyword with ' then '
hs_val = re.sub(r'\s{2,}(?=' + cmd_kws + r'\b)', ' then ', hs_val)
# Also handle actual newlines
hs_val = re.sub(r'\s*[\n\r]\s*', ' then ', hs_val)
# Clean up: collapse spaces, dedupe then
hs_val = re.sub(r'\s+', ' ', hs_val)
hs_val = re.sub(r'(then\s*)+then', 'then', hs_val)
# Don't insert 'then' between event name and first command in 'on' handlers
hs_val = re.sub(r'\bon (\w[\w.:+-]*) then\b', r'on \1 ', hs_val)
# Don't insert 'then' inside for-in loop bodies (between collection and body)
hs_val = re.sub(r'(\bin \[.*?\]) then\b', r'\1 ', hs_val)
# Don't insert 'then' after 'times' in repeat N times loops
hs_val = re.sub(r'\btimes then\b', 'times ', hs_val)
hs_val = hs_val.strip()
hs_val = process_hs_val(el['hs'])
if not hs_val:
lines.append(f' (dom-append (dom-body) {var})')
continue
if hs_val.startswith('"') or (hs_val.endswith('"') and '<' in hs_val):
pass # no HS to set
elif hs_val.startswith('"') or (hs_val.endswith('"') and '<' in hs_val):
lines.append(f' ;; HS source has bare quotes or embedded HTML')
lines.append(f' (dom-append (dom-body) {var})')
continue
hs_escaped = hs_val.replace('\\', '\\\\').replace('"', '\\"')
lines.append(f' (dom-set-attr {var} "_" "{hs_escaped}")')
else:
hs_escaped = hs_val.replace('\\', '\\\\').replace('"', '\\"')
lines.append(f' (dom-set-attr {var} "_" "{hs_escaped}")')
hs_elements.append(i)
for aname, aval in el['attrs'].items():
if '\\' in aval or '\n' in aval or aname.startswith('['):
lines.append(f' ;; SKIP attr {aname} (contains special chars)')
@@ -530,9 +548,19 @@ def emit_element_setup(lines, elements, var_names):
if el['inner']:
inner_escaped = el['inner'].replace('\\', '\\\\').replace('"', '\\"')
lines.append(f' (dom-set-inner-html {var} "{inner_escaped}")')
lines.append(f' (dom-append (dom-body) {var})')
if el['hs']:
lines.append(f' (hs-activate! {var})')
# Phase 2: Append elements (children to parents, roots to body)
for i, el in enumerate(elements):
var = var_names[i]
if el['parent_idx'] is not None:
parent_var = var_names[el['parent_idx']]
lines.append(f' (dom-append {parent_var} {var})')
else:
lines.append(f' (dom-append (dom-body) {var})')
# Phase 3: Activate HS handlers (all elements now in DOM)
for i in hs_elements:
lines.append(f' (hs-activate! {var_names[i]})')
def generate_test_chai(test, elements, var_names, idx):