#!/usr/bin/env python3
"""Extract _hyperscript upstream tests into spec/tests/hyperscript-upstream-tests.json.

Walks /tmp/hs-upstream/test/**/*.js, finds every test('name', ...) call, extracts:
- category from file path (test/core/tokenizer.js → "core/tokenizer")
- name from first arg
- body from arrow function body (between outer { and })
- html from preceding test.use({html: '...'}) if any
- async from whether the arrow function is async
- complexity heuristic — eval-only / event-driven / dom

Output: spec/tests/hyperscript-upstream-tests.json (overwrites)

Run after:
    cd /tmp && git clone --depth 1 https://github.com/bigskysoftware/_hyperscript hs-upstream
"""
import json
import os
import re
import sys
from pathlib import Path

UPSTREAM = Path('/tmp/hs-upstream/test')
OUT = Path(__file__).parent.parent / 'spec/tests/hyperscript-upstream-tests.json'

# Test-infrastructure files that contain no test() cases worth extracting.
SKIP_FILES = {
    'fixtures.js', 'entry.js', 'global-setup.js', 'global-teardown.js',
    'htmx-fixtures.js', 'playwright.config.js',
}

# Matches test('name', async (args) => {  — group 2 is the (possibly escaped)
# name, group 3 is the optional "async " keyword; the match ends ON the body's
# opening brace so find_matching_brace can take over from m.end() - 1.
TEST_RE = re.compile(
    r"\btest\s*\(\s*(['\"])((?:[^\\]|\\.)*?)\1\s*,\s*(async\s+)?(\([^)]*\))\s*=>\s*\{"
)

# Matches test.use({html: '...'}) / "..." / `...`; group 2 is the html payload.
# DOTALL so multi-line template-literal fixtures are captured too.
USE_HTML_RE = re.compile(
    r"test\.use\(\s*\{\s*html\s*:\s*(['\"`])((?:[^\\]|\\.)*?)\1",
    re.DOTALL,
)


def _unescape_js(s):
    """Undo the JS string escapes we care about: \\', \\" and \\\\."""
    return s.replace("\\'", "'").replace('\\"', '"').replace('\\\\', '\\')


def find_matching_brace(src, open_idx):
    """Return index of matching close brace for { at open_idx. Handles strings/comments.

    Skips over single/double/backtick-quoted strings (including `${...}`
    template interpolations, which recurse) and over // and /* */ comments,
    so braces inside them never affect the depth count.

    Raises ValueError if no balancing brace exists before end of src.
    """
    assert src[open_idx] == '{'
    depth = 0
    i = open_idx
    n = len(src)
    while i < n:
        c = src[i]
        if c == '{':
            depth += 1
        elif c == '}':
            depth -= 1
            if depth == 0:
                return i
        elif c == '"' or c == "'" or c == '`':
            # Skip the whole string literal; i is left on the closing quote.
            quote = c
            i += 1
            while i < n and src[i] != quote:
                if src[i] == '\\':
                    i += 2  # escaped char: skip backslash + escapee
                    continue
                if quote == '`' and src[i] == '$' and i + 1 < n and src[i+1] == '{':
                    # Template literal interpolation — skip nested braces.
                    nested = find_matching_brace(src, i + 1)
                    i = nested + 1
                    continue
                i += 1
        elif c == '/' and i + 1 < n:
            nxt = src[i+1]
            if nxt == '/':
                # Line comment: consume up to (not past) the newline.
                while i < n and src[i] != '\n':
                    i += 1
                continue
            elif nxt == '*':
                # Block comment: consume through the closing */.
                i += 2
                while i < n - 1 and not (src[i] == '*' and src[i+1] == '/'):
                    i += 1
                i += 1
        i += 1
    raise ValueError(f"unbalanced brace at {open_idx}")


def extract_tests(src, category):
    """Find test('name', async/non-async ({...}) => { body }) patterns.

    Returns a list of test records for `src` (one JS file's contents).
    Each test inherits the html fixture from the closest preceding
    test.use({html: ...}) in the same file, or '' if there is none.
    """
    tests = []
    # (position, html) of every fixture declaration, in file order.
    uses = [(u.start(), _unescape_js(u.group(2))) for u in USE_HTML_RE.finditer(src)]
    for m in TEST_RE.finditer(src):
        name = _unescape_js(m.group(2))
        is_async = m.group(3) is not None
        body_open = m.end() - 1  # regex ends on the opening brace itself
        try:
            body_close = find_matching_brace(src, body_open)
        except ValueError:
            continue  # malformed/truncated test — skip rather than abort the file
        body = src[body_open + 1:body_close]
        # Nearest test.use({html: ...}) that appears before this test.
        html = ''
        for start, fixture_html in uses:
            if start < m.start():
                html = fixture_html
            else:
                break
        # Heuristic complexity classification; event-driven deliberately
        # outranks dom when both markers are present.
        complexity = 'eval-only'
        if 'html(' in body or 'find(' in body:
            complexity = 'dom'
        if 'click(' in body or 'dispatch' in body:
            complexity = 'event-driven'
        tests.append({
            'category': category,
            'name': name,
            'html': html,
            'body': body,
            'async': is_async,
            'complexity': complexity,
        })
    return tests


def main():
    """Extract tests from the upstream clone and merge/replace the snapshot.

    Returns a process exit code (0 on success, 1 if the clone is missing).
    Default is merge mode; pass --replace to discard curated bodies.
    """
    if not UPSTREAM.exists():
        print(f"ERROR: {UPSTREAM} not found. Clone first:")
        print("  git clone --depth 1 https://github.com/bigskysoftware/_hyperscript /tmp/hs-upstream")
        return 1

    merge_mode = '--replace' not in sys.argv
    all_tests = []
    skipped_files = []
    scanned = 0  # files actually parsed (excludes SKIP_FILES and read errors)
    for path in sorted(UPSTREAM.rglob('*.js')):
        if path.name in SKIP_FILES:
            continue
        rel = path.relative_to(UPSTREAM)
        category = str(rel.with_suffix('')).replace('\\', '/')
        # commands/ and features/ are organizational only — flatten them.
        for prefix in ('commands/', 'features/'):
            if category.startswith(prefix):
                category = category[len(prefix):]
                break
        try:
            src = path.read_text(encoding='utf-8')
        except Exception as e:
            skipped_files.append((path, str(e)))
            continue
        scanned += 1
        all_tests.extend(extract_tests(src, category))

    print(f"Extracted {len(all_tests)} tests from {scanned} files")
    if skipped_files:
        print(f"Skipped {len(skipped_files)} files due to errors")

    if not OUT.exists():
        OUT.write_text(json.dumps(all_tests, indent=2))
        print(f"\nWrote {OUT} (no existing snapshot)")
        return 0

    old = json.loads(OUT.read_text())
    old_by_key = {(t['category'], t['name']): t for t in old}
    new_keys = {(t['category'], t['name']) for t in all_tests}
    old_keys = set(old_by_key)
    added_keys = new_keys - old_keys
    removed_keys = old_keys - new_keys

    print(f"\nDelta vs existing snapshot ({len(old)} tests):")
    print(f"  +{len(added_keys)} new")
    print(f"  -{len(removed_keys)} removed/renamed")
    if added_keys:
        print("\nNew tests:")
        for cat, name in sorted(added_keys):
            print(f"  [{cat}] {name}")
    if removed_keys:
        print("\nRemoved/renamed tests (first 20):")
        for cat, name in sorted(removed_keys)[:20]:
            print(f"  [{cat}] {name}")

    if merge_mode:
        # Merge mode (default): preserve existing test bodies, only add new tests.
        # The old snapshot's bodies were curated/cleaned — re-extracting from raw
        # upstream JS produces slightly different bodies that may not auto-translate.
        # New tests get the raw extracted body; existing tests keep theirs.
        new_by_key = {(t['category'], t['name']): t for t in all_tests}
        merged = list(old)  # preserves original order
        for k in sorted(added_keys):
            merged.append(new_by_key[k])
        OUT.write_text(json.dumps(merged, indent=2))
        print(f"\nMerged: {len(merged)} tests ({len(old)} existing + {len(added_keys)} new) → {OUT}")
        print("  (rerun with --replace to discard old bodies and use raw upstream)")
    else:
        OUT.write_text(json.dumps(all_tests, indent=2))
        print(f"\nReplaced: {len(all_tests)} tests → {OUT}")
    return 0


if __name__ == '__main__':
    raise SystemExit(main())