HS tests: scrape v0.9.90 upstream in full, flip silent stubs to loud SKIPs
- scrape-hs-upstream.py: new scraper walks /tmp/hs-upstream/test/**/*.js
and emits body-style records for all 1,496 v0.9.90 tests (up from 831).
Widens coverage into 66 previously-missing categories — templates,
reactivity, behavior, worker, classRef, make, throw, htmx, tailwind,
viewTransition, and more.
- build-hs-manifest.py + hyperscript-upstream-manifest.{json,md}:
coverage manifest tagging each upstream test with a status
(runnable / skip-listed / untranslated / missing) and block reason.
- generate-sx-tests.py: emit (error "SKIP (...)") instead of silent
(hs-cleanup!) no-op for both skip-listed tests and generator-
untranslatable bodies. Stub counter now reports both buckets.
- hyperscript-feature-audit-0.9.90.md: gap audit against the 0.9.90
spec; pre-0.9.90.json backs up prior 831-test snapshot.
New honest baseline (ocaml runner, test-hyperscript-behavioral):
831 -> 1,496 tests; 645 -> 1,013 passing (67.7% conformance).
483 failures split: 45 skip-list, 151 untranslated, 287 real.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
297
tests/playwright/scrape-hs-upstream.py
Normal file
297
tests/playwright/scrape-hs-upstream.py
Normal file
@@ -0,0 +1,297 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Scrape every test from _hyperscript v0.9.90 upstream into our JSON format.
|
||||
|
||||
Walks /tmp/hs-upstream/test/**/*.js, parses `test.describe(...)` and `test(...)`
|
||||
calls with balanced-paren scanning, extracts the arrow function body, and the
|
||||
first html(...) argument. Emits /root/rose-ash/spec/tests/hyperscript-upstream-tests.json
|
||||
in body-style Playwright format (matching existing body entries).
|
||||
"""
|
||||
import json, os, re, sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
HS_ROOT = Path('/tmp/hs-upstream')
|
||||
TEST_ROOT = HS_ROOT / 'test'
|
||||
OUT_JSON = Path('/root/rose-ash/spec/tests/hyperscript-upstream-tests.json')
|
||||
BACKUP = Path('/root/rose-ash/spec/tests/hyperscript-upstream-tests.pre-0.9.90.json')
|
||||
|
||||
SKIP_FILES = {'fixtures.js', 'global-setup.js', 'global-teardown.js',
|
||||
'entry.js', 'htmx-fixtures.js', 'playwright.config.js'}
|
||||
|
||||
# --- tokeniser-ish balanced-paren scanner -----------------------------------
|
||||
|
||||
def parse_string_literal(src, i):
    """Parse the quoted literal starting at src[i] (a ', \" or ` quote).

    Returns (value, next_i) where next_i is the index just past the closing
    quote.  Handles the common JS escapes (\\n, \\t, \\r, \\\\, \\<quote>) and,
    for backtick template literals, ${...} interpolations — the interpolation
    source is kept verbatim in the output, including any nested string
    literals, so downstream consumers see what was interpolated.

    Raises:
        ValueError: if the literal is never terminated.
    """
    q = src[i]
    i += 1
    out = []
    while i < len(src):
        c = src[i]
        if c == '\\':
            nxt = src[i+1] if i+1 < len(src) else ''
            if nxt == 'n': out.append('\n'); i += 2
            elif nxt == 't': out.append('\t'); i += 2
            elif nxt == 'r': out.append('\r'); i += 2
            elif nxt == '\\': out.append('\\'); i += 2
            elif nxt == q: out.append(q); i += 2
            else:
                # Unknown escape: keep the escaped char, drop the backslash.
                out.append(nxt); i += 2
        elif c == q:
            return ''.join(out), i + 1
        elif q == '`' and c == '$' and i+1 < len(src) and src[i+1] == '{':
            # template interpolation — copy source through balanced braces
            out.append('${'); i += 2
            depth = 1
            while i < len(src) and depth > 0:
                cc = src[i]
                if cc in ('"', "'", '`'):
                    # Fix: append the nested string's raw source instead of
                    # silently dropping it from the interpolation text.
                    start = i
                    _, i = parse_string_literal(src, i)
                    out.append(src[start:i])
                    continue
                if cc == '{': depth += 1
                elif cc == '}': depth -= 1
                out.append(cc); i += 1
        else:
            out.append(c); i += 1
    raise ValueError("unterminated string")
|
||||
|
||||
def skip_comment_or_regex(src, i):
    """If src[i:] starts a // comment, /* block */, or regex literal, return
    the index just past it; otherwise return None.

    Regex detection is heuristic: a '/' is treated as a regex opener only when
    the previous non-space character looks like an operator or opener;
    otherwise it is assumed to be division.  An unterminated regex candidate
    (newline before the closing '/') also returns None.
    """
    if src[i] != '/' or i+1 >= len(src):
        return None
    nxt = src[i+1]
    if nxt == '/':
        # line comment — consume through (and including) the newline
        j = src.find('\n', i)
        return len(src) if j == -1 else j + 1
    if nxt == '*':
        # Block comment.  Search from i+2 so that '/*/' does not match its
        # own opener as the terminator (original searched from i, which made
        # '/*/ ... */' terminate two chars in).
        j = src.find('*/', i + 2)
        return len(src) if j == -1 else j + 2
    # regex heuristic: preceding non-space char is operator-ish
    k = i - 1
    while k >= 0 and src[k].isspace(): k -= 1
    prev = src[k] if k >= 0 else ''
    if prev and prev not in '(,;=!?&|:+-*/<>%^~{[\n':
        # not regex context — looks like division
        return None
    j = i + 1
    while j < len(src):
        cc = src[j]
        if cc == '\\':
            j += 2; continue
        if cc == '[':
            # character class — '/' inside [...] does not close the regex
            j += 1
            while j < len(src) and src[j] != ']':
                if src[j] == '\\': j += 2
                else: j += 1
            if j < len(src): j += 1
            continue
        if cc == '/':
            # closing slash; swallow trailing flags (g, i, m, ...)
            j += 1
            while j < len(src) and src[j].isalpha(): j += 1
            return j
        if cc == '\n':
            return None
        j += 1
    return None
|
||||
|
||||
def find_matching(src, start, open_c='(', close_c=')'):
    """Return the index of the close_c that balances the open_c at `start`.

    String/template literals, comments, and regex literals are skipped so
    delimiters inside them are ignored.  Returns -1 when no balanced close
    exists (including when a string literal is unterminated).
    """
    depth = 0
    pos = start
    n = len(src)
    while pos < n:
        ch = src[pos]
        # Jump over string and template literals wholesale.
        if ch in ('"', "'", '`'):
            try:
                _, pos = parse_string_literal(src, pos)
            except ValueError:
                return -1
            continue
        # Jump over comments and regex literals.
        skipped = skip_comment_or_regex(src, pos)
        if skipped is not None:
            pos = skipped
            continue
        if ch == open_c:
            depth += 1
        elif ch == close_c:
            depth -= 1
            if depth == 0:
                return pos
        pos += 1
    return -1
|
||||
|
||||
# --- test extraction --------------------------------------------------------
|
||||
|
||||
def extract_arrow_body(call_src):
    """Extract the `{ ... }` body of the arrow function in a test(...) call.

    `call_src` is the full parenthesised argument list of test(name, fn).
    Returns the text strictly between the braces of the first arrow body
    found after '=>', or None when there is no arrow, no braced body (e.g. a
    concise-body arrow), or the braces are unbalanced.
    """
    arrow_at = call_src.find('=>')
    if arrow_at < 0:
        return None
    # Locate the opening brace that follows the arrow.
    brace = arrow_at + 2
    while brace < len(call_src) and call_src[brace].isspace():
        brace += 1
    if brace >= len(call_src) or call_src[brace] != '{':
        return None
    close = find_matching(call_src, brace, '{', '}')
    if close == -1:
        return None
    # Body text is returned as-is, indentation and newlines preserved.
    return call_src[brace+1:close]
|
||||
|
||||
def extract_first_html(body):
    """Return the literal string passed to the first html(...) call in body.

    Accepts html("x"), html(`x`), and concatenations like html("x" + 'y').
    Returns '' when no html(...) call exists, the parens are unbalanced, or
    the argument is anything other than string literals joined with '+'.
    """
    call = re.search(r'\bhtml\s*\(', body)
    if call is None:
        return ''
    open_paren = call.end() - 1
    close_paren = find_matching(body, open_paren, '(', ')')
    if close_paren == -1:
        return ''
    args = body[open_paren+1:close_paren].strip()
    # Accumulate string pieces; anything that is not a string literal or a
    # '+' joiner means the arg isn't a pure string expression — bail.
    pieces = []
    pos = 0
    while pos < len(args):
        ch = args[pos]
        if ch == '+' or ch.isspace():
            pos += 1
        elif ch in ('"', "'", '`'):
            try:
                piece, pos = parse_string_literal(args, pos)
            except ValueError:
                return ''
            pieces.append(piece)
        else:
            # not a pure string concatenation — bail
            return ''
    return ''.join(pieces)
|
||||
|
||||
def extract_tests_from_file(path, rel_category):
    """Parse one upstream .js file and return a list of test records.

    Scans for bare `test(` call sites (the lookbehind rejects `test.describe`,
    `test.skip`, `mytest(` etc.), reads the literal test name, captures the
    balanced argument list, then extracts the arrow-function body and the
    first html(...) fixture.  Call sites that can't be parsed are skipped.
    """
    src = path.read_text()
    found = []
    cursor = 0
    test_call = re.compile(r'(?<![a-zA-Z0-9_$.])test\s*\(')
    while cursor < len(src):
        hit = test_call.search(src, cursor)
        if hit is None:
            break
        paren_at = hit.end() - 1
        # First argument must be a string literal: the test name.
        name_at = paren_at + 1
        while name_at < len(src) and src[name_at].isspace():
            name_at += 1
        if name_at >= len(src) or src[name_at] not in ('"', "'", '`'):
            cursor = paren_at + 1
            continue
        try:
            test_name, _ = parse_string_literal(src, name_at)
        except ValueError:
            cursor = paren_at + 1
            continue
        close_at = find_matching(src, paren_at, '(', ')')
        if close_at == -1:
            cursor = paren_at + 1
            continue
        body = extract_arrow_body(src[paren_at:close_at+1])
        if body is None:
            cursor = close_at + 1
            continue
        found.append({
            'category': rel_category,
            'name': test_name,
            'html': extract_first_html(body),
            'body': body,
            'async': True,
            'complexity': classify_complexity(body),
        })
        cursor = close_at + 1
    return found
|
||||
|
||||
def classify_complexity(body):
    """Bucket a test body by the translation machinery it will need.

    Checks run most- to least-specific; the first match wins.  Returns one
    of: 'sinon', 'script-tag', 'dialog', 'promise', 'eval-only', 'run-eval',
    or 'simple'.
    """
    if 'sinon.' in body:
        return 'sinon'
    script_markers = (
        '<script type="text/hyperscript"',
        "<script type='text/hyperscript'",
        '<script type="text/hypertemplate"',
        "<script type='text/hypertemplate'",
    )
    if any(marker in body for marker in script_markers):
        return 'script-tag'
    if 'showModal' in body or '<dialog' in body.lower():
        return 'dialog'
    if 'new Promise' in body or '.resolves' in body or 'Promise.' in body:
        return 'promise'
    if 'html(' not in body:
        # No fixture markup: pure evaluate()/run() style tests.
        if '_hyperscript.evaluate' in body or re.search(r'\bevaluate\s*\(', body):
            return 'eval-only'
        if re.search(r'\brun\s*\(', body):
            return 'run-eval'
    return 'simple'
|
||||
|
||||
# --- main -------------------------------------------------------------------
|
||||
|
||||
def rel_category(path):
    """Map a test file path under TEST_ROOT to a category label.

    commands/foo.js and features/foo.js collapse to just 'foo'; anything
    else keeps its directory prefix: core/foo.js → 'core/foo', and deeper
    paths join every directory component with the stem.
    """
    rel_parts = path.relative_to(TEST_ROOT).parts
    stem = path.stem
    if len(rel_parts) == 1:
        # Top-level file — not expected (tests live in subdirs), but degrade
        # gracefully to the bare stem.
        return stem
    head = rel_parts[0]
    if head in ('commands', 'features'):
        # The two "flat" namespaces: the file stem alone is the category.
        return stem
    if len(rel_parts) == 2:
        # Single subdir, e.g. core/api.js → 'core/api'.
        return f'{head}/{stem}'
    # Deeper nesting: every directory component plus the stem.
    return '/'.join(rel_parts[:-1] + (stem,))
|
||||
|
||||
def main():
    """Scrape all upstream test files and write the deduped JSON corpus."""
    # Preserve the previous corpus exactly once, so reruns don't clobber
    # the original backup.
    if OUT_JSON.exists() and not BACKUP.exists():
        import shutil
        shutil.copy2(OUT_JSON, BACKUP)
        print(f'Backed up existing JSON to {BACKUP}', file=sys.stderr)

    all_tests = []
    file_count = 0
    for path in sorted(TEST_ROOT.rglob('*.js')):
        if path.name in SKIP_FILES:
            continue
        if any(p in ('vendor', 'node_modules', 'manual') for p in path.parts):
            continue
        all_tests.extend(extract_tests_from_file(path, rel_category(path)))
        file_count += 1

    # Dedup by (category, name), keeping the first occurrence (stable).
    unique = {}
    for t in all_tests:
        unique.setdefault((t['category'], t['name']), t)
    deduped = sorted(unique.values(), key=lambda t: (t['category'], t['name']))

    # Per-category stats on stdout.
    cat_counts = Counter(t['category'] for t in deduped)
    print(f'Scanned {file_count} files, extracted {len(all_tests)} tests ({len(deduped)} unique)')
    print(f'Categories: {len(cat_counts)}')
    for cat, n in cat_counts.most_common():
        print(f' {cat:40s} {n:4d}')

    with OUT_JSON.open('w') as f:
        json.dump(deduped, f, indent=2, ensure_ascii=False)
        f.write('\n')
    print(f'\nWrote {OUT_JSON} ({len(deduped)} tests)')
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: scrape upstream tests and write the JSON corpus.
    main()
|
||||
Reference in New Issue
Block a user