HS tests: scrape v0.9.90 upstream in full, flip silent stubs to loud SKIPs
- scrape-hs-upstream.py: new scraper walks /tmp/hs-upstream/test/**/*.js
and emits body-style records for all 1,496 v0.9.90 tests (up from 831).
Widens coverage into 66 previously-missing categories — templates,
reactivity, behavior, worker, classRef, make, throw, htmx, tailwind,
viewTransition, and more.
- build-hs-manifest.py + hyperscript-upstream-manifest.{json,md}:
coverage manifest tagging each upstream test with a status
(runnable / skip-listed / untranslated / missing) and block reason.
- generate-sx-tests.py: emit (error "SKIP (...)") instead of silent
(hs-cleanup!) no-op for both skip-listed tests and generator-
untranslatable bodies. Stub counter now reports both buckets.
- hyperscript-feature-audit-0.9.90.md: gap audit against the 0.9.90
spec; pre-0.9.90.json backs up prior 831-test snapshot.
New honest baseline (ocaml runner, test-hyperscript-behavioral):
831 -> 1,496 tests; 645 -> 1,013 passing (67.7% conformance).
483 failures split: 45 skip-list, 151 untranslated, 287 real.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
297
tests/playwright/scrape-hs-upstream.py
Normal file
297
tests/playwright/scrape-hs-upstream.py
Normal file
@@ -0,0 +1,297 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Scrape every test from _hyperscript v0.9.90 upstream into our JSON format.
|
||||
|
||||
Walks /tmp/hs-upstream/test/**/*.js, parses `test.describe(...)` and `test(...)`
|
||||
calls with balanced-paren scanning, extracts the arrow function body, and the
|
||||
first html(...) argument. Emits /root/rose-ash/spec/tests/hyperscript-upstream-tests.json
|
||||
in body-style Playwright format (matching existing body entries).
|
||||
"""
|
||||
import json, os, re, sys
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
HS_ROOT = Path('/tmp/hs-upstream')
|
||||
TEST_ROOT = HS_ROOT / 'test'
|
||||
OUT_JSON = Path('/root/rose-ash/spec/tests/hyperscript-upstream-tests.json')
|
||||
BACKUP = Path('/root/rose-ash/spec/tests/hyperscript-upstream-tests.pre-0.9.90.json')
|
||||
|
||||
SKIP_FILES = {'fixtures.js', 'global-setup.js', 'global-teardown.js',
|
||||
'entry.js', 'htmx-fixtures.js', 'playwright.config.js'}
|
||||
|
||||
# --- tokeniser-ish balanced-paren scanner -----------------------------------
|
||||
|
||||
def parse_string_literal(src, i):
    """Parse the quoted literal starting at src[i] (a ', \" or ` quote).

    Returns (value, next_i) where next_i is the index just past the closing
    quote.  Handles the common JS escapes (\\n, \\t, \\r, \\\\, \\<quote>) and,
    for backtick template literals, ${...} interpolations — the interpolation
    source is kept verbatim in the output, including any nested string
    literals, so downstream consumers see what was interpolated.

    Raises:
        ValueError: if the literal is never terminated.
    """
    q = src[i]
    i += 1
    out = []
    while i < len(src):
        c = src[i]
        if c == '\\':
            nxt = src[i+1] if i+1 < len(src) else ''
            if nxt == 'n': out.append('\n'); i += 2
            elif nxt == 't': out.append('\t'); i += 2
            elif nxt == 'r': out.append('\r'); i += 2
            elif nxt == '\\': out.append('\\'); i += 2
            elif nxt == q: out.append(q); i += 2
            else:
                # Unknown escape: keep the escaped char, drop the backslash.
                out.append(nxt); i += 2
        elif c == q:
            return ''.join(out), i + 1
        elif q == '`' and c == '$' and i+1 < len(src) and src[i+1] == '{':
            # template interpolation — copy source through balanced braces
            out.append('${'); i += 2
            depth = 1
            while i < len(src) and depth > 0:
                cc = src[i]
                if cc in ('"', "'", '`'):
                    # Fix: append the nested string's raw source instead of
                    # silently dropping it from the interpolation text.
                    start = i
                    _, i = parse_string_literal(src, i)
                    out.append(src[start:i])
                    continue
                if cc == '{': depth += 1
                elif cc == '}': depth -= 1
                out.append(cc); i += 1
        else:
            out.append(c); i += 1
    raise ValueError("unterminated string")
|
||||
|
||||
def skip_comment_or_regex(src, i):
    """If src[i:] starts a // comment, /* block */, or regex literal, return
    the index just past it; otherwise return None.

    Regex detection is heuristic: a '/' is treated as a regex opener only when
    the previous non-space character looks like an operator or opener;
    otherwise it is assumed to be division.  An unterminated regex candidate
    (newline before the closing '/') also returns None.
    """
    if src[i] != '/' or i+1 >= len(src):
        return None
    nxt = src[i+1]
    if nxt == '/':
        # line comment — consume through (and including) the newline
        j = src.find('\n', i)
        return len(src) if j == -1 else j + 1
    if nxt == '*':
        # Block comment.  Search from i+2 so that '/*/' does not match its
        # own opener as the terminator (original searched from i, which made
        # '/*/ ... */' terminate two chars in).
        j = src.find('*/', i + 2)
        return len(src) if j == -1 else j + 2
    # regex heuristic: preceding non-space char is operator-ish
    k = i - 1
    while k >= 0 and src[k].isspace(): k -= 1
    prev = src[k] if k >= 0 else ''
    if prev and prev not in '(,;=!?&|:+-*/<>%^~{[\n':
        # not regex context — looks like division
        return None
    j = i + 1
    while j < len(src):
        cc = src[j]
        if cc == '\\':
            j += 2; continue
        if cc == '[':
            # character class — '/' inside [...] does not close the regex
            j += 1
            while j < len(src) and src[j] != ']':
                if src[j] == '\\': j += 2
                else: j += 1
            if j < len(src): j += 1
            continue
        if cc == '/':
            # closing slash; swallow trailing flags (g, i, m, ...)
            j += 1
            while j < len(src) and src[j].isalpha(): j += 1
            return j
        if cc == '\n':
            return None
        j += 1
    return None
|
||||
|
||||
def find_matching(src, start, open_c='(', close_c=')'):
    """Return the index of the close_c that balances the open_c at `start`.

    String/template literals, comments, and regex literals are skipped so
    delimiters inside them are ignored.  Returns -1 when no balanced close
    exists (including when a string literal is unterminated).
    """
    depth = 0
    pos = start
    n = len(src)
    while pos < n:
        ch = src[pos]
        # Jump over string and template literals wholesale.
        if ch in ('"', "'", '`'):
            try:
                _, pos = parse_string_literal(src, pos)
            except ValueError:
                return -1
            continue
        # Jump over comments and regex literals.
        skipped = skip_comment_or_regex(src, pos)
        if skipped is not None:
            pos = skipped
            continue
        if ch == open_c:
            depth += 1
        elif ch == close_c:
            depth -= 1
            if depth == 0:
                return pos
        pos += 1
    return -1
|
||||
|
||||
# --- test extraction --------------------------------------------------------
|
||||
|
||||
def extract_arrow_body(call_src):
    """Extract the `{ ... }` body of the arrow function in a test(...) call.

    `call_src` is the full parenthesised argument list of test(name, fn).
    Returns the text strictly between the braces of the first arrow body
    found after '=>', or None when there is no arrow, no braced body (e.g. a
    concise-body arrow), or the braces are unbalanced.
    """
    arrow_at = call_src.find('=>')
    if arrow_at < 0:
        return None
    # Locate the opening brace that follows the arrow.
    brace = arrow_at + 2
    while brace < len(call_src) and call_src[brace].isspace():
        brace += 1
    if brace >= len(call_src) or call_src[brace] != '{':
        return None
    close = find_matching(call_src, brace, '{', '}')
    if close == -1:
        return None
    # Body text is returned as-is, indentation and newlines preserved.
    return call_src[brace+1:close]
|
||||
|
||||
def extract_first_html(body):
    """Return the literal string passed to the first html(...) call in body.

    Accepts html("x"), html(`x`), and concatenations like html("x" + 'y').
    Returns '' when no html(...) call exists, the parens are unbalanced, or
    the argument is anything other than string literals joined with '+'.
    """
    call = re.search(r'\bhtml\s*\(', body)
    if call is None:
        return ''
    open_paren = call.end() - 1
    close_paren = find_matching(body, open_paren, '(', ')')
    if close_paren == -1:
        return ''
    args = body[open_paren+1:close_paren].strip()
    # Accumulate string pieces; anything that is not a string literal or a
    # '+' joiner means the arg isn't a pure string expression — bail.
    pieces = []
    pos = 0
    while pos < len(args):
        ch = args[pos]
        if ch == '+' or ch.isspace():
            pos += 1
        elif ch in ('"', "'", '`'):
            try:
                piece, pos = parse_string_literal(args, pos)
            except ValueError:
                return ''
            pieces.append(piece)
        else:
            # not a pure string concatenation — bail
            return ''
    return ''.join(pieces)
|
||||
|
||||
def extract_tests_from_file(path, rel_category):
    """Parse one upstream .js file and return a list of test records.

    Scans for bare `test(` call sites (the lookbehind rejects `test.describe`,
    `test.skip`, `mytest(` etc.), reads the literal test name, captures the
    balanced argument list, then extracts the arrow-function body and the
    first html(...) fixture.  Call sites that can't be parsed are skipped.
    """
    src = path.read_text()
    found = []
    cursor = 0
    test_call = re.compile(r'(?<![a-zA-Z0-9_$.])test\s*\(')
    while cursor < len(src):
        hit = test_call.search(src, cursor)
        if hit is None:
            break
        paren_at = hit.end() - 1
        # First argument must be a string literal: the test name.
        name_at = paren_at + 1
        while name_at < len(src) and src[name_at].isspace():
            name_at += 1
        if name_at >= len(src) or src[name_at] not in ('"', "'", '`'):
            cursor = paren_at + 1
            continue
        try:
            test_name, _ = parse_string_literal(src, name_at)
        except ValueError:
            cursor = paren_at + 1
            continue
        close_at = find_matching(src, paren_at, '(', ')')
        if close_at == -1:
            cursor = paren_at + 1
            continue
        body = extract_arrow_body(src[paren_at:close_at+1])
        if body is None:
            cursor = close_at + 1
            continue
        found.append({
            'category': rel_category,
            'name': test_name,
            'html': extract_first_html(body),
            'body': body,
            'async': True,
            'complexity': classify_complexity(body),
        })
        cursor = close_at + 1
    return found
|
||||
|
||||
def classify_complexity(body):
    """Bucket a test body by the translation machinery it will need.

    Checks run most- to least-specific; the first match wins.  Returns one
    of: 'sinon', 'script-tag', 'dialog', 'promise', 'eval-only', 'run-eval',
    or 'simple'.
    """
    if 'sinon.' in body:
        return 'sinon'
    script_markers = (
        '<script type="text/hyperscript"',
        "<script type='text/hyperscript'",
        '<script type="text/hypertemplate"',
        "<script type='text/hypertemplate'",
    )
    if any(marker in body for marker in script_markers):
        return 'script-tag'
    if 'showModal' in body or '<dialog' in body.lower():
        return 'dialog'
    if 'new Promise' in body or '.resolves' in body or 'Promise.' in body:
        return 'promise'
    if 'html(' not in body:
        # No fixture markup: pure evaluate()/run() style tests.
        if '_hyperscript.evaluate' in body or re.search(r'\bevaluate\s*\(', body):
            return 'eval-only'
        if re.search(r'\brun\s*\(', body):
            return 'run-eval'
    return 'simple'
|
||||
|
||||
# --- main -------------------------------------------------------------------
|
||||
|
||||
def rel_category(path):
    """Map a test file path under TEST_ROOT to a category label.

    commands/foo.js and features/foo.js collapse to just 'foo'; anything
    else keeps its directory prefix: core/foo.js → 'core/foo', and deeper
    paths join every directory component with the stem.
    """
    rel_parts = path.relative_to(TEST_ROOT).parts
    stem = path.stem
    if len(rel_parts) == 1:
        # Top-level file — not expected (tests live in subdirs), but degrade
        # gracefully to the bare stem.
        return stem
    head = rel_parts[0]
    if head in ('commands', 'features'):
        # The two "flat" namespaces: the file stem alone is the category.
        return stem
    if len(rel_parts) == 2:
        # Single subdir, e.g. core/api.js → 'core/api'.
        return f'{head}/{stem}'
    # Deeper nesting: every directory component plus the stem.
    return '/'.join(rel_parts[:-1] + (stem,))
|
||||
|
||||
def main():
    """Scrape all upstream test files and write the deduped JSON corpus."""
    # Preserve the previous corpus exactly once, so reruns don't clobber
    # the original backup.
    if OUT_JSON.exists() and not BACKUP.exists():
        import shutil
        shutil.copy2(OUT_JSON, BACKUP)
        print(f'Backed up existing JSON to {BACKUP}', file=sys.stderr)

    all_tests = []
    file_count = 0
    for path in sorted(TEST_ROOT.rglob('*.js')):
        if path.name in SKIP_FILES:
            continue
        if any(p in ('vendor', 'node_modules', 'manual') for p in path.parts):
            continue
        all_tests.extend(extract_tests_from_file(path, rel_category(path)))
        file_count += 1

    # Dedup by (category, name), keeping the first occurrence (stable).
    unique = {}
    for t in all_tests:
        unique.setdefault((t['category'], t['name']), t)
    deduped = sorted(unique.values(), key=lambda t: (t['category'], t['name']))

    # Per-category stats on stdout.
    cat_counts = Counter(t['category'] for t in deduped)
    print(f'Scanned {file_count} files, extracted {len(all_tests)} tests ({len(deduped)} unique)')
    print(f'Categories: {len(cat_counts)}')
    for cat, n in cat_counts.most_common():
        print(f' {cat:40s} {n:4d}')

    with OUT_JSON.open('w') as f:
        json.dump(deduped, f, indent=2, ensure_ascii=False)
        f.write('\n')
    print(f'\nWrote {OUT_JSON} ({len(deduped)} tests)')
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: scrape upstream tests and write the JSON corpus.
    main()
|
||||
Reference in New Issue
Block a user