- scrape-hs-upstream.py: new scraper walks /tmp/hs-upstream/test/**/*.js
and emits body-style records for all 1,496 v0.9.90 tests (up from 831).
Widens coverage into 66 previously-missing categories — templates,
reactivity, behavior, worker, classRef, make, throw, htmx, tailwind,
viewTransition, and more.
- build-hs-manifest.py + hyperscript-upstream-manifest.{json,md}:
coverage manifest tagging each upstream test with a status
(runnable / skip-listed / untranslated / missing) and block reason.
- generate-sx-tests.py: emit (error "SKIP (...)") instead of silent
(hs-cleanup!) no-op for both skip-listed tests and generator-
untranslatable bodies. Stub counter now reports both buckets.
- hyperscript-feature-audit-0.9.90.md: gap audit against the 0.9.90
spec; pre-0.9.90.json backs up prior 831-test snapshot.
New honest baseline (ocaml runner, test-hyperscript-behavioral):
831 -> 1,496 tests; 645 -> 1,013 passing (67.7% conformance).
483 failures split: 45 skip-list, 151 untranslated, 287 real.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
298 lines
9.9 KiB
Python
#!/usr/bin/env python3
"""Scrape every test from _hyperscript v0.9.90 upstream into our JSON format.

Walks /tmp/hs-upstream/test/**/*.js, parses `test.describe(...)` and `test(...)`
calls with balanced-paren scanning, extracts the arrow function body, and the
first html(...) argument. Emits /root/rose-ash/spec/tests/hyperscript-upstream-tests.json
in body-style Playwright format (matching existing body entries).
"""
|
|
import json, os, re, sys
|
|
from collections import Counter
|
|
from pathlib import Path
|
|
|
|
# Root of the upstream _hyperscript v0.9.90 checkout being scraped.
HS_ROOT = Path('/tmp/hs-upstream')
# All upstream test files live under test/**.
TEST_ROOT = HS_ROOT / 'test'
# Output: body-style Playwright-format test records.
OUT_JSON = Path('/root/rose-ash/spec/tests/hyperscript-upstream-tests.json')
# One-time backup of the previous scrape (written only if it doesn't exist yet).
BACKUP = Path('/root/rose-ash/spec/tests/hyperscript-upstream-tests.pre-0.9.90.json')

# Harness / infrastructure files that contain no scrapeable test() calls.
SKIP_FILES = {'fixtures.js', 'global-setup.js', 'global-teardown.js',
              'entry.js', 'htmx-fixtures.js', 'playwright.config.js'}
|
|
|
|
# --- tokeniser-ish balanced-paren scanner -----------------------------------
|
|
|
|
def parse_string_literal(src, i):
    """Parse the JS string literal starting at src[i] (must be a quote char).

    Returns (decoded_value, index_just_past_closing_quote). Understands the
    common backslash escapes (unknown escapes degrade to the bare character)
    and backtick template literals: a `${...}` interpolation is copied through
    with its braces balanced, except that any string literals nested inside it
    are consumed but dropped from the output. Raises ValueError if the literal
    never closes.
    """
    quote = src[i]
    # Escape table; the closing-quote escape depends on which quote opened us.
    escapes = {'n': '\n', 't': '\t', 'r': '\r', '\\': '\\', quote: quote}
    chars = []
    pos = i + 1
    n = len(src)
    while pos < n:
        ch = src[pos]
        if ch == '\\':
            follower = src[pos + 1] if pos + 1 < n else ''
            chars.append(escapes.get(follower, follower))
            pos += 2
        elif ch == quote:
            return ''.join(chars), pos + 1
        elif quote == '`' and ch == '$' and pos + 1 < n and src[pos + 1] == '{':
            # Template interpolation — copy it through, tracking brace depth.
            chars.append('${')
            pos += 2
            depth = 1
            while pos < n and depth > 0:
                inner = src[pos]
                if inner in ('"', "'", '`'):
                    # Skip nested strings so their braces can't unbalance us.
                    _, pos = parse_string_literal(src, pos)
                    continue
                if inner == '{':
                    depth += 1
                elif inner == '}':
                    depth -= 1
                chars.append(inner)
                pos += 1
        else:
            chars.append(ch)
            pos += 1
    raise ValueError("unterminated string")
|
|
|
|
def skip_comment_or_regex(src, i):
    """If src[i:] starts a // comment, /* block */ comment, or a regex
    literal, return the index just past it; otherwise return None.

    Regex detection is heuristic: a '/' is treated as a regex opener only
    when the preceding non-space character is operator-ish (a context where
    division is impossible).
    """
    if src[i] != '/' or i + 1 >= len(src):
        return None
    nxt = src[i + 1]
    if nxt == '/':
        # Line comment: consume through the newline (or to EOF).
        j = src.find('\n', i)
        return len(src) if j == -1 else j + 1
    if nxt == '*':
        # Block comment. Search from i+2 so the '*' of the opener cannot be
        # reused as the '*' of the closer (i.e. "/*/" is NOT a closed comment).
        j = src.find('*/', i + 2)
        return len(src) if j == -1 else j + 2
    # Regex heuristic: preceding non-space char is operator-ish.
    k = i - 1
    while k >= 0 and src[k].isspace():
        k -= 1
    prev = src[k] if k >= 0 else ''
    if prev and prev not in '(,;=!?&|:+-*/<>%^~{[\n':
        # Not a regex context — looks like division.
        return None
    j = i + 1
    while j < len(src):
        cc = src[j]
        if cc == '\\':
            j += 2
            continue
        if cc == '[':
            # Character class: a '/' inside [...] does not close the regex.
            j += 1
            while j < len(src) and src[j] != ']':
                j += 2 if src[j] == '\\' else 1
            if j < len(src):
                j += 1
            continue
        if cc == '/':
            # Closing slash; also swallow trailing flags (g, i, m, ...).
            j += 1
            while j < len(src) and src[j].isalpha():
                j += 1
            return j
        if cc == '\n':
            # Regex literals cannot span lines — it was division after all.
            return None
        j += 1
    return None
|
|
|
|
def find_matching(src, start, open_c='(', close_c=')'):
    """Return the index of the close_c that balances the open_c at `start`.

    Strings, comments, and regex literals are skipped as opaque spans so the
    delimiters inside them don't affect the depth count. Returns -1 when the
    source ends first or an unterminated string is encountered.
    """
    depth = 0
    pos = start
    n = len(src)
    while pos < n:
        ch = src[pos]
        if ch in ('"', "'", '`'):
            try:
                _, pos = parse_string_literal(src, pos)
            except ValueError:
                return -1
            continue
        skipped = skip_comment_or_regex(src, pos)
        if skipped is not None:
            pos = skipped
            continue
        if ch == open_c:
            depth += 1
        elif ch == close_c:
            depth -= 1
            if depth == 0:
                return pos
        pos += 1
    return -1
|
|
|
|
# --- test extraction --------------------------------------------------------
|
|
|
|
def extract_arrow_body(call_src):
    """Given the full `(...)` argument source of a test(name, fn) call,
    return the text between the braces of the arrow-function body, or None
    when no `=> { ... }` form is present."""
    arrow_at = call_src.find('=>')
    if arrow_at == -1:
        return None
    # Locate the opening brace that follows the arrow.
    brace = arrow_at + 2
    while brace < len(call_src) and call_src[brace].isspace():
        brace += 1
    if brace >= len(call_src) or call_src[brace] != '{':
        # Expression-bodied arrow (no braces) — unsupported.
        return None
    close = find_matching(call_src, brace, '{', '}')
    if close == -1:
        return None
    return call_src[brace + 1:close]
|
|
|
|
def extract_first_html(body):
    """Return the literal string argument of the first html(...) call in
    body, or '' if there is none or it is not a pure string expression.

    Handles html("x"), html(`x`), and concatenations like html("x" + "y")."""
    call = re.search(r'\bhtml\s*\(', body)
    if call is None:
        return ''
    open_paren = call.end() - 1
    close_paren = find_matching(body, open_paren, '(', ')')
    if close_paren == -1:
        return ''
    arg_src = body[open_paren + 1:close_paren].strip()
    # Accept only string literals, possibly joined with '+'.
    pieces = []
    pos = 0
    while pos < len(arg_src):
        ch = arg_src[pos]
        if ch.isspace() or ch == '+':
            pos += 1
        elif ch in ('"', "'", '`'):
            try:
                text, pos = parse_string_literal(arg_src, pos)
            except ValueError:
                return ''
            pieces.append(text)
        else:
            # Variables, calls, etc. — not a pure string concatenation; bail.
            return ''
    return ''.join(pieces)
|
|
|
|
def extract_tests_from_file(path, rel_category):
    """Extract every top-level test(...) call from one upstream .js file.

    Args:
        path: Path to the .js file.
        rel_category: category string stamped on each emitted record.

    Returns a list of dicts in our body-style JSON format (category, name,
    html, body, async, complexity). Calls that aren't shaped like
    `test("name", ... => {...})` — e.g. test.describe, non-arrow functions,
    non-literal names — are skipped.
    """
    # Explicit encoding: upstream files are UTF-8; the platform default may not be.
    src = path.read_text(encoding='utf-8')
    # Bare test( only — the lookbehind rejects test.describe / foo.test( etc.
    # Compiled once and searched with a start offset so we never slice src
    # (the old src[i:] approach re-copied the remainder on every iteration).
    # Resume points always follow '(' or ')', which the lookbehind class
    # excludes, so matching is unchanged.
    test_call = re.compile(r'(?<![a-zA-Z0-9_$.])test\s*\(')
    tests = []
    i = 0
    while i < len(src):
        m = test_call.search(src, i)
        if not m:
            break
        abs_paren = m.end() - 1
        # First argument must be a string literal (the test name).
        j = abs_paren + 1
        while j < len(src) and src[j].isspace():
            j += 1
        if j >= len(src) or src[j] not in ('"', "'", '`'):
            i = abs_paren + 1
            continue
        try:
            tname, _ = parse_string_literal(src, j)
        except ValueError:
            i = abs_paren + 1
            continue
        endp = find_matching(src, abs_paren, '(', ')')
        if endp == -1:
            i = abs_paren + 1
            continue
        body = extract_arrow_body(src[abs_paren:endp + 1])
        if body is None:
            i = endp + 1
            continue
        tests.append({
            'category': rel_category,
            'name': tname,
            'html': extract_first_html(body),
            'body': body,
            'async': True,
            'complexity': classify_complexity(body),
        })
        i = endp + 1
    return tests
|
|
|
|
def classify_complexity(body):
    """Bucket a test body by what it needs from the harness.

    Checks run in priority order — first match wins: sinon stubs, inline
    hyperscript/hypertemplate script tags, dialogs, promises, then (for
    bodies with no html() fixture) pure evaluate()/run() API tests.
    Everything else is 'simple'.
    """
    if 'sinon.' in body:
        return 'sinon'
    script_markers = (
        '<script type="text/hyperscript"',
        "<script type='text/hyperscript'",
        '<script type="text/hypertemplate"',
        "<script type='text/hypertemplate'",
    )
    if any(marker in body for marker in script_markers):
        return 'script-tag'
    if 'showModal' in body or '<dialog' in body.lower():
        return 'dialog'
    if 'new Promise' in body or '.resolves' in body or 'Promise.' in body:
        return 'promise'
    if 'html(' not in body:
        # No DOM fixture at all: classify by which API the body drives.
        if '_hyperscript.evaluate' in body or re.search(r'\bevaluate\s*\(', body):
            return 'eval-only'
        if re.search(r'\brun\s*\(', body):
            return 'run-eval'
    return 'simple'
|
|
|
|
# --- main -------------------------------------------------------------------
|
|
|
|
def rel_category(path):
    """Map an upstream test file to our category string.

    commands/foo.js and features/foo.js collapse to plain 'foo'; everything
    else keeps its directory prefix: core/api.js → 'core/api',
    templates/x/y.js → 'templates/x/y'."""
    parts = path.relative_to(TEST_ROOT).parts
    stem = path.stem
    if len(parts) == 1:
        # Top level — not expected, since all upstream tests live in subdirs.
        return stem
    if parts[0] in ('commands', 'features'):
        return stem
    if len(parts) == 2:
        return f'{parts[0]}/{stem}'
    # Deeper nesting: keep every directory, swap the filename for its stem.
    return '/'.join(parts[:-1] + (stem,))
|
|
|
|
def main():
    """Scrape TEST_ROOT, dedup, report stats, and write OUT_JSON."""
    # Preserve the previous scrape exactly once, so reruns don't clobber it.
    if OUT_JSON.exists() and not BACKUP.exists():
        import shutil
        shutil.copy2(OUT_JSON, BACKUP)
        print(f'Backed up existing JSON to {BACKUP}', file=sys.stderr)

    all_tests = []
    file_count = 0
    for path in sorted(TEST_ROOT.rglob('*.js')):
        if path.name in SKIP_FILES:
            continue
        if any(p in ('vendor', 'node_modules', 'manual') for p in path.parts):
            continue
        all_tests.extend(extract_tests_from_file(path, rel_category(path)))
        file_count += 1

    # Dedup by (category, name): first occurrence wins; output is sorted.
    seen = {}
    for t in all_tests:
        seen.setdefault((t['category'], t['name']), t)
    deduped = sorted(seen.values(), key=lambda t: (t['category'], t['name']))

    # Per-category stats on stdout.
    cat_counts = Counter(t['category'] for t in deduped)
    print(f'Scanned {file_count} files, extracted {len(all_tests)} tests ({len(deduped)} unique)')
    print(f'Categories: {len(cat_counts)}')
    for cat, n in cat_counts.most_common():
        print(f' {cat:40s} {n:4d}')

    with OUT_JSON.open('w') as f:
        json.dump(deduped, f, indent=2, ensure_ascii=False)
        f.write('\n')
    print(f'\nWrote {OUT_JSON} ({len(deduped)} tests)')
|
|
|
|
# Script entry point.
if __name__ == '__main__':
    main()
|