#!/usr/bin/env python3
"""Build coverage manifest for _hyperscript v0.9.90 upstream tests.

Strategy:
1. Parse every test file under /tmp/hs-upstream/test/ for `test(...)` and
   `test.describe(...)` calls (Playwright style).
2. Stack nested describes into name prefixes (the describe is the category
   marker — but our JSON keys by filename basename, so we use that).
3. Match against /root/rose-ash/spec/tests/hyperscript-upstream-tests.json.
4. Classify complexity, status, block_reason.
5. Emit manifest JSON + markdown summary.
"""
import ast
import json, os, re, sys
from collections import Counter, defaultdict
from pathlib import Path

HS_ROOT = Path('/tmp/hs-upstream')
TEST_ROOT = HS_ROOT / 'test'
OUR_JSON = Path('/root/rose-ash/spec/tests/hyperscript-upstream-tests.json')
GEN_PY = Path('/root/rose-ash/tests/playwright/generate-sx-tests.py')
OUT_JSON = Path('/root/rose-ash/spec/tests/hyperscript-upstream-manifest.json')
OUT_MD = Path('/root/rose-ash/spec/tests/hyperscript-upstream-manifest.md')

# ---------------------------------------------------------------------------
# Load SKIP_TEST_NAMES from generator
# ---------------------------------------------------------------------------
gen_src = GEN_PY.read_text()
m = re.search(r'SKIP_TEST_NAMES\s*=\s*\{(.*?)\n\}', gen_src, re.DOTALL)
assert m, "could not find SKIP_TEST_NAMES"
skip_block = m.group(1)
SKIP_NAMES = set()
for line in skip_block.splitlines():
    line = line.strip().rstrip(',')
    if not line or line.startswith('#'):
        continue
    # Each remaining line should start with a quoted Python string literal,
    # possibly followed by a comma and/or a trailing comment.  Match the
    # literal *including* its quotes (escape-aware, anchored at the start so
    # a quote character inside a trailing comment cannot be mis-captured),
    # then let ast.literal_eval do the unescaping — unlike the old
    # encode('utf-8').decode('unicode_escape') round-trip, this is also
    # correct for non-ASCII test names.
    mm = re.match(r'''^(['"])(?:\\.|(?!\1).)*\1''', line)
    if mm:
        try:
            SKIP_NAMES.add(ast.literal_eval(mm.group(0)))
        except (ValueError, SyntaxError):
            # Malformed literal — skip it, matching the old best-effort
            # behavior of swallowing parse failures.
            pass
print(f"Loaded {len(SKIP_NAMES)} skip names", file=sys.stderr)
# ---------------------------------------------------------------------------
# Load our snapshot JSON
# ---------------------------------------------------------------------------
our_tests = json.load(OUR_JSON.open())
our_map = {}

def norm(s):
    """Collapse runs of whitespace and lowercase, for fuzzy name matching."""
    return re.sub(r'\s+', ' ', s.strip()).lower()

for t in our_tests:
    key = (t['category'], norm(t['name']))
    our_map[key] = t
print(f"Our JSON: {len(our_tests)} tests, {len(our_map)} unique (cat,name)", file=sys.stderr)

# ---------------------------------------------------------------------------
# Extract upstream tests
# ---------------------------------------------------------------------------
# Pattern: test(, ... or test.describe(, ...
# We need to parse balanced parens to find the body snippet for classification.
SKIP_FILES = {'fixtures.js', 'global-setup.js', 'global-teardown.js', 'entry.js',
              'htmx-fixtures.js', 'playwright.config.js'}

def parse_string_literal(src, i):
    """src[i] must be quote; return (value, next_i).

    Understands \\n, \\t, \\\\ and an escaped closing quote; any other escape
    collapses to its bare character (good enough for test-name matching).
    Raises ValueError on an unterminated string.
    """
    q = src[i]
    assert q in ('"', "'", '`'), f"not a quote at {i}: {src[i-5:i+5]!r}"
    i += 1
    out = []
    while i < len(src):
        c = src[i]
        if c == '\\':
            nxt = src[i+1] if i+1 < len(src) else ''
            # preserve common escapes
            if nxt == 'n':
                out.append('\n'); i += 2
            elif nxt == 't':
                out.append('\t'); i += 2
            elif nxt == '\\':
                out.append('\\'); i += 2
            elif nxt == q:
                out.append(q); i += 2
            else:
                out.append(nxt); i += 2
        elif c == q:
            return ''.join(out), i + 1
        else:
            out.append(c); i += 1
    raise ValueError(f"unterminated string starting at {i}")

def find_matching_paren(src, start):
    """start is index of '('; return index of matching ')' or -1.

    Skips string literals, // and /* */ comments, and (heuristically)
    regex literals so unbalanced parens inside them don't break matching.
    """
    depth = 0
    i = start
    while i < len(src):
        c = src[i]
        if c in ('"', "'", '`'):
            _, i = parse_string_literal(src, i)
            continue
        if c == '/' and i+1 < len(src):
            # regex or comment
            if src[i+1] == '/':
                # line comment
                j = src.find('\n', i)
                i = len(src) if j == -1 else j + 1
                continue
            if src[i+1] == '*':
                j = src.find('*/', i)
                i = len(src) if j == -1 else j + 2
                continue
            # regex literal: heuristic — only treat as regex if preceded by an
            # operator/paren/comma/etc (or 'return'); a '/' after a value is
            # division and falls through to the plain-character case below.
            k = i - 1
            while k >= 0 and src[k].isspace():
                k -= 1
            prev = src[k] if k >= 0 else ''
            if prev in '(,;=!?&|:+-*/<>%^~{[' or prev == '' or (k >= 0 and src[k-5:k+1] == 'return' if k>=5 else False):
                # scan regex: honor backslash escapes and character classes,
                # then swallow trailing flag letters; bail at end of line.
                j = i + 1
                while j < len(src):
                    cc = src[j]
                    if cc == '\\':
                        j += 2; continue
                    if cc == '[':
                        while j < len(src) and src[j] != ']':
                            if src[j] == '\\':
                                j += 2
                            else:
                                j += 1
                        j += 1; continue
                    if cc == '/':
                        j += 1
                        while j < len(src) and src[j].isalpha():
                            j += 1
                        break
                    if cc == '\n':
                        break
                    j += 1
                i = j
                continue
        if c == '(':
            depth += 1; i += 1
        elif c == ')':
            depth -= 1
            if depth == 0:
                return i
            i += 1
        else:
            i += 1
    return -1

def extract_tests_from_file(path):
    """Parse one Playwright .js test file.

    Returns a list of dicts {name, body, describe}: one per test( call found
    inside a test.describe( region, where body is the full balanced-paren
    argument snippet of the test call (used later for classification).
    """
    src = path.read_text()
    tests = []  # list of dicts: name, body, describe_name
    # Find all test.describe( and test( calls, tracking nesting.
    # We do this by regex+scan approach: iterate over the file and at each match
    # track current describe stack (via start/end of the describe's body).
    # Simpler: find test.describe(...) blocks first, then within each find test(...)
    describes = []  # (name, start_idx_of_body_lbrace, end_idx)
    i = 0
    while i < len(src):
        # BUGFIX: the pattern used to end in r'\(\s*', which made
        # `i + m.end() - 1` point at trailing whitespace instead of the '('
        # whenever space followed it, so find_matching_paren started on the
        # wrong character.  Anchoring the match at the '(' itself fixes that.
        m = re.search(r'test\.describe\s*\(', src[i:])
        if not m:
            break
        start = i + m.start()
        paren = i + m.end() - 1  # paren points at '(' after describe
        # parse quoted string right after
        j = paren + 1
        # skip whitespace
        while j < len(src) and src[j].isspace():
            j += 1
        if j >= len(src) or src[j] not in ('"', "'", '`'):
            i = start + len(m.group(0))
            continue
        try:
            name, j = parse_string_literal(src, j)
        except Exception:
            i = start + len(m.group(0))
            continue
        # find matching close paren
        endp = find_matching_paren(src, paren)
        if endp == -1:
            i = start + len(m.group(0))
            continue
        describes.append((name, paren, endp))
        # resume just inside the describe so nested describes are also found
        i = paren + 1
    # Now extract tests; each test is `test(` not `test.describe`
    for dname, dstart, dend in describes:
        region = src[dstart:dend]
        # Iterate matches of test( inside region that are not test.describe
        k = 0
        while k < len(region):
            # NOTE(review): this pattern and the following four statements were
            # destroyed in the source I reviewed; reconstructed by analogy with
            # the describe loop above (negative lookbehind excludes
            # `test.describe(` and identifiers like `mytest(`) — confirm
            # against upstream history.
            m = re.search(r'(?<![\w.])test\s*\(', region[k:])
            if not m:
                break
            abs_paren = dstart + k + m.end() - 1
            j = abs_paren + 1
            while j < len(src) and src[j].isspace():
                j += 1
            if j >= len(src) or src[j] not in ('"', "'", '`'):
                k += m.end(); continue
            try:
                tname, j = parse_string_literal(src, j)
            except Exception:
                k += m.end(); continue
            endp = find_matching_paren(src, abs_paren)
            if endp == -1:
                k += m.end(); continue
            body = src[abs_paren:endp+1]
            tests.append({'name': tname, 'body': body, 'describe': dname})
            # advance past this test
            k = (endp + 1) - dstart
    return tests

# ---------------------------------------------------------------------------
# Extract from all files
# ---------------------------------------------------------------------------
upstream_tests = []
for path in sorted(TEST_ROOT.rglob('*.js')):
    if path.name in SKIP_FILES:
        continue
    if 'vendor' in path.parts or 'node_modules' in path.parts or 'manual' in path.parts:
        continue
    rel = path.relative_to(HS_ROOT)
    category = path.stem  # filename without .js
    # Special cases for organization
    tests = extract_tests_from_file(path)
    for t in tests:
        upstream_tests.append({
            'category': category,
            'name': t['name'],
            'body': t['body'],
            'describe': t['describe'],
            'upstream_file': str(rel),
        })
'upstream_file': str(rel), }) print(f"Extracted {len(upstream_tests)} upstream tests", file=sys.stderr) # --------------------------------------------------------------------------- # Identify post-snapshot additions # --------------------------------------------------------------------------- # A test is "post-snapshot" if it appears in a file modified in the window # AND not in our JSON. We'll compute based on the in_our_json check instead. # --------------------------------------------------------------------------- # Classify each test # --------------------------------------------------------------------------- def classify_complexity(body, name, existing): if existing: return existing.get('complexity', 'simple') b = body if 'sinon.' in b: return 'sinon' if '