#!/usr/bin/env python3
"""
Generate spec/tests/test-hyperscript-behavioral.sx from upstream _hyperscript test data.

Reads spec/tests/hyperscript-upstream-tests.json and produces SX deftest forms
that run in the Playwright sandbox with real DOM.

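Handles two assertion formats:
- Chai-style (.should.equal / assert.*) — from v0.9.14 master tests
- Playwright-style (toHaveText / toHaveClass / etc.) — from dev branch tests (have `body` field)

Tests without HTML are converted from run("expr").toBe(val) patterns found in `body`, when present.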

Usage: python3 tests/playwright/generate-sx-tests.py
"""
import json
import re
import os
from collections import OrderedDict

# The project root is found by stripping three path components from this
# script's absolute path (the file name plus two directory levels).
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
INPUT = os.path.join(PROJECT_ROOT, 'spec/tests/hyperscript-upstream-tests.json')
OUTPUT = os.path.join(PROJECT_ROOT, 'spec/tests/test-hyperscript-behavioral.sx')

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# locale encoding.
with open(INPUT, encoding='utf-8') as f:
    raw_tests = json.load(f)

# ── HTML parsing ──────────────────────────────────────────────────

def parse_html(html):
    """Parse an HTML fragment into a list of top-level element dicts.

    Uses Python's html.parser for reliability with same-tag siblings.

    Each returned dict has the keys ``tag``, ``id``, ``classes`` (list),
    ``hs`` (value of the ``_`` attribute), ``attrs`` (remaining attributes
    that carry a value, plus ``style``), ``inner`` (always ``''``) and
    ``depth`` (always ``0`` for returned elements). Nested elements are
    parsed only to track depth and are not returned.

    ``None`` or an empty string yields an empty list.
    """
    from html.parser import HTMLParser

    # Void elements never get an end tag, so they must not stay on the open
    # element stack; otherwise every following sibling looks nested.
    void_tags = {'area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input',
                 'link', 'meta', 'param', 'source', 'track', 'wbr'}

    # Remove | separators
    html = (html or '').replace(' | ', '')

    elements = []
    stack = []

    class Parser(HTMLParser):
        def handle_starttag(self, tag, attrs):
            el = {
                'tag': tag, 'id': None, 'classes': [], 'hs': None,
                'attrs': {}, 'inner': '', 'depth': len(stack)
            }
            for name, val in attrs:
                if name == 'id':
                    el['id'] = val
                elif name == 'class':
                    el['classes'] = (val or '').split()
                elif name == '_':
                    el['hs'] = val
                elif name == 'style':
                    el['attrs']['style'] = val or ''
                elif val is not None:
                    el['attrs'][name] = val
            # Only collect top-level elements
            if el['depth'] == 0:
                elements.append(el)
            if tag not in void_tags:
                stack.append(el)

        def handle_endtag(self, tag):
            # Close the nearest open element with this tag, dropping any
            # unclosed descendants above it. An end tag with no matching open
            # element is ignored.
            for pos in range(len(stack) - 1, -1, -1):
                if stack[pos]['tag'] == tag:
                    del stack[pos:]
                    break

        def handle_data(self, data):
            pass

    Parser().feed(html)
    return elements


# ── Variable naming ───────────────────────────────────────────────

def assign_var_names(elements):
    """Assign a unique SX variable name to each element.

    The base name is ``_el-<id>`` when the element has an id, otherwise
    ``_el-<tag>``. If the base name is already taken, a numeric suffix is
    appended, starting at the element's index and counting up until the name
    is unused. This guarantees uniqueness even when a suffixed name happens
    to equal an earlier id-derived name.

    Returns a list of names parallel to ``elements``.
    """
    var_names = []
    used_names = set()
    for i, el in enumerate(elements):
        base = f'_el-{el["id"]}' if el['id'] else f'_el-{el["tag"]}'
        var = base
        suffix = i
        # Re-check after every rename: the suffixed candidate may itself
        # already be in use.
        while var in used_names:
            var = f'{base}{suffix}'
            suffix += 1
        used_names.add(var)
        var_names.append(var)
    return var_names


# ── Chai-style parsers (v0.9.14 master tests) ────────────────────

def parse_action(action, ref):
    """Translate an upstream Chai-style action string into SX expressions.

    ``action`` is split on ``;`` and each non-empty statement is matched, in
    order, against the known JS call shapes (click, dispatchEvent with a
    CustomEvent, setAttribute, focus, appendChild of a created element).
    ``ref`` maps a JS variable name to the SX expression for that element.

    A statement matching none of the shapes becomes a ``;; SKIP action:``
    comment built from its first 40 characters with SX-significant
    punctuation replaced by underscores.

    Returns a list of SX strings; empty for a missing action or the
    placeholder ``'(see body)'``.
    """
    if not action or action == '(see body)':
        return []

    # Ordered (pattern, renderer) pairs; the first pattern that matches wins.
    translators = (
        (r'(\w+)\.click\(\)',
         lambda hit: f'(dom-dispatch {ref(hit.group(1))} "click" nil)'),
        (r'(\w+)\.dispatchEvent\(new CustomEvent\("([\w:.-]+)"',
         lambda hit: f'(dom-dispatch {ref(hit.group(1))} "{hit.group(2)}" nil)'),
        (r'(\w+)\.setAttribute\("([\w-]+)",\s*"([^"]*)"\)',
         lambda hit: f'(dom-set-attr {ref(hit.group(1))} "{hit.group(2)}" "{hit.group(3)}")'),
        (r'(\w+)\.focus\(\)',
         lambda hit: f'(dom-focus {ref(hit.group(1))})'),
        (r'(\w+)\.appendChild\(document\.createElement\("(\w+)"\)',
         lambda hit: f'(dom-append {ref(hit.group(1))} (dom-create-element "{hit.group(2)}"))'),
    )

    def translate(statement):
        for pattern, render in translators:
            hit = re.match(pattern, statement)
            if hit:
                return render(hit)
        scrubbed = re.sub(r'[\'\"$@`(),;\\#\[\]{}]', '_', statement[:40])
        return f';; SKIP action: {scrubbed}'

    statements = (piece.strip() for piece in action.split(';'))
    return [translate(statement) for statement in statements if statement]


def parse_checks(check):
    """Convert a Chai assertion string into parsed check tuples.

    ``check`` is split on ``' && '`` and each clause is matched, in order,
    against the known assertion shapes. Every clause yields a
    ``(type, name, key, value)`` tuple; a clause matching no shape becomes
    ``('skip', <first 60 chars>, None, None)``.

    Only the last occurrence of each asserted expression is kept, so that
    pre-action assertions are superseded by post-action ones. For ``class``,
    ``style``, ``attr``, ``hasAttr`` and ``computedStyle`` checks the
    class/property/attribute name is part of the expression identity, so
    assertions on different attributes of the same element are all kept.
    A repeated expression keeps the position of its first occurrence.

    Returns a list of tuples; empty for a missing check or the placeholder
    ``'(no explicit assertion)'``.
    """
    if not check or check == '(no explicit assertion)':
        return []

    # Ordered (pattern, builder) pairs; the first pattern that matches wins.
    rules = (
        (r'(\w+)\.classList\.contains\("([^"]+)"\)\.should\.equal\((true|false)\)',
         lambda m: ('class', m.group(1), m.group(2), m.group(3) == 'true')),
        (r'(\w+)\.innerHTML\.should\.equal\("([^"]*)"\)',
         lambda m: ('innerHTML', m.group(1), m.group(2), None)),
        # Fallback for any other argument text: keep it verbatim.
        (r'(\w+)\.innerHTML\.should\.equal\((.+)\)',
         lambda m: ('innerHTML', m.group(1), m.group(2), None)),
        (r'(\w+)\.textContent\.should\.equal\("([^"]*)"\)',
         lambda m: ('textContent', m.group(1), m.group(2), None)),
        (r'(\w+)\.style\.(\w+)\.should\.equal\("([^"]*)"\)',
         lambda m: ('style', m.group(1), m.group(2), m.group(3))),
        (r'(\w+)\.getAttribute\("([^"]+)"\)\.should\.equal\("([^"]*)"\)',
         lambda m: ('attr', m.group(1), m.group(2), m.group(3))),
        (r'(\w+)\.hasAttribute\("([^"]+)"\)\.should\.equal\((true|false)\)',
         lambda m: ('hasAttr', m.group(1), m.group(2), m.group(3) == 'true')),
        (r'getComputedStyle\((\w+)\)\.(\w+)\.should\.equal\("([^"]*)"\)',
         lambda m: ('computedStyle', m.group(1), m.group(2), m.group(3))),
        (r'assert\.isNull\((\w+)\.parentElement\)',
         lambda m: ('noParent', m.group(1), None, None)),
        (r'assert\.isNotNull\((\w+)\.parentElement\)',
         lambda m: ('hasParent', m.group(1), None, None)),
        (r'(\w+)\.value\.should\.equal\("([^"]*)"\)',
         lambda m: ('value', m.group(1), m.group(2), None)),
    )

    all_checks = []
    for part in check.split(' && '):
        part = part.strip()
        if not part:
            continue
        for pattern, build in rules:
            m = re.match(pattern, part)
            if m:
                all_checks.append(build(m))
                break
        else:
            all_checks.append(('skip', part[:60], None, None))

    # Deduplicate: keep last per (type, name, key). The third field is only
    # part of the identity for types where it names what is inspected; for
    # the others it holds the expected value (or None).
    keyed_types = {'class', 'style', 'attr', 'hasAttr', 'computedStyle'}
    seen = {}
    for c in all_checks:
        seen[(c[0], c[1], c[2] if c[0] in keyed_types else None)] = c

    return list(seen.values())


def make_ref_fn(elements, var_names):
    """Build a resolver from upstream JS variable names to SX expressions.

    The returned ``ref(name)`` resolves, in order:
    1. a known HTML tag name -> the variable of the last element with that
       tag, or the last variable overall if no element has that tag;
    2. an element id -> that element's variable;
    3. a lowercase-letters-then-optional-digits name, when there is at least
       one element -> the last variable;
    4. anything else -> a ``(dom-query-by-id "<name>")`` lookup.

    With no variables at all, the "last variable" fallback is ``_el-div``.
    """
    known_tags = frozenset((
        'div', 'form', 'button', 'input', 'span', 'p', 'a', 'section',
        'ul', 'li', 'select', 'textarea', 'details', 'dialog', 'template',
        'output',
    ))
    fallback = var_names[-1] if var_names else '_el-div'

    # Later elements overwrite earlier ones that share a tag or id.
    by_tag = {el['tag']: var_names[pos] for pos, el in enumerate(elements)}
    by_id = {el['id']: var_names[pos] for pos, el in enumerate(elements) if el['id']}

    def ref(name):
        if name in known_tags:
            return by_tag.get(name, fallback)
        if name in by_id:
            return by_id[name]
        looks_like_js_local = re.match(r'^[a-z]+\d*$', name)
        if looks_like_js_local and len(elements) > 0:
            return fallback
        return f'(dom-query-by-id "{name}")'

    return ref


def check_to_sx(check, ref):
    """Render one parsed Chai check tuple as an SX assertion string.

    ``check`` is a ``(type, name, key, value)`` tuple as produced by
    ``parse_checks``; ``ref`` maps the JS variable ``name`` to an SX
    expression. Computed-style checks and unknown types are rendered as
    ``;; SKIP`` comments rather than assertions.
    """
    kind, name, key, val = check
    target = ref(name)

    # Boolean predicates share one shape: assert the probe or its negation.
    if kind in ('class', 'hasAttr'):
        predicate = 'dom-has-class?' if kind == 'class' else 'dom-has-attr?'
        probe = f'({predicate} {target} "{key}")'
        if val:
            return f'(assert {probe})'
        return f'(assert (not {probe}))'

    if kind == 'innerHTML':
        expected = key.replace('"', '\\"') if isinstance(key, str) else key
        return f'(assert= "{expected}" (dom-inner-html {target}))'

    if kind == 'textContent':
        expected = key.replace('"', '\\"')
        return f'(assert= "{expected}" (dom-text-content {target}))'

    if kind == 'style':
        return f'(assert= "{val}" (dom-get-style {target} "{key}"))'

    if kind == 'attr':
        return f'(assert= "{val}" (dom-get-attr {target} "{key}"))'

    if kind == 'computedStyle':
        return f';; SKIP computed style: {name}.{key}'

    if kind == 'noParent':
        return f'(assert (nil? (dom-parent {target})))'

    if kind == 'hasParent':
        return f'(assert (not (nil? (dom-parent {target}))))'

    if kind == 'value':
        return f'(assert= "{key}" (dom-get-prop {target} "value"))'

    return f';; SKIP check: {kind} {name}'


# ── Playwright-style body parser (dev branch tests) ──────────────

def selector_to_sx(selector, elements, var_names):
    """Convert a CSS selector from find('selector') to an SX DOM lookup expression.

    - A bare id selector (``#name``) becomes ``(dom-query-by-id "name")``.
    - Any other selector starting with ``#`` or ``.`` is passed whole to
      ``dom-query``. This covers compound selectors with or without spaces,
      such as ``#a output``, ``#a>span`` or ``#a.on``.
    - A selector equal to the tag of one of ``elements`` resolves to the
      variable of the first such element.
    - Everything else falls back to ``dom-query``.

    Backslashes and double quotes are escaped wherever the selector is
    embedded in an SX string literal, so attribute selectors like
    ``[name="x"]`` produce a well-formed string.
    """
    selector = selector.strip("'\"")
    quoted = selector.replace('\\', '\\\\').replace('"', '\\"')

    if selector.startswith('#'):
        # Only a plain identifier after '#' is a pure id lookup; anything
        # else is a compound CSS selector.
        if re.fullmatch(r'#[\w-]+', selector):
            return f'(dom-query-by-id "{selector[1:]}")'
        return f'(dom-query "{quoted}")'
    if selector.startswith('.'):
        return f'(dom-query "{quoted}")'
    # Try tag match to a let-bound variable
    for i, el in enumerate(elements):
        if el['tag'] == selector and i < len(var_names):
            return var_names[i]
    # Fallback: query by tag
    return f'(dom-query "{quoted}")'


def parse_pw_args(args_str):
    """Extract the contents of every quoted literal in a Playwright argument list.

    Both single- and double-quoted literals are recognized; each literal ends
    at the next occurrence of its own opening quote character. Text outside
    quotes (for example a ``/regex/`` argument) is ignored.
    """
    quoted_literal = re.compile(r"""(['"])(.*?)\1""")
    return [hit.group(2) for hit in quoted_literal.finditer(args_str)]


def pw_assertion_to_sx(target, negated, assert_type, args_str):
    """Render one Playwright ``expect(...)`` matcher as an SX assertion.

    ``target`` is the SX expression for the element, ``negated`` is true for
    ``.not.`` matchers, ``assert_type`` is the matcher name and ``args_str``
    the raw text between the matcher's parentheses.

    Returns the SX assertion string, or ``None`` for an unsupported matcher.
    """
    args = parse_pw_args(args_str)

    def arg(position):
        # Missing arguments read as the empty string.
        return args[position] if len(args) > position else ''

    def sx_str(text):
        return text.replace('\\', '\\\\').replace('"', '\\"')

    def equality(expected, getter):
        if negated:
            return f'(assert (!= "{expected}" {getter}))'
        return f'(assert= "{expected}" {getter})'

    def predicate(probe, invert):
        if invert:
            return f'(assert (not {probe}))'
        return f'(assert {probe})'

    if assert_type == 'toHaveText':
        return equality(sx_str(arg(0)), f'(dom-text-content {target})')

    if assert_type == 'toHaveAttribute':
        attr_name = arg(0)
        if len(args) >= 2:
            return equality(sx_str(args[1]), f'(dom-get-attr {target} "{attr_name}")')
        # Name only: assert presence/absence rather than a value.
        return predicate(f'(dom-has-attr? {target} "{attr_name}")', negated)

    if assert_type == 'toHaveClass':
        cls = arg(0)
        if not cls:
            # Handle regex like /outer-clicked/
            regex_arg = re.match(r'/(.+?)/', args_str)
            if regex_arg:
                cls = regex_arg.group(1)
        return predicate(f'(dom-has-class? {target} "{cls}")', negated)

    if assert_type == 'toHaveCSS':
        prop = arg(0)
        return equality(sx_str(arg(1)), f'(dom-get-style {target} "{prop}")')

    if assert_type == 'toHaveValue':
        return equality(sx_str(arg(0)), f'(dom-get-prop {target} "value")')

    if assert_type == 'toBeVisible':
        return predicate(f'(dom-visible? {target})', negated)

    if assert_type == 'toBeHidden':
        # Hidden is the inverse of visible, so the polarity flips.
        return predicate(f'(dom-visible? {target})', not negated)

    if assert_type == 'toBeChecked':
        return predicate(f'(dom-get-prop {target} "checked")', negated)

    return None


def parse_dev_body(body, elements, var_names):
    """Extract actions and post-action assertions from a Playwright test body.

    The body is scanned line by line. ``find(...).click()`` and
    ``find(...).dispatchEvent(...)`` calls (on lines that do not contain
    ``expect``) become ``dom-dispatch`` forms. Supported ``expect(find(...))``
    matchers become assertions, but only once the first action has been seen;
    anything before it is dropped. ``//`` comment lines are ignored.

    Returns one list of SX strings in source order, actions and assertions
    interleaved.
    """
    action_re = re.compile(r"find\((['\"])(.+?)\1\)\.(click|dispatchEvent)\(([^)]*)\)")
    expect_re = re.compile(
        r"expect\(find\((['\"])(.+?)\1\)\)\.(not\.)?"
        r"(toHaveText|toHaveClass|toHaveCSS|toHaveAttribute|toHaveValue|toBeVisible|toBeHidden|toBeChecked)"
        r"\(([^)]*)\)"
    )

    ops = []
    action_seen = False

    for raw_line in body.split('\n'):
        line = raw_line.strip()

        if line.startswith('//'):
            continue

        action = action_re.search(line)
        if action and 'expect' not in line:
            action_seen = True
            _, selector, verb, raw_arg = action.groups()
            target = selector_to_sx(selector, elements, var_names)
            # click() always dispatches "click"; dispatchEvent uses its argument.
            event = 'click' if verb == 'click' else raw_arg.strip("'\"")
            ops.append(f'(dom-dispatch {target} "{event}" nil)')
            continue

        # Everything ahead of the first action is setup or a pre-check.
        if not action_seen:
            continue

        expectation = expect_re.search(line)
        if not expectation:
            continue

        _, selector, negation, matcher, args_str = expectation.groups()
        target = selector_to_sx(selector, elements, var_names)
        sx = pw_assertion_to_sx(target, bool(negation), matcher, args_str)
        if sx:
            ops.append(sx)

    return ops


# ── Test generation ───────────────────────────────────────────────

def emit_element_setup(lines, elements, var_names):
    """Append to ``lines`` the SX that configures and mounts each element.

    Per element, in order: id, classes, the normalized ``_`` hyperscript
    attribute, remaining attributes, ``dom-append`` to the body, and
    ``hs-activate!`` when the element carries hyperscript.

    If the hyperscript source normalizes to an empty string, or starts or
    ends with a double quote, the element is appended to the body right away:
    no ``_`` attribute, no other attributes and no activation are emitted.
    Attributes whose value contains a backslash or newline, or whose name
    starts with ``[``, are replaced by a ``;; SKIP attr`` comment.
    """
    # Newlines/tabs in _hyperscript act as implicit 'then' separators.
    # Only insert 'then' before command keywords, not within expressions.
    cmd_kws = r'(?:set|put|get|add|remove|toggle|hide|show|if|repeat|for|wait|send|trigger|log|call|take|throw|return|append|tell|go|halt|settle|increment|decrement|fetch|make|install|measure|empty|reset|swap|default|morph|render|scroll|focus|select|pick|beep!)'
    gap_before_command = re.compile(r'\s{2,}(?=' + cmd_kws + r'\b)')

    emit = lines.append
    for i, el in enumerate(elements):
        var = var_names[i]
        attach = f'      (dom-append (dom-body) {var})'

        if el['id']:
            emit(f'      (dom-set-attr {var} "id" "{el["id"]}")')
        for cls in el['classes']:
            emit(f'      (dom-add-class {var} "{cls}")')

        script = el['hs']
        if script:
            # Drop all backslashes, then turn layout into explicit 'then's:
            # first wide gaps ahead of a command keyword, then any line break.
            script = script.replace('\\', '')
            script = gap_before_command.sub(' then ', script)
            script = re.sub(r'\s*[\n\r]\s*', ' then ', script)
            # Clean up: collapse spaces, dedupe then
            script = re.sub(r'\s+', ' ', script)
            script = re.sub(r'(then\s*)+then', 'then', script).strip()

            if not script:
                emit(attach)
                continue
            if script.startswith('"') or script.endswith('"'):
                emit(f'      ;; HS source has bare quotes — HTML parse artifact')
                emit(attach)
                continue

            quoted = script.replace('\\', '\\\\').replace('"', '\\"')
            emit(f'      (dom-set-attr {var} "_" "{quoted}")')

        for aname, aval in el['attrs'].items():
            unsafe = '\\' in aval or '\n' in aval or aname.startswith('[')
            if unsafe:
                emit(f'      ;; SKIP attr {aname} (contains special chars)')
            else:
                aval_quoted = aval.replace('"', '\\"')
                emit(f'      (dom-set-attr {var} "{aname}" "{aval_quoted}")')

        emit(attach)
        if el['hs']:
            emit(f'      (hs-activate! {var})')


def generate_test_chai(test, elements, var_names, idx):
    """Generate an SX deftest from a test's Chai-style ``action``/``check`` fields.

    Args:
        test: upstream test dict; reads ``name``, ``action`` and ``check``.
        elements: top-level element dicts parsed from the test's HTML.
        var_names: SX variable names parallel to ``elements``.
        idx: unused; kept so all generators share one signature.

    Returns:
        The deftest form as a single newline-joined string.
    """
    ref = make_ref_fn(elements, var_names)
    actions = parse_action(test['action'], ref)
    checks = parse_checks(test['check'])

    # A double quote in the name would terminate the SX string early, so it
    # is swapped for a single quote, as the stub-test path does.
    safe_name = test['name'].replace('"', "'")

    lines = [f'  (deftest "{safe_name}"', '    (hs-cleanup!)']

    bindings = [f'({var_names[i]} (dom-create-element "{el["tag"]}"))' for i, el in enumerate(elements)]
    lines.append(f'    (let ({" ".join(bindings)})')

    emit_element_setup(lines, elements, var_names)

    lines.extend(f'      {action}' for action in actions)
    lines.extend(f'      {check_to_sx(check, ref)}' for check in checks)

    # Closes both the let and the deftest.
    lines.append('    ))')
    return '\n'.join(lines)


def generate_test_pw(test, elements, var_names, idx):
    """Generate an SX deftest from a test's Playwright-style ``body`` field.

    Args:
        test: upstream test dict; reads ``name`` and ``body``.
        elements: top-level element dicts parsed from the test's HTML.
        var_names: SX variable names parallel to ``elements``.
        idx: unused; kept so all generators share one signature.

    Returns:
        The deftest form as a single newline-joined string.
    """
    ops = parse_dev_body(test['body'], elements, var_names)

    # A double quote in the name would terminate the SX string early, so it
    # is swapped for a single quote, as the stub-test path does.
    safe_name = test['name'].replace('"', "'")

    lines = [f'  (deftest "{safe_name}"', '    (hs-cleanup!)']

    bindings = [f'({var_names[i]} (dom-create-element "{el["tag"]}"))' for i, el in enumerate(elements)]
    lines.append(f'    (let ({" ".join(bindings)})')

    emit_element_setup(lines, elements, var_names)

    lines.extend(f'      {op}' for op in ops)

    # Closes both the let and the deftest.
    lines.append('    ))')
    return '\n'.join(lines)


def generate_eval_only_test(test, idx):
    """Generate an SX deftest for a no-HTML test using eval-hs.

    Scans the test's ``body`` for ``run("expr").toBe(val)`` and
    ``expect(await run("expr")).toBe(val)`` patterns and emits one
    ``assert=`` per match. JS ``true``/``false`` map to themselves,
    ``null``/``undefined`` to ``nil``, quoted values to SX strings, values
    accepted by ``float()`` are emitted verbatim, and anything else is
    wrapped in quotes. ``toEqual`` matches are recorded only as a
    single-line comment.

    Args:
        test: upstream test dict; reads ``name`` and ``body``.
        idx: unused; kept so all generators share one signature.

    Returns:
        The deftest form as a string, or ``None`` when the body is missing
        or contains no recognized pattern.
    """
    body = test.get('body') or ''

    # A double quote in the name would terminate the SX string early, so it
    # is swapped for a single quote, as the stub-test path does.
    safe_name = test['name'].replace('"', "'")

    # Shared prefix: optional expect( and await, then run('...') or run("...").
    run_call = r'(?:expect\()?(?:await\s+)?run\(["\x27]([^"\x27]+)["\x27]\)\)?'
    js_constants = {'true': 'true', 'false': 'false', 'null': 'nil', 'undefined': 'nil'}

    assertions = []
    for m in re.finditer(run_call + r'\.toBe\(([^)]+)\)', body):
        hs_expr = m.group(1).replace('\\', '').replace('"', '\\"')
        expected = m.group(2).strip()
        # Convert JS values to SX
        if expected in js_constants:
            expected_sx = js_constants[expected]
        elif expected.startswith(('"', "'")):
            expected_sx = '"' + expected.strip("\"'") + '"'
        else:
            try:
                float(expected)
            except ValueError:
                expected_sx = f'"{expected}"'
            else:
                expected_sx = expected
        assertions.append(f'    (assert= {expected_sx} (eval-hs "{hs_expr}"))')

    # Also handle toEqual patterns
    for m in re.finditer(run_call + r'\.toEqual\(([^)]+)\)', body):
        # The argument may span lines; flatten it so the comment stays on
        # one line of the generated file.
        summary = ' '.join(m.group(2).split())
        assertions.append(f'    ;; toEqual: {summary[:40]}')

    if not assertions:
        return None  # Can't convert this body pattern

    return '\n'.join([f'  (deftest "{safe_name}"', *assertions, '  )'])


def generate_test(test, idx):
    """Generate an SX deftest for an upstream test.

    Dispatches on the shape of ``test``:
    - no HTML at all (missing, ``None`` or blank) -> eval-only conversion;
    - HTML present but yielding no top-level elements -> ``None``;
    - a truthy ``body`` field -> Playwright-style generator;
    - otherwise -> Chai-style generator.

    Returns the deftest string, or ``None`` when the test cannot be converted.
    """
    # Read html once and tolerate a missing or null field, so such tests
    # reach the eval-only path instead of raising.
    html = test.get('html') or ''
    elements = parse_html(html)

    if not elements:
        if not html.strip():
            # No HTML — try eval-only conversion
            return generate_eval_only_test(test, idx)
        return None

    var_names = assign_var_names(elements)

    if test.get('body'):
        return generate_test_pw(test, elements, var_names, idx)
    return generate_test_chai(test, elements, var_names, idx)


# ── Output generation ─────────────────────────────────────────────

# Fixed preamble of the generated file: a provenance banner followed by the
# SX helper definitions (hs-test-el, hs-cleanup!).
output = [
    ';; Hyperscript behavioral tests — auto-generated from upstream _hyperscript test suite',
    f';; Source: spec/tests/hyperscript-upstream-tests.json ({len(raw_tests)} tests, v0.9.14 + dev)',
    ';; DO NOT EDIT — regenerate with: python3 tests/playwright/generate-sx-tests.py',
    '',
    ';; ── Test helpers ──────────────────────────────────────────────────',
    '',
    '(define hs-test-el',
    '  (fn (tag hs-src)',
    '    (let ((el (dom-create-element tag)))',
    '      (dom-set-attr el "_" hs-src)',
    '      (dom-append (dom-body) el)',
    '      (hs-activate! el)',
    '      el)))',
    '',
    '(define hs-cleanup!',
    '  (fn ()',
    '    (dom-set-inner-html (dom-body) "")))',
    '',
]

# Bucket the tests by category, keeping categories in first-seen order and
# tests in their original order within each bucket.
categories = OrderedDict()
for t in raw_tests:
    cat = t['category']
    categories.setdefault(cat, []).append(t)

# Emit one defsuite per category. A test that cannot be converted is still
# emitted, as a stub deftest that raises, so it is counted and shows up as a
# failure.
total = 0
skipped = 0
generated_counts = {}  # cat -> (generated, stubbed)
for cat, tests in categories.items():
    output.extend([
        f';; ── {cat} ({len(tests)} tests) ──',
        f'(defsuite "hs-upstream-{cat}"',
    ])

    cat_gen = 0
    cat_stub = 0
    for i, t in enumerate(tests):
        sx = generate_test(t, i)
        total += 1
        if not sx:
            safe_name = t['name'].replace('"', "'")
            output.extend([
                f'  (deftest "{safe_name}"',
                '    (error "NOT IMPLEMENTED: test HTML could not be parsed into SX"))',
            ])
            cat_stub += 1
            continue
        output.append(sx)
        cat_gen += 1

    output.extend([')', ''])
    generated_counts[cat] = (cat_gen, cat_stub)

# The generated text contains non-ASCII characters (em dashes and box-drawing
# rules in the banner comments), so write it as UTF-8 explicitly rather than
# relying on the platform's locale encoding.
with open(OUTPUT, 'w', encoding='utf-8') as f:
    f.write('\n'.join(output))

# Summary on stdout: overall totals, then one line per category showing
# generated tests and, where any exist, the number of stubs.
has_body = len([t for t in raw_tests if t.get('body')])
chai_count = len(raw_tests) - has_body
print(f'Generated {total} tests -> {OUTPUT}')
print(f'  Source: {len(raw_tests)} tests ({chai_count} Chai-style, {has_body} Playwright-style)')
print(f'  Categories: {len(categories)}')
for cat, (gen, stub) in generated_counts.items():
    marker = f' ({stub} stubs)' if stub != 0 else ''
    print(f'  {cat}: {gen}{marker}')