"""
HTML → sx source converter.

Converts an HTML string to an equivalent s-expression source string so that
raw HTML can be eliminated from the sx tree.

    html_to_sx('<p class="intro">Hello <em>world</em></p>')
    # → '(p :class "intro" "Hello " (em "world"))'

Uses only stdlib ``html.parser`` — no extra dependencies.
"""
from __future__ import annotations

from html.parser import HTMLParser

from .html import VOID_ELEMENTS, BOOLEAN_ATTRS


def html_to_sx(html: str) -> str:
    """Convert an HTML string to sx source.

    Returns ``'""'`` when *html* is empty or whitespace-only, or when no
    nodes remain after parsing.  A single root node is serialized on its
    own; several root nodes are wrapped in a ``(<> ...)`` fragment form.
    """
    if not html or not html.strip():
        return '""'
    parser = _SxBuilder()
    parser.feed(html)
    # feed() buffers input it cannot yet classify -- e.g. trailing text with
    # an "&" that might be the start of a character reference ("AT&T"), or
    # an unterminated tag.  close() forces that buffered data through the
    # handlers; without it such trailing content is silently dropped.
    parser.close()
    nodes = parser.finish()
    if not nodes:
        return '""'
    if len(nodes) == 1:
        return _serialize(nodes[0])
    return "(<> " + " ".join(_serialize(n) for n in nodes) + ")"


# ---------------------------------------------------------------------------
# Internal tree builder
# ---------------------------------------------------------------------------

class _TextNode:
    __slots__ = ("text",)
    def __init__(self, text: str):
        self.text = text

class _ElementNode:
    __slots__ = ("tag", "attrs", "children")
    def __init__(self, tag: str, attrs: list[tuple[str, str | None]]):
        self.tag = tag
        self.attrs = attrs
        self.children: list[_TextNode | _ElementNode] = []


class _SxBuilder(HTMLParser):
    """HTMLParser subclass that assembles a tree of text and element nodes.

    Character references are converted by the base parser
    (``convert_charrefs=True``), comments are discarded, and mismatched or
    unclosed tags are tolerated rather than reported.
    """

    def __init__(self):
        super().__init__(convert_charrefs=True)
        # Top-level nodes, in document order.
        self._roots: list[_TextNode | _ElementNode] = []
        # Currently open elements, outermost first.
        self._stack: list[_ElementNode] = []

    def _append(self, node: _TextNode | _ElementNode):
        """Attach *node* to the innermost open element, else to the roots."""
        siblings = self._stack[-1].children if self._stack else self._roots
        siblings.append(node)

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]):
        """Record a new element; only non-void elements stay open."""
        element = _ElementNode(tag, attrs)
        self._append(element)
        if tag in VOID_ELEMENTS:
            # Void elements never receive children, so never open them.
            return
        self._stack.append(element)

    def handle_endtag(self, tag: str):
        """Close the nearest open *tag* along with anything nested in it.

        An end tag with no matching open element is ignored.
        """
        depth = len(self._stack)
        while depth:
            depth -= 1
            if self._stack[depth].tag == tag:
                # Drop the match and every element opened after it.
                del self._stack[depth:]
                return

    def handle_data(self, data: str):
        """Record non-empty character data as a text node."""
        if not data:
            return
        self._append(_TextNode(data))

    def handle_comment(self, data: str):
        """Ignore HTML comments; they produce no node."""

    def finish(self) -> list[_TextNode | _ElementNode]:
        """Return the root nodes, minus whitespace-only root text nodes."""
        kept: list[_TextNode | _ElementNode] = []
        for node in self._roots:
            if isinstance(node, _TextNode) and not node.text.strip():
                continue
            kept.append(node)
        return kept


# ---------------------------------------------------------------------------
# Serializer
# ---------------------------------------------------------------------------

def _esc(s: str) -> str:
    """Escape a string for sx double-quoted literals."""
    return s.replace("\\", "\\\\").replace('"', '\\"')


def _serialize(node: _TextNode | _ElementNode) -> str:
    """Render *node* (and, recursively, its children) as sx source text.

    Text nodes become double-quoted string literals.  Elements become
    ``(tag :attr value ... child ...)`` forms; children are never emitted
    for void elements.
    """
    if isinstance(node, _TextNode):
        return '"' + _esc(node.text) + '"'

    tokens = [node.tag]
    for attr_name, attr_value in node.attrs:
        # An attribute is emitted as ``true`` when it has no value at all,
        # or when it is a boolean attribute written as name="" / name="name".
        if attr_value is None or (
            attr_name in BOOLEAN_ATTRS and attr_value in ("", attr_name)
        ):
            tokens.append(f":{attr_name} true")
        else:
            tokens.append(f':{attr_name} "{_esc(attr_value)}"')

    if node.tag not in VOID_ELEMENTS:
        tokens.extend(_serialize(child) for child in node.children)

    return "(" + " ".join(tokens) + ")"