# mono/shared/sx/css_registry.py

"""
On-demand CSS registry — parses tw.css at startup into a lookup table.

Maps HTML class names (e.g. "flex", "sm:hidden", "h-[60vh]") to their
CSS rule text.  The server uses this to send only the CSS rules needed
for each response, instead of the full Tailwind bundle.

Usage::

    load_css_registry("/path/to/tw.css")
    rules = lookup_rules({"flex", "p-2", "sm:hidden"})
    preamble = get_preamble()
"""
from __future__ import annotations

import hashlib
import re
from collections import OrderedDict
from pathlib import Path
from typing import Sequence


# ---------------------------------------------------------------------------
# Module state
# ---------------------------------------------------------------------------

# HTML class name (unescaped, e.g. "sm:hidden") → CSS rule text for that class.
_REGISTRY: dict[str, str] = {}
# HTML class name → source-order index, used to sort rules when emitting them.
_RULE_ORDER: dict[str, int] = {}
# Base/reset CSS plus any extra CSS files; returned by get_preamble().
_PREAMBLE: str = ""
# Preamble followed by the utility rules; returned by get_all_css()
# (used for Jinja fallback pages).
_ALL_RULES: str = ""

# Hash cache: maps 8-char hex hash → frozenset of class names.
# Kept as an OrderedDict so the oldest entry can be evicted first.
_CSS_HASH_CACHE: OrderedDict[str, frozenset[str]] = OrderedDict()
# Maximum number of entries kept in _CSS_HASH_CACHE.
_CSS_HASH_CACHE_MAX = 1000


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def load_css_registry(
    path: str | Path,
    *,
    extra_css: Sequence[str | Path] = (),
    url_rewrites: dict[str, str] | None = None,
) -> None:
    """Parse a Tailwind v3 CSS file and populate the registry.

    Any previously loaded registry content is discarded first.

    Parameters
    ----------
    path:
        Path to the main Tailwind CSS file (tw.css).
    extra_css:
        Additional CSS files to include in the preamble (inlined verbatim).
        These are loaded in order and prepended before the Tailwind rules.
        Files that do not exist are skipped silently (best effort).
    url_rewrites:
        Dict of ``{old_prefix: new_prefix}`` applied as plain substring
        replacements to every extra CSS file,
        e.g. ``{"../webfonts/": "/static/fontawesome/webfonts/"}``.

    Raises
    ------
    OSError
        If the main CSS file cannot be read.
    """
    global _PREAMBLE, _ALL_RULES
    _REGISTRY.clear()
    _RULE_ORDER.clear()

    css = Path(path).read_text(encoding="utf-8")
    rewrites = url_rewrites or {}

    # Collect the extra CSS files and join once at the end.
    extra_parts: list[str] = []
    for extra in extra_css:
        extra_path = Path(extra)
        if not extra_path.exists():
            continue
        content = extra_path.read_text(encoding="utf-8")
        for old, new in rewrites.items():
            content = content.replace(old, new)
        extra_parts.append(content)
    sibling_css = "".join(extra_parts)

    # Split into preamble (resets, vars) and utility rules.
    # Tailwind v3 minified structure:
    #   - Custom property defaults (*,:after,:before{--tw-...})
    #   - Base resets (html, body, etc.)
    #   - Utility classes (.flex{...}, .hidden{...})
    #   - Responsive variants (@media ...{.sm\:...{...}})
    #
    # Everything before the first single-class selector is the preamble.
    # If no such rule exists the preamble is empty and every rule is handed
    # to the indexer.
    rules = _split_rules(css)
    preamble_end = 0
    for i, rule in enumerate(rules):
        sel = _extract_selector(rule)
        if sel and _is_utility_selector(sel):
            preamble_end = i
            break

    utility_rules = rules[preamble_end:]
    _PREAMBLE = sibling_css + "".join(rules[:preamble_end])
    _index_rules(utility_rules)

    # The fallback stylesheet keeps every post-preamble rule in its original
    # source order.  Rebuilding it from the registry would drop rules the
    # indexer skips (e.g. @keyframes), leaving the "full" CSS incomplete.
    _ALL_RULES = _PREAMBLE + "".join(utility_rules)


def get_preamble() -> str:
    """Return the preamble CSS built by the last ``load_css_registry`` call.

    This is the extra CSS files followed by the rules that precede the first
    utility class (resets + custom property defaults).  It is an empty string
    until a registry has been loaded.
    """
    return _PREAMBLE


def get_all_css() -> str:
    """Return the preamble followed by the utility rules.

    Intended for Jinja fallback pages that need the whole stylesheet.
    It is an empty string until a registry has been loaded.
    """
    return _ALL_RULES


def lookup_rules(classes: set[str]) -> str:
    """Return concatenated CSS for a set of class names, preserving source order.

    Class names that are not in the registry are ignored.  Rules are emitted
    by ascending source-order index; classes that share an index are ordered
    by name, so the output for a given class set is the same on every call
    and in every process (set iteration order alone is not stable).
    """
    known = [name for name in classes if name in _REGISTRY]
    known.sort(key=lambda name: (_RULE_ORDER.get(name, 0), name))
    return "".join(_REGISTRY[name] for name in known)


def scan_classes_from_sx(source: str) -> set[str]:
    """Collect every class name mentioned in a piece of sx source text.

    Three textual forms are recognised (in component definitions and page
    source alike):

    * ``:class "a b c"`` — a literal class string;
    * ``:class (str "a" " b" ...)`` — string concatenation, where every
      quoted fragment contributes its whitespace-separated names;
    * ``;; @css a b c`` — a comment annotation listing dynamically built
      class names that cannot be inferred from the source.
    """
    tokens: list[str] = []

    # Literal form: :class "value"
    for value in re.findall(r':class\s+"([^"]*)"', source):
        tokens.extend(value.split())

    # Concatenation form: :class (str "a" " b" ...)
    for fragments in re.findall(r':class\s+\(str\s+((?:"[^"]*"\s*)+)\)', source):
        for fragment in re.findall(r'"([^"]*)"', fragments):
            tokens.extend(fragment.split())

    # Annotation form: ;; @css class1 class2 ... (rest of the line)
    for hint in re.findall(r';\s*@css\s+(.+)', source):
        tokens.extend(hint.split())

    return set(tokens)


def registry_loaded() -> bool:
    """Report whether the registry currently holds at least one class rule."""
    return len(_REGISTRY) > 0


def store_css_hash(classes: set[str] | frozenset[str]) -> str:
    """Remember a class set under a short hash and return that hash.

    The key is the first 8 hex characters of the SHA-256 digest of the
    sorted, comma-joined class names.  The entry becomes the most recently
    used one; the oldest entries are dropped once the cache exceeds
    ``_CSS_HASH_CACHE_MAX``.
    """
    members = frozenset(classes)
    canonical = ",".join(sorted(members))
    key = hashlib.sha256(canonical.encode()).hexdigest()[:8]

    # Insert (or overwrite) and mark as most recently used.
    _CSS_HASH_CACHE[key] = members
    _CSS_HASH_CACHE.move_to_end(key)

    # Drop as many of the oldest entries as needed to respect the limit.
    overflow = len(_CSS_HASH_CACHE) - _CSS_HASH_CACHE_MAX
    for _ in range(overflow):
        _CSS_HASH_CACHE.popitem(last=False)
    return key


def lookup_css_hash(h: str) -> set[str] | None:
    """Return a copy of the class set stored under hash *h*, or None if absent.

    A successful lookup marks the entry as most recently used.
    """
    cached = _CSS_HASH_CACHE.get(h)
    if cached is None:
        return None
    _CSS_HASH_CACHE.move_to_end(h)
    return set(cached)


# ---------------------------------------------------------------------------
# Internals
# ---------------------------------------------------------------------------

def _split_rules(css: str) -> list[str]:
    """Split minified CSS into individual top-level rules using brace tracking.

    Each returned string is a complete rule including its braces, e.g.:
      ".flex{display:flex}"
      "@media (min-width:640px){.sm\\:hidden{display:none}}"
    """
    rules: list[str] = []
    depth = 0
    start = 0
    i = 0
    while i < len(css):
        ch = css[i]
        if ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                rules.append(css[start:i + 1])
                start = i + 1
        i += 1
    # Trailing content (unlikely in valid CSS)
    if start < len(css):
        tail = css[start:].strip()
        if tail:
            rules.append(tail)
    return rules


def _extract_selector(rule: str) -> str:
    """Extract the selector portion before the first '{' in a rule."""
    brace = rule.find('{')
    return rule[:brace].strip() if brace >= 0 else ""


def _is_utility_selector(sel: str) -> bool:
    """Check if a selector looks like a single utility class (.flex, .\\!p-2, etc).

    Returns False for resets (*,:before,:after{...}), element selectors (html,body),
    and @media / @keyframes wrappers (handled separately).
    """
    if sel.startswith('@'):
        return False
    # Must start with a dot and be a single class
    if not sel.startswith('.'):
        return False
    # Exclude selectors with spaces (descendant combinator, .prose :where(...))
    if ' ' in sel:
        return False
    return True


def _css_selector_to_class(selector: str) -> str:
    """Convert a CSS selector to an HTML class name by unescaping.

    .sm\\:hidden  → sm:hidden
    .h-\\[60vh\\] → h-[60vh]
    .\\!p-2       → !p-2
    .hover\\:text-stone-700:hover → hover:text-stone-700
    """
    # Strip leading dot
    name = selector.lstrip('.')
    # Strip trailing pseudo-class/element (:hover, :focus, ::placeholder, etc.)
    # But don't strip escaped colons (\:) — those are part of the class name.
    # Unescaped colon = pseudo-class boundary.
    # Find the first unescaped colon (not preceded by backslash)
    result = []
    i = 0
    while i < len(name):
        if name[i] == '\\' and i + 1 < len(name):
            result.append(name[i + 1])
            i += 2
        elif name[i] == ':':
            break  # pseudo-class — stop here
        else:
            result.append(name[i])
            i += 1
    return "".join(result)


def _index_rules(rules: list[str]) -> None:
    """Index utility rules into _REGISTRY and _RULE_ORDER.

    Each rule is keyed on the class name of its (first) selector.  Rules for
    the same class are appended to one another in source order, and the class
    keeps the order index of its first rule.  Order indices strictly increase
    across all rules, including those nested in @media blocks, so sorting by
    index reproduces stylesheet order.

    Rules whose selector does not start with a class (element selectors,
    @keyframes, @supports, ...) are not indexed.
    """
    # Continue after any existing indices so repeated calls never collide.
    order = max(_RULE_ORDER.values(), default=-1) + 1

    for rule in rules:
        sel = _extract_selector(rule)

        if sel.startswith('@media'):
            # Responsive/container wrapper — index the inner rules; the helper
            # returns the next free order index.
            order = _index_media_block(rule, order)
            continue

        if not sel.startswith('.'):
            # Other at-rules (@keyframes, @supports, ...) and non-class
            # selectors are skipped.
            continue

        # Compound selectors like ".prose :where(p)..." → key on root class "prose"
        root_sel = sel.split(' ', 1)[0]
        class_name = _css_selector_to_class(root_sel)
        if class_name:
            _REGISTRY[class_name] = _REGISTRY.get(class_name, "") + rule
            _RULE_ORDER.setdefault(class_name, order)
        order += 1


def _index_media_block(rule: str, base_order: int) -> int:
    """Index individual class rules inside an @media block.

    For example:
        @media (min-width:640px){.sm\\:hidden{display:none}.sm\\:flex{display:flex}}

    Each inner rule is stored wrapped in its own copy of the @media query and
    appended to any CSS already registered for that class, so a class with
    both a plain rule and media-query rules (e.g. ``.container``) keeps all
    of them.  Inner rules whose selector does not start with a class are
    skipped.

    Inner rules take consecutive order indices starting at *base_order*.
    Returns the next unused order index.
    """
    # Extract the @media wrapper and inner content
    first_brace = rule.find('{')
    if first_brace < 0:
        return base_order
    media_prefix = rule[:first_brace + 1]  # "@media (min-width:640px){"
    # Inner content is between first { and last }
    inner = rule[first_brace + 1:]
    if inner.endswith('}'):
        inner = inner[:-1]  # strip the closing } of @media

    order = base_order
    for inner_rule in _split_rules(inner):
        sel = _extract_selector(inner_rule)
        if sel.startswith('.'):
            # Key on the root class, as for top-level compound selectors.
            class_name = _css_selector_to_class(sel.split(' ', 1)[0])
            if class_name:
                wrapped = media_prefix + inner_rule + "}"
                _REGISTRY[class_name] = _REGISTRY.get(class_name, "") + wrapped
                _RULE_ORDER.setdefault(class_name, order)
        order += 1
    return order