# rose-ash/blog/bp/blog/ghost/lexical_to_sx.py

"""
Lexical JSON → s-expression converter.

Mirrors lexical_renderer.py's registry/dispatch pattern but produces sx source
instead of HTML. Used for backfilling existing posts and on-the-fly conversion
when editing pre-migration posts in the SX editor.

Public API
----------
    lexical_to_sx(doc)  – Lexical JSON (dict or string) → sx source string
"""
from __future__ import annotations

import json
from typing import Callable

import mistune

from shared.sx.html_to_sx import html_to_sx


# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------

# Maps a Lexical node "type" string to the function that converts that node.
_CONVERTERS: dict[str, Callable[[dict], str]] = {}


def _converter(node_type: str):
    """Return a decorator that files the decorated function under *node_type*.

    The function is stored in ``_CONVERTERS`` and handed back unchanged, so
    it stays directly callable by its own name.
    """
    def register(handler: Callable[[dict], str]) -> Callable[[dict], str]:
        _CONVERTERS[node_type] = handler
        return handler
    return register


# ---------------------------------------------------------------------------
# Public entry point
# ---------------------------------------------------------------------------

def lexical_to_sx(doc: dict | str) -> str:
    """Convert a Lexical JSON document to an sx source string.

    Args:
        doc: The document as a dict, or as a JSON string that is parsed
            first. Top-level nodes are read from ``doc["root"]["children"]``;
            when there is no ``"root"`` key, *doc* itself is used as the root.

    Returns:
        ``(<> (p ""))`` when no node produces output, the node's own sx when
        exactly one node produces output, and otherwise every result wrapped
        in a ``(<> ...)`` fragment, one per line.

    Raises:
        json.JSONDecodeError: If *doc* is a non-blank string that is not
            valid JSON.
    """
    empty_doc = '(<> (p ""))'

    if isinstance(doc, str):
        # A blank string means "no document", not malformed JSON.
        doc = json.loads(doc) if doc.strip() else None
    if not doc:
        # Covers None, JSON ``null`` and an empty dict.
        return empty_doc

    # ``or`` instead of relying on .get defaults: a JSON ``null`` comes back
    # as None, which the defaults do not replace.
    root = doc.get("root", doc) or {}
    children = root.get("children") or []
    if not children:
        return empty_doc

    parts = [sx for sx in (_convert_node(child) for child in children) if sx]
    if not parts:
        return empty_doc
    if len(parts) == 1:
        return parts[0]
    return "(<>\n  " + "\n  ".join(parts) + ")"


# ---------------------------------------------------------------------------
# Core dispatch
# ---------------------------------------------------------------------------

def _convert_node(node: dict) -> str:
    """Dispatch *node* to the converter registered for its ``"type"``.

    Returns an empty string when no converter is registered for that type
    (or the node has no ``"type"`` key), so unknown nodes are skipped.
    """
    handler = _CONVERTERS.get(node.get("type", ""))
    return handler(node) if handler else ""


def _convert_children(children: list[dict] | None) -> str:
    """Convert *children* and join the non-empty results with single spaces.

    ``None`` is treated like an empty list: callers pass
    ``node.get("children", [])``, which yields ``None`` when the JSON holds
    an explicit ``"children": null``.

    Returns:
        The space-separated sx of every child that produced output, or an
        empty string when there is none.
    """
    converted = (_convert_node(child) for child in children or ())
    return " ".join(sx for sx in converted if sx)


def _esc(s: str) -> str:
    """Backslash-escape ``\\`` and ``"`` so *s* can sit inside an sx string literal."""
    # One pass over the input; each character is mapped independently, which
    # gives the same result as escaping backslashes first and quotes second.
    return s.translate(str.maketrans({"\\": "\\\\", '"': '\\"'}))


# ---------------------------------------------------------------------------
# Text format bitmask
# ---------------------------------------------------------------------------

# Bit flags tested against a text node's integer "format" value.
# Each flag is a distinct power of two, so several can be set at once.
_FORMAT_BOLD = 1
_FORMAT_ITALIC = 2
_FORMAT_STRIKETHROUGH = 4
_FORMAT_UNDERLINE = 8
_FORMAT_CODE = 16
_FORMAT_SUBSCRIPT = 32
_FORMAT_SUPERSCRIPT = 64

# (flag, sx tag) pairs consumed by _wrap_format. Order matters: wrappers are
# applied top to bottom, so earlier entries end up innermost in the output.
_FORMAT_WRAPPERS: list[tuple[int, str]] = [
    (_FORMAT_BOLD,          "strong"),
    (_FORMAT_ITALIC,        "em"),
    (_FORMAT_STRIKETHROUGH, "s"),
    (_FORMAT_UNDERLINE,     "u"),
    (_FORMAT_CODE,          "code"),
    (_FORMAT_SUBSCRIPT,     "sub"),
    (_FORMAT_SUPERSCRIPT,   "sup"),
]


def _wrap_format(text_sx: str, fmt: int) -> str:
    """Wrap *text_sx* in one sx element per format bit set in *fmt*.

    Wrappers are applied in ``_FORMAT_WRAPPERS`` order, so the first matching
    entry is the innermost element of the result.
    """
    wrapped = text_sx
    for bit, element in _FORMAT_WRAPPERS:
        if not fmt & bit:
            continue
        wrapped = f"({element} {wrapped})"
    return wrapped


# ---------------------------------------------------------------------------
# Tier 1 — text nodes
# ---------------------------------------------------------------------------

@_converter("text")
def _text(node: dict) -> str:
    """Convert a text node to a quoted sx string, wrapped per its format bits.

    Returns an empty string when the node has no text. A ``"format"`` value
    that is not a non-zero int is ignored and the bare literal is returned.
    """
    content = node.get("text", "")
    if not content:
        return ""
    literal = f'"{_esc(content)}"'
    flags = node.get("format", 0)
    if not isinstance(flags, int) or not flags:
        return literal
    return _wrap_format(literal, flags)


@_converter("linebreak")
def _linebreak(_node: dict) -> str:
    """Emit the sx string literal ``"\\n"``; the node itself is not inspected."""
    return r'"\n"'


@_converter("tab")
def _tab(_node: dict) -> str:
    """Emit the sx string literal ``"\\t"``; the node itself is not inspected."""
    return r'"\t"'


@_converter("paragraph")
def _paragraph(node: dict) -> str:
    """Convert a paragraph to ``(p ...)``; an empty paragraph becomes ``(p "")``."""
    # An empty-string literal keeps the element well-formed with no children.
    body = _convert_children(node.get("children", [])) or '""'
    return f"(p {body})"


@_converter("extended-text")
def _extended_text(node: dict) -> str:
    """Convert an extended-text node, which may be inline or block-level.

    A node carrying a ``"text"`` key is handled as a plain text node;
    anything else is handled as a paragraph built from its children.
    """
    return _text(node) if "text" in node else _paragraph(node)


# The only element names a heading node is allowed to emit.
_HEADING_TAGS = ("h1", "h2", "h3", "h4", "h5", "h6")


@_converter("heading")
def _heading(node: dict) -> str:
    """Convert a heading node to ``(hN ...)``.

    The tag is read from ``node["tag"]`` and defaults to ``h2``. Because it
    is spliced into the sx source as the head of a form, it is checked
    against ``h1``–``h6`` (case-insensitively); anything else falls back to
    ``h2`` so document data cannot inject an arbitrary form head.

    Returns:
        The heading form; an empty heading becomes ``(hN "")``.
    """
    tag = node.get("tag", "h2")
    if isinstance(tag, str):
        tag = tag.lower()
    if tag not in _HEADING_TAGS:
        tag = "h2"
    inner = _convert_children(node.get("children", [])) or '""'
    return f"({tag} {inner})"


@_converter("extended-heading")
def _extended_heading(node: dict) -> str:
    """Convert an extended-heading node: as text if it has ``"text"``, else as a heading."""
    return _text(node) if "text" in node else _heading(node)


@_converter("quote")
def _quote(node: dict) -> str:
    """Convert a quote node to ``(blockquote ...)``; empty becomes ``(blockquote "")``."""
    body = _convert_children(node.get("children", []))
    if not body:
        return '(blockquote "")'
    return f"(blockquote {body})"


@_converter("extended-quote")
def _extended_quote(node: dict) -> str:
    """Convert an extended-quote node: as text if it has ``"text"``, else as a quote."""
    return _text(node) if "text" in node else _quote(node)


@_converter("link")
def _link(node: dict) -> str:
    """Convert a link node to ``(a :href "..." ...)``.

    The target comes from ``node["url"]``. When the link has no convertible
    children, the escaped URL itself is used as the link text.
    """
    target = _esc(node.get("url", ""))
    label = _convert_children(node.get("children", []))
    if not label:
        label = f'"{target}"'
    return f'(a :href "{target}" {label})'


@_converter("autolink")
def _autolink(node: dict) -> str:
    """Convert an autolink node exactly as a regular link."""
    as_link = _link(node)
    return as_link


@_converter("at-link")
def _at_link(node: dict) -> str:
    """Convert an at-link node exactly as a regular link."""
    as_link = _link(node)
    return as_link


@_converter("list")
def _list(node: dict) -> str:
    """Convert a list node to ``(ol ...)`` or ``(ul ...)``.

    ``listType == "number"`` selects ``ol``; every other value selects
    ``ul``. A list with no convertible children becomes a bare ``(ol)`` or
    ``(ul)``.
    """
    tag = "ul"
    if node.get("listType") == "number":
        tag = "ol"
    items = _convert_children(node.get("children", []))
    if not items:
        return f"({tag})"
    return f"({tag} {items})"


@_converter("listitem")
def _listitem(node: dict) -> str:
    """Convert a list item to ``(li ...)``; an empty item becomes ``(li "")``."""
    body = _convert_children(node.get("children", []))
    if not body:
        return '(li "")'
    return f"(li {body})"


@_converter("horizontalrule")
def _horizontalrule(_node: dict) -> str:
    """Emit ``(hr)``; the node itself is not inspected."""
    rule = "(hr)"
    return rule


@_converter("code")
def _code(node: dict) -> str:
    """Convert a code node to ``(code ...)``.

    Unlike the other block converters, a node with no convertible children
    produces an empty string, not an empty element.
    """
    body = _convert_children(node.get("children", []))
    if not body:
        return ""
    return f"(code {body})"


@_converter("codeblock")
def _codeblock(node: dict) -> str:
    """Convert a codeblock card to ``(pre (code ... "source"))``.

    The source text is read from ``node["code"]``. When ``node["language"]``
    is set, the inner ``code`` element gets ``:class "language-<lang>"``.
    """
    language = node.get("language", "")
    source = node.get("code", "")
    class_attr = ""
    if language:
        class_attr = f' :class "language-{_esc(language)}"'
    return f'(pre (code{class_attr} "{_esc(source)}"))'


@_converter("code-highlight")
def _code_highlight(node: dict) -> str:
    """Convert a code-highlight node to a quoted sx string, or ``""`` if it has no text."""
    snippet = node.get("text", "")
    if not snippet:
        return ""
    return f'"{_esc(snippet)}"'


# ---------------------------------------------------------------------------
# Tier 2 — common cards
# ---------------------------------------------------------------------------

@_converter("image")
def _image(node: dict) -> str:
    """Convert an image card to a ``(~kg-image ...)`` call.

    ``:src`` is always emitted. ``:alt``, ``:caption``, ``:width`` and
    ``:href`` are emitted only when the node has a non-empty value. The
    caption is run through ``html_to_sx`` instead of being quoted, and
    ``:width`` comes from ``cardWidth``, falling back to ``width``.
    """
    src = node.get("src", "")
    alt = node.get("alt", "")
    caption = node.get("caption", "")
    width = node.get("cardWidth", "") or node.get("width", "")
    href = node.get("href", "")

    parts = [f':src "{_esc(src)}"']
    if alt:
        parts.append(f':alt "{_esc(alt)}"')
    if caption:
        parts.append(f":caption {html_to_sx(caption)}")
    if width:
        # The ``width`` fallback may be a number, not a string (presumably a
        # pixel width — confirm against stored posts); _esc only accepts str.
        parts.append(f':width "{_esc(str(width))}"')
    if href:
        parts.append(f':href "{_esc(href)}"')
    return "(~kg-image " + " ".join(parts) + ")"


@_converter("gallery")
def _gallery(node: dict) -> str:
    """Convert a gallery card to ``(~kg-gallery :images (list (list (dict ...))))``.

    Images are grouped into rows of three (matching lexical_renderer.py).
    Each image becomes a ``(dict ...)`` with ``"src"`` plus optional
    ``"alt"`` and ``"caption"``. A gallery without images produces an empty
    string.
    """
    images = node.get("images", [])
    if not images:
        return ""

    def image_entry(img: dict) -> str:
        # One (dict ...) form per image; optional keys only when non-empty.
        src = img.get("src", "")
        fields = [f'"src" "{_esc(src)}"']
        if img.get("alt"):
            alt = img["alt"]
            fields.append(f'"alt" "{_esc(alt)}"')
        if img.get("caption"):
            fields.append('"caption" ' + f"{html_to_sx(img['caption'])}")
        return "(dict " + " ".join(fields) + ")"

    row_forms = [
        "(list " + " ".join(image_entry(img) for img in images[start:start + 3]) + ")"
        for start in range(0, len(images), 3)
    ]
    images_sx = "(list " + " ".join(row_forms) + ")"

    caption = node.get("caption", "")
    caption_attr = ""
    if caption:
        caption_attr = f" :caption {html_to_sx(caption)}"
    return f"(~kg-gallery :images {images_sx}{caption_attr})"


@_converter("html")
def _html_card(node: dict) -> str:
    """Convert an HTML card: ``node["html"]`` goes through ``html_to_sx`` into ``(~kg-html ...)``."""
    markup = node.get("html", "")
    return f"(~kg-html {html_to_sx(markup)})"


@_converter("embed")
def _embed(node: dict) -> str:
    """Convert an embed card to ``(~kg-embed :html "..." [:caption ...])``.

    The embed HTML is kept as an escaped string literal; only the caption is
    converted with ``html_to_sx``.
    """
    markup = node.get("html", "")
    attrs = [f':html "{_esc(markup)}"']
    caption = node.get("caption", "")
    if caption:
        attrs.append(f":caption {html_to_sx(caption)}")
    return f"(~kg-embed {' '.join(attrs)})"


@_converter("bookmark")
def _bookmark(node: dict) -> str:
    """Convert a bookmark card to a ``(~kg-bookmark ...)`` call.

    ``:url`` is always emitted. ``title``, ``description``, ``icon``,
    ``author``, ``publisher`` and ``thumbnail`` are each looked up in
    ``node["metadata"]`` first and on the node itself second, and emitted
    (in that order) only when non-empty. The caption, if any, is converted
    with ``html_to_sx`` and emitted last.
    """
    url = node.get("url", "")
    # ``or {}`` also covers an explicit ``"metadata": null``.
    meta = node.get("metadata") or {}
    parts = [f':url "{_esc(url)}"']

    for field in ("title", "description", "icon", "author", "publisher", "thumbnail"):
        value = meta.get(field, "") or node.get(field, "")
        if value:
            parts.append(f':{field} "{_esc(value)}"')

    caption = node.get("caption", "")
    if caption:
        parts.append(f":caption {html_to_sx(caption)}")

    return "(~kg-bookmark " + " ".join(parts) + ")"


@_converter("callout")
def _callout(node: dict) -> str:
    """Convert a callout card to a ``(~kg-callout ...)`` call.

    ``:color`` comes from ``backgroundColor`` (default ``grey``) and is
    always emitted; ``:emoji`` comes from ``calloutEmoji`` when set.
    ``:content`` is built from the node's children. When the children yield
    nothing, a non-empty string under ``calloutText`` is converted with
    ``html_to_sx`` instead, so the body is not dropped.
    """
    color = node.get("backgroundColor", "grey")
    emoji = node.get("calloutEmoji", "")
    inner = _convert_children(node.get("children", []))
    if not inner:
        # NOTE(review): Ghost's callout card is understood to store its body
        # as an HTML string under "calloutText" — confirm against stored posts.
        text_html = node.get("calloutText", "")
        if isinstance(text_html, str) and text_html:
            inner = html_to_sx(text_html)

    parts = [f':color "{_esc(color)}"']
    if emoji:
        parts.append(f':emoji "{_esc(emoji)}"')
    if inner:
        parts.append(f':content {inner}')
    return "(~kg-callout " + " ".join(parts) + ")"


@_converter("button")
def _button(node: dict) -> str:
    """Convert a button card to ``(~kg-button :url ... :text ... :alignment ...)``.

    Reads ``buttonUrl``, ``buttonText`` and ``alignment`` (default
    ``center``); all three attributes are always emitted.
    """
    label = _esc(node.get("buttonText", ""))
    target = _esc(node.get("buttonUrl", ""))
    align = _esc(node.get("alignment", "center"))
    return f'(~kg-button :url "{target}" :text "{label}" :alignment "{align}")'


@_converter("toggle")
def _toggle(node: dict) -> str:
    """Convert a toggle card to ``(~kg-toggle :heading "..." [:content ...])``.

    ``:heading`` is always emitted as an escaped string. ``:content`` is
    built from the node's children. When the children yield nothing, a
    non-empty string under ``content`` is converted with ``html_to_sx``
    instead, so the body is not dropped.
    """
    heading = node.get("heading", "")
    inner = _convert_children(node.get("children", []))
    if not inner:
        # NOTE(review): Ghost's toggle card is understood to store its body
        # as an HTML string under "content" — confirm against stored posts.
        body_html = node.get("content", "")
        if isinstance(body_html, str) and body_html:
            inner = html_to_sx(body_html)
    content_attr = f" :content {inner}" if inner else ""
    return f'(~kg-toggle :heading "{_esc(heading)}"{content_attr})'


@_converter("audio")
def _audio(node: dict) -> str:
    """Convert an audio card to a ``(~kg-audio ...)`` call.

    ``:src`` and ``:duration`` are always emitted; ``:title`` and
    ``:thumbnail`` (from ``thumbnailSrc``) only when non-empty. The duration
    is read as a number of seconds and formatted as ``M:SS``. A missing,
    null, non-numeric or negative duration is formatted as ``0:00``.
    """
    src = node.get("src", "")
    title = node.get("title", "")
    duration = node.get("duration", 0)
    thumbnail = node.get("thumbnailSrc", "")

    try:
        # float() first so fractional strings such as "75.5" are accepted.
        total_seconds = max(0, int(float(duration or 0)))
    except (TypeError, ValueError, OverflowError):
        total_seconds = 0
    minutes, seconds = divmod(total_seconds, 60)
    duration_str = f"{minutes}:{seconds:02d}"

    parts = [f':src "{_esc(src)}"']
    if title:
        parts.append(f':title "{_esc(title)}"')
    parts.append(f':duration "{duration_str}"')
    if thumbnail:
        parts.append(f':thumbnail "{_esc(thumbnail)}"')
    return "(~kg-audio " + " ".join(parts) + ")"


@_converter("video")
def _video(node: dict) -> str:
    """Convert a video card to a ``(~kg-video ...)`` call.

    ``:src`` is always emitted. ``:caption`` (via ``html_to_sx``),
    ``:width`` (from ``cardWidth``), ``:thumbnail`` (``thumbnailSrc``,
    falling back to ``customThumbnailSrc``) and ``:loop true`` are emitted
    only when the corresponding value is truthy.
    """
    source = node.get("src", "")
    attrs = [f':src "{_esc(source)}"']

    caption = node.get("caption", "")
    if caption:
        attrs.append(f":caption {html_to_sx(caption)}")

    card_width = node.get("cardWidth", "")
    if card_width:
        attrs.append(f':width "{_esc(card_width)}"')

    poster = node.get("thumbnailSrc", "") or node.get("customThumbnailSrc", "")
    if poster:
        attrs.append(f':thumbnail "{_esc(poster)}"')

    if node.get("loop", False):
        attrs.append(":loop true")
    return f"(~kg-video {' '.join(attrs)})"


@_converter("file")
def _file(node: dict) -> str:
    """Convert a file card to a ``(~kg-file ...)`` call.

    ``:src`` is always emitted. ``:filename`` (from ``fileName``),
    ``:title`` (falling back to the file name), ``:filesize`` and
    ``:caption`` (via ``html_to_sx``) are emitted only when non-empty.
    ``fileSize`` is read as a byte count, given as a number or a numeric
    string, and shown as whole KB below 1024 KB, otherwise as MB with one
    decimal. A missing, non-numeric or non-positive size is omitted.
    """
    src = node.get("src", "")
    filename = node.get("fileName", "")
    title = node.get("title", "") or filename
    file_size = node.get("fileSize", 0)
    caption = node.get("caption", "")

    # Coerce first: a string size would otherwise fail on the division.
    try:
        size_bytes = float(file_size or 0)
    except (TypeError, ValueError):
        size_bytes = 0.0

    size_str = ""
    if size_bytes > 0:
        kb = size_bytes / 1024
        if kb < 1024:
            size_str = f"{kb:.0f} KB"
        else:
            size_str = f"{kb / 1024:.1f} MB"

    parts = [f':src "{_esc(src)}"']
    if filename:
        parts.append(f':filename "{_esc(filename)}"')
    if title:
        parts.append(f':title "{_esc(title)}"')
    if size_str:
        parts.append(f':filesize "{size_str}"')
    if caption:
        parts.append(f":caption {html_to_sx(caption)}")
    return "(~kg-file " + " ".join(parts) + ")"


@_converter("paywall")
def _paywall(_node: dict) -> str:
    """Emit ``(~kg-paywall)``; the node itself is not inspected."""
    marker = "(~kg-paywall)"
    return marker


@_converter("markdown")
def _markdown(node: dict) -> str:
    """Convert a markdown card to ``(~kg-md ...)``.

    ``node["markdown"]`` is rendered to HTML with ``mistune.html`` and that
    HTML is then converted with ``html_to_sx``.
    """
    source = node.get("markdown", "")
    as_html = mistune.html(source)
    return f"(~kg-md {html_to_sx(as_html)})"