""" Lexical JSON → s-expression converter. Mirrors lexical_renderer.py's registry/dispatch pattern but produces sx source instead of HTML. Used for backfilling existing posts and on-the-fly conversion when editing pre-migration posts in the SX editor. Public API ---------- lexical_to_sx(doc) – Lexical JSON (dict or string) → sx source string """ from __future__ import annotations import json from typing import Callable import mistune from shared.sx.html_to_sx import html_to_sx # --------------------------------------------------------------------------- # Registry # --------------------------------------------------------------------------- _CONVERTERS: dict[str, Callable[[dict], str]] = {} def _converter(node_type: str): """Decorator — register a function as the converter for *node_type*.""" def decorator(fn: Callable[[dict], str]) -> Callable[[dict], str]: _CONVERTERS[node_type] = fn return fn return decorator # --------------------------------------------------------------------------- # Public entry point # --------------------------------------------------------------------------- def lexical_to_sx(doc: dict | str) -> str: """Convert a Lexical JSON document to an sx source string.""" if isinstance(doc, str): doc = json.loads(doc) root = doc.get("root", doc) children = root.get("children", []) parts = [_convert_node(c) for c in children] parts = [p for p in parts if p] if not parts: return '(<> (p ""))' if len(parts) == 1: return parts[0] return "(<>\n " + "\n ".join(parts) + ")" # --------------------------------------------------------------------------- # Core dispatch # --------------------------------------------------------------------------- def _convert_node(node: dict) -> str: node_type = node.get("type", "") converter = _CONVERTERS.get(node_type) if converter: return converter(node) return "" def _convert_children(children: list[dict]) -> str: """Convert children to inline sx content (for text nodes).""" parts = [_convert_node(c) for c in children] return " ".join(p for p in parts if p) def _esc(s: str) -> str: """Escape a string for sx double-quoted literals.""" return s.replace("\\", "\\\\").replace('"', '\\"') # --------------------------------------------------------------------------- # Text format bitmask # --------------------------------------------------------------------------- _FORMAT_BOLD = 1 _FORMAT_ITALIC = 2 _FORMAT_STRIKETHROUGH = 4 _FORMAT_UNDERLINE = 8 _FORMAT_CODE = 16 _FORMAT_SUBSCRIPT = 32 _FORMAT_SUPERSCRIPT = 64 _FORMAT_WRAPPERS: list[tuple[int, str]] = [ (_FORMAT_BOLD, "strong"), (_FORMAT_ITALIC, "em"), (_FORMAT_STRIKETHROUGH, "s"), (_FORMAT_UNDERLINE, "u"), (_FORMAT_CODE, "code"), (_FORMAT_SUBSCRIPT, "sub"), (_FORMAT_SUPERSCRIPT, "sup"), ] def _wrap_format(text_sx: str, fmt: int) -> str: for mask, tag in _FORMAT_WRAPPERS: if fmt & mask: text_sx = f"({tag} {text_sx})" return text_sx # --------------------------------------------------------------------------- # Tier 1 — text nodes # --------------------------------------------------------------------------- @_converter("text") def _text(node: dict) -> str: text = node.get("text", "") if not text: return "" sx = f'"{_esc(text)}"' fmt = node.get("format", 0) if isinstance(fmt, int) and fmt: sx = _wrap_format(sx, fmt) return sx @_converter("linebreak") def _linebreak(_node: dict) -> str: return '"\\n"' @_converter("tab") def _tab(_node: dict) -> str: return '"\\t"' @_converter("paragraph") def _paragraph(node: dict) -> str: inner = _convert_children(node.get("children", [])) if not inner: inner = '""' return f"(p {inner})" @_converter("extended-text") def _extended_text(node: dict) -> str: # extended-text can be block-level (with children) or inline (with text). # When it has a "text" field, treat it as a plain text node. if "text" in node: return _text(node) return _paragraph(node) @_converter("heading") def _heading(node: dict) -> str: tag = node.get("tag", "h2") inner = _convert_children(node.get("children", [])) if not inner: inner = '""' return f"({tag} {inner})" @_converter("extended-heading") def _extended_heading(node: dict) -> str: if "text" in node: return _text(node) return _heading(node) @_converter("quote") def _quote(node: dict) -> str: inner = _convert_children(node.get("children", [])) return f"(blockquote {inner})" if inner else '(blockquote "")' @_converter("extended-quote") def _extended_quote(node: dict) -> str: if "text" in node: return _text(node) return _quote(node) @_converter("link") def _link(node: dict) -> str: href = node.get("url", "") inner = _convert_children(node.get("children", [])) if not inner: inner = f'"{_esc(href)}"' return f'(a :href "{_esc(href)}" {inner})' @_converter("autolink") def _autolink(node: dict) -> str: return _link(node) @_converter("at-link") def _at_link(node: dict) -> str: return _link(node) @_converter("list") def _list(node: dict) -> str: tag = "ol" if node.get("listType") == "number" else "ul" inner = _convert_children(node.get("children", [])) return f"({tag} {inner})" if inner else f"({tag})" @_converter("listitem") def _listitem(node: dict) -> str: inner = _convert_children(node.get("children", [])) return f"(li {inner})" if inner else '(li "")' @_converter("horizontalrule") def _horizontalrule(_node: dict) -> str: return "(hr)" @_converter("code") def _code(node: dict) -> str: inner = _convert_children(node.get("children", [])) return f"(code {inner})" if inner else "" @_converter("codeblock") def _codeblock(node: dict) -> str: lang = node.get("language", "") code = node.get("code", "") lang_attr = f' :class "language-{_esc(lang)}"' if lang else "" return f'(pre (code{lang_attr} "{_esc(code)}"))' @_converter("code-highlight") def _code_highlight(node: dict) -> str: text = node.get("text", "") return f'"{_esc(text)}"' if text else "" # --------------------------------------------------------------------------- # Tier 2 — common cards # --------------------------------------------------------------------------- @_converter("image") def _image(node: dict) -> str: src = node.get("src", "") alt = node.get("alt", "") caption = node.get("caption", "") width = node.get("cardWidth", "") or node.get("width", "") href = node.get("href", "") parts = [f':src "{_esc(src)}"'] if alt: parts.append(f':alt "{_esc(alt)}"') if caption: parts.append(f":caption {html_to_sx(caption)}") if width: parts.append(f':width "{_esc(width)}"') if href: parts.append(f':href "{_esc(href)}"') return "(~kg-image " + " ".join(parts) + ")" @_converter("gallery") def _gallery(node: dict) -> str: images = node.get("images", []) if not images: return "" # Group images into rows of 3 (matching lexical_renderer.py) rows = [] for i in range(0, len(images), 3): row_imgs = images[i:i + 3] row_items = [] for img in row_imgs: item_parts = [f'"src" "{_esc(img.get("src", ""))}"'] if img.get("alt"): item_parts.append(f'"alt" "{_esc(img["alt"])}"') if img.get("caption"): item_parts.append(f'"caption" {html_to_sx(img["caption"])}') row_items.append("(dict " + " ".join(item_parts) + ")") rows.append("(list " + " ".join(row_items) + ")") images_sx = "(list " + " ".join(rows) + ")" caption = node.get("caption", "") caption_attr = f" :caption {html_to_sx(caption)}" if caption else "" return f"(~kg-gallery :images {images_sx}{caption_attr})" @_converter("html") def _html_card(node: dict) -> str: raw = node.get("html", "") inner = html_to_sx(raw) return f"(~kg-html {inner})" @_converter("embed") def _embed(node: dict) -> str: embed_html = node.get("html", "") caption = node.get("caption", "") parts = [f':html "{_esc(embed_html)}"'] if caption: parts.append(f":caption {html_to_sx(caption)}") return "(~kg-embed " + " ".join(parts) + ")" @_converter("bookmark") def _bookmark(node: dict) -> str: url = node.get("url", "") meta = node.get("metadata", {}) parts = [f':url "{_esc(url)}"'] title = meta.get("title", "") or node.get("title", "") if title: parts.append(f':title "{_esc(title)}"') desc = meta.get("description", "") or node.get("description", "") if desc: parts.append(f':description "{_esc(desc)}"') icon = meta.get("icon", "") or node.get("icon", "") if icon: parts.append(f':icon "{_esc(icon)}"') author = meta.get("author", "") or node.get("author", "") if author: parts.append(f':author "{_esc(author)}"') publisher = meta.get("publisher", "") or node.get("publisher", "") if publisher: parts.append(f':publisher "{_esc(publisher)}"') thumbnail = meta.get("thumbnail", "") or node.get("thumbnail", "") if thumbnail: parts.append(f':thumbnail "{_esc(thumbnail)}"') caption = node.get("caption", "") if caption: parts.append(f":caption {html_to_sx(caption)}") return "(~kg-bookmark " + " ".join(parts) + ")" @_converter("callout") def _callout(node: dict) -> str: color = node.get("backgroundColor", "grey") emoji = node.get("calloutEmoji", "") inner = _convert_children(node.get("children", [])) parts = [f':color "{_esc(color)}"'] if emoji: parts.append(f':emoji "{_esc(emoji)}"') if inner: parts.append(f':content {inner}') return "(~kg-callout " + " ".join(parts) + ")" @_converter("button") def _button(node: dict) -> str: text = node.get("buttonText", "") url = node.get("buttonUrl", "") alignment = node.get("alignment", "center") return f'(~kg-button :url "{_esc(url)}" :text "{_esc(text)}" :alignment "{_esc(alignment)}")' @_converter("toggle") def _toggle(node: dict) -> str: heading = node.get("heading", "") inner = _convert_children(node.get("children", [])) content_attr = f" :content {inner}" if inner else "" return f'(~kg-toggle :heading "{_esc(heading)}"{content_attr})' @_converter("audio") def _audio(node: dict) -> str: src = node.get("src", "") title = node.get("title", "") duration = node.get("duration", 0) thumbnail = node.get("thumbnailSrc", "") duration_min = int(duration) // 60 duration_sec = int(duration) % 60 duration_str = f"{duration_min}:{duration_sec:02d}" parts = [f':src "{_esc(src)}"'] if title: parts.append(f':title "{_esc(title)}"') parts.append(f':duration "{duration_str}"') if thumbnail: parts.append(f':thumbnail "{_esc(thumbnail)}"') return "(~kg-audio " + " ".join(parts) + ")" @_converter("video") def _video(node: dict) -> str: src = node.get("src", "") caption = node.get("caption", "") width = node.get("cardWidth", "") thumbnail = node.get("thumbnailSrc", "") or node.get("customThumbnailSrc", "") loop = node.get("loop", False) parts = [f':src "{_esc(src)}"'] if caption: parts.append(f":caption {html_to_sx(caption)}") if width: parts.append(f':width "{_esc(width)}"') if thumbnail: parts.append(f':thumbnail "{_esc(thumbnail)}"') if loop: parts.append(":loop true") return "(~kg-video " + " ".join(parts) + ")" @_converter("file") def _file(node: dict) -> str: src = node.get("src", "") filename = node.get("fileName", "") title = node.get("title", "") or filename file_size = node.get("fileSize", 0) caption = node.get("caption", "") # Format size size_str = "" if file_size: kb = file_size / 1024 if kb < 1024: size_str = f"{kb:.0f} KB" else: size_str = f"{kb / 1024:.1f} MB" parts = [f':src "{_esc(src)}"'] if filename: parts.append(f':filename "{_esc(filename)}"') if title: parts.append(f':title "{_esc(title)}"') if size_str: parts.append(f':filesize "{size_str}"') if caption: parts.append(f":caption {html_to_sx(caption)}") return "(~kg-file " + " ".join(parts) + ")" @_converter("paywall") def _paywall(_node: dict) -> str: return "(~kg-paywall)" @_converter("markdown") def _markdown(node: dict) -> str: md_text = node.get("markdown", "") rendered = mistune.html(md_text) inner = html_to_sx(rendered) return f"(~kg-md {inner})"