Add shared/sx/html_to_sx.py (HTMLParser-based HTML→sx converter) and update lexical_to_sx.py so HTML cards, markdown cards, and captions all produce native sx expressions instead of opaque HTML strings.

- ~kg-html now wraps native sx children (editor can identify the block)
- New ~kg-md component for markdown card blocks
- Captions are sx expressions, not escaped HTML strings
- kg_cards.sx: replace (raw! caption) with direct caption rendering
- sx-editor.js: htmlToSx() via DOMParser, serializeInline for captions, _childrenSx for ~kg-html/~kg-md, new kg-md edit UI
- Migration script (blog/scripts/migrate_sx_html.py) to re-convert stored sx_content from lexical source

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
446 lines
13 KiB
Python
446 lines
13 KiB
Python
"""
|
||
Lexical JSON → s-expression converter.
|
||
|
||
Mirrors lexical_renderer.py's registry/dispatch pattern but produces sx source
|
||
instead of HTML. Used for backfilling existing posts and on-the-fly conversion
|
||
when editing pre-migration posts in the SX editor.
|
||
|
||
Public API
|
||
----------
|
||
lexical_to_sx(doc) – Lexical JSON (dict or string) → sx source string
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
from typing import Callable
|
||
|
||
import mistune
|
||
|
||
from shared.sx.html_to_sx import html_to_sx
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Registry
|
||
# ---------------------------------------------------------------------------
|
||
|
||
_CONVERTERS: dict[str, Callable[[dict], str]] = {}
|
||
|
||
|
||
def _converter(node_type: str):
|
||
"""Decorator — register a function as the converter for *node_type*."""
|
||
def decorator(fn: Callable[[dict], str]) -> Callable[[dict], str]:
|
||
_CONVERTERS[node_type] = fn
|
||
return fn
|
||
return decorator
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Public entry point
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def lexical_to_sx(doc: dict | str) -> str:
    """Render a Lexical JSON document as an sx source string.

    *doc* may be an already-parsed dict or a JSON string.  The top-level
    nodes are read from ``doc["root"]["children"]`` (or ``doc["children"]``
    when there is no ``"root"`` key).  Nodes that convert to an empty string
    are dropped.

    Returns the single expression when exactly one node survives, a ``(<> ...)``
    fragment when several do, and a fragment holding one empty paragraph when
    none do.
    """
    document = json.loads(doc) if isinstance(doc, str) else doc
    top_level = document.get("root", document).get("children", [])
    rendered = [sx for sx in map(_convert_node, top_level) if sx]

    if len(rendered) == 1:
        return rendered[0]
    if rendered:
        return "(<>\n " + "\n ".join(rendered) + ")"
    return '(<> (p ""))'
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Core dispatch
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _convert_node(node: dict) -> str:
    """Dispatch *node* to the converter registered for its ``"type"``.

    Returns an empty string when no converter is registered for the type
    (including when the node has no ``"type"`` key).
    """
    handler = _CONVERTERS.get(node.get("type", ""))
    return handler(node) if handler else ""
|
||
|
||
|
||
def _convert_children(children: list[dict]) -> str:
    """Convert each child node and join the non-empty results with spaces."""
    rendered = [_convert_node(child) for child in children]
    return " ".join(filter(None, rendered))
|
||
|
||
|
||
def _esc(s: str) -> str:
|
||
"""Escape a string for sx double-quoted literals."""
|
||
return s.replace("\\", "\\\\").replace('"', '\\"')
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Text format bitmask
|
||
# ---------------------------------------------------------------------------
|
||
|
||
_FORMAT_BOLD = 1
|
||
_FORMAT_ITALIC = 2
|
||
_FORMAT_STRIKETHROUGH = 4
|
||
_FORMAT_UNDERLINE = 8
|
||
_FORMAT_CODE = 16
|
||
_FORMAT_SUBSCRIPT = 32
|
||
_FORMAT_SUPERSCRIPT = 64
|
||
|
||
_FORMAT_WRAPPERS: list[tuple[int, str]] = [
|
||
(_FORMAT_BOLD, "strong"),
|
||
(_FORMAT_ITALIC, "em"),
|
||
(_FORMAT_STRIKETHROUGH, "s"),
|
||
(_FORMAT_UNDERLINE, "u"),
|
||
(_FORMAT_CODE, "code"),
|
||
(_FORMAT_SUBSCRIPT, "sub"),
|
||
(_FORMAT_SUPERSCRIPT, "sup"),
|
||
]
|
||
|
||
|
||
def _wrap_format(text_sx: str, fmt: int) -> str:
    """Wrap *text_sx* in one sx element per format bit set in *fmt*.

    Wrappers are applied in ``_FORMAT_WRAPPERS`` order, so earlier entries
    end up innermost.
    """
    active_tags = [tag for bit, tag in _FORMAT_WRAPPERS if fmt & bit]
    wrapped = text_sx
    for tag in active_tags:
        wrapped = f"({tag} {wrapped})"
    return wrapped
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Tier 1 — text nodes
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@_converter("text")
def _text(node: dict) -> str:
    """Render a text node as a quoted sx string, wrapped per its format bits.

    Returns an empty string when the node has no text.  The ``"format"``
    field is honoured only when it is a non-zero integer.
    """
    content = node.get("text", "")
    if not content:
        return ""

    literal = f'"{_esc(content)}"'
    flags = node.get("format", 0)
    if not isinstance(flags, int) or not flags:
        return literal
    return _wrap_format(literal, flags)
|
||
|
||
|
||
@_converter("linebreak")
def _linebreak(_node: dict) -> str:
    """Render a line break as an sx string literal holding a ``\\n`` escape."""
    newline_literal = '"\\n"'
    return newline_literal
|
||
|
||
|
||
@_converter("tab")
def _tab(_node: dict) -> str:
    """Render a tab node as an sx string literal holding a ``\\t`` escape."""
    tab_literal = '"\\t"'
    return tab_literal
|
||
|
||
|
||
@_converter("paragraph")
def _paragraph(node: dict) -> str:
    """Render a paragraph node as ``(p ...)``.

    A paragraph with no convertible children becomes ``(p "")``.
    """
    body = _convert_children(node.get("children", [])) or '""'
    return f"(p {body})"
|
||
|
||
|
||
@_converter("extended-text")
def _extended_text(node: dict) -> str:
    """Render an extended-text node.

    The node is treated as an inline text node when it carries a ``"text"``
    key, and as a block-level paragraph (built from its children) otherwise.
    """
    handler = _text if "text" in node else _paragraph
    return handler(node)
|
||
|
||
|
||
# The only element names a heading node is allowed to emit.
_HEADING_TAGS = frozenset(f"h{level}" for level in range(1, 7))


@_converter("heading")
def _heading(node: dict) -> str:
    """Render a heading node as ``(hN ...)``.

    The ``"tag"`` field is written straight into the sx source, so it is
    validated first: anything other than ``h1``–``h6`` (case-insensitive)
    falls back to ``h2``, which is also the default when the field is absent.
    Without the check a malformed tag would produce invalid or injected sx.

    A heading with no convertible children gets an empty string as its body.
    """
    raw_tag = node.get("tag", "h2")
    tag = raw_tag.lower() if isinstance(raw_tag, str) else ""
    if tag not in _HEADING_TAGS:
        tag = "h2"

    inner = _convert_children(node.get("children", [])) or '""'
    return f"({tag} {inner})"
|
||
|
||
|
||
@_converter("extended-heading")
def _extended_heading(node: dict) -> str:
    """Render an extended-heading node.

    Nodes carrying a ``"text"`` key are handled as inline text; all others
    are handled as a regular heading.
    """
    handler = _text if "text" in node else _heading
    return handler(node)
|
||
|
||
|
||
@_converter("quote")
def _quote(node: dict) -> str:
    """Render a quote node as ``(blockquote ...)``; empty quotes get ``""``."""
    body = _convert_children(node.get("children", []))
    if not body:
        return '(blockquote "")'
    return f"(blockquote {body})"
|
||
|
||
|
||
@_converter("extended-quote")
def _extended_quote(node: dict) -> str:
    """Render an extended-quote node.

    Nodes carrying a ``"text"`` key are handled as inline text; all others
    are handled as a regular quote.
    """
    handler = _text if "text" in node else _quote
    return handler(node)
|
||
|
||
|
||
@_converter("link")
def _link(node: dict) -> str:
    """Render a link node as ``(a :href "..." ...)``.

    The link target comes from the node's ``"url"`` field.  When the node
    has no convertible children, the URL itself is used as the link text.
    """
    href = node.get("url", "")
    label = _convert_children(node.get("children", []))
    target = f'"{_esc(href)}"'
    return f"(a :href {target} {label or target})"
|
||
|
||
|
||
@_converter("autolink")
def _autolink(node: dict) -> str:
    """Render an autolink node exactly as a regular link."""
    rendered = _link(node)
    return rendered
|
||
|
||
|
||
@_converter("at-link")
def _at_link(node: dict) -> str:
    """Render an at-link node exactly as a regular link."""
    rendered = _link(node)
    return rendered
|
||
|
||
|
||
@_converter("list")
def _list(node: dict) -> str:
    """Render a list node as ``(ol ...)`` or ``(ul ...)``.

    ``listType == "number"`` selects ``ol``; every other value selects
    ``ul``.  A list with no convertible children is emitted as a bare
    ``(ol)`` / ``(ul)``.
    """
    tag = "ul"
    if node.get("listType") == "number":
        tag = "ol"

    items = _convert_children(node.get("children", []))
    if items:
        return f"({tag} {items})"
    return f"({tag})"
|
||
|
||
|
||
@_converter("listitem")
def _listitem(node: dict) -> str:
    """Render a list item as ``(li ...)``; empty items become ``(li "")``."""
    body = _convert_children(node.get("children", []))
    if not body:
        return '(li "")'
    return f"(li {body})"
|
||
|
||
|
||
@_converter("horizontalrule")
def _horizontalrule(_node: dict) -> str:
    """Render a horizontal rule; the node's contents are ignored."""
    rule_sx = "(hr)"
    return rule_sx
|
||
|
||
|
||
@_converter("code")
def _code(node: dict) -> str:
    """Render a code node as ``(code ...)`` built from its children.

    Returns an empty string (so the node is dropped) when no child converts
    to anything.
    """
    body = _convert_children(node.get("children", []))
    if not body:
        return ""
    return f"(code {body})"
|
||
|
||
|
||
@_converter("codeblock")
def _codeblock(node: dict) -> str:
    """Render a codeblock card as ``(pre (code ... "source"))``.

    When the node names a language, the inner ``code`` element gets a
    ``language-<lang>`` class.
    """
    language = node.get("language", "")
    source = node.get("code", "")

    class_attr = ""
    if language:
        class_attr = f' :class "language-{_esc(language)}"'
    return f'(pre (code{class_attr} "{_esc(source)}"))'
|
||
|
||
|
||
@_converter("code-highlight")
def _code_highlight(node: dict) -> str:
    """Render a code-highlight node as a plain quoted string.

    Returns an empty string when the node has no text.
    """
    token = node.get("text", "")
    if not token:
        return ""
    return f'"{_esc(token)}"'
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Tier 2 — common cards
|
||
# ---------------------------------------------------------------------------
|
||
|
||
@_converter("image")
def _image(node: dict) -> str:
    """Render an image card as ``(~kg-image ...)``.

    ``:src`` is always emitted; ``:alt``, ``:caption``, ``:width`` and
    ``:href`` are emitted only when the node supplies a truthy value.  The
    caption is passed through ``html_to_sx`` and embedded as an sx expression
    rather than a quoted string.
    """
    src = node.get("src", "")
    alt = node.get("alt", "")
    caption = node.get("caption", "")
    # "cardWidth" wins; "width" is only consulted when cardWidth is empty.
    width = node.get("cardWidth", "") or node.get("width", "")
    href = node.get("href", "")

    parts = [f':src "{_esc(src)}"']
    if alt:
        parts.append(f':alt "{_esc(alt)}"')
    if caption:
        parts.append(f":caption {html_to_sx(caption)}")
    if width:
        # The "width" fallback can be a JSON number, and _esc() only accepts
        # strings, so coerce before escaping instead of raising.
        # NOTE(review): Koenig image nodes appear to store pixel width as an
        # integer here — confirm that ~kg-image wants that value in :width.
        parts.append(f':width "{_esc(str(width))}"')
    if href:
        parts.append(f':href "{_esc(href)}"')
    return "(~kg-image " + " ".join(parts) + ")"
|
||
|
||
|
||
@_converter("gallery")
def _gallery(node: dict) -> str:
    """Render a gallery card as ``(~kg-gallery :images (list (list ...) ...))``.

    Images are grouped into rows of three (the original comment says this
    matches lexical_renderer.py).  Each image becomes a ``(dict ...)`` with
    a ``"src"`` entry and optional ``"alt"`` / ``"caption"`` entries.
    Captions, both per-image and gallery-wide, go through ``html_to_sx``.
    Returns an empty string when the node has no images.
    """
    images = node.get("images", [])
    if not images:
        return ""

    def item_sx(img: dict) -> str:
        # One gallery image rendered as an sx dict.
        fields = [f'"src" "{_esc(img.get("src", ""))}"']
        if img.get("alt"):
            fields.append(f'"alt" "{_esc(img["alt"])}"')
        if img.get("caption"):
            fields.append(f'"caption" {html_to_sx(img["caption"])}')
        return "(dict " + " ".join(fields) + ")"

    rows = [
        "(list " + " ".join([item_sx(img) for img in images[start:start + 3]]) + ")"
        for start in range(0, len(images), 3)
    ]

    gallery_sx = "(~kg-gallery :images (list " + " ".join(rows) + ")"
    caption = node.get("caption", "")
    if caption:
        gallery_sx += f" :caption {html_to_sx(caption)}"
    return gallery_sx + ")"
|
||
|
||
|
||
@_converter("html")
def _html_card(node: dict) -> str:
    """Render an HTML card as ``(~kg-html ...)``.

    The node's ``"html"`` field is converted with ``html_to_sx`` and the
    result is placed inside the component as its children.
    """
    children_sx = html_to_sx(node.get("html", ""))
    return f"(~kg-html {children_sx})"
|
||
|
||
|
||
@_converter("embed")
def _embed(node: dict) -> str:
    """Render an embed card as ``(~kg-embed :html "..." [:caption ...])``.

    The embed markup is kept as an escaped string; only the caption is
    converted to an sx expression via ``html_to_sx``.
    """
    markup = node.get("html", "")
    caption = node.get("caption", "")

    attrs = [f':html "{_esc(markup)}"']
    if caption:
        attrs.append(f":caption {html_to_sx(caption)}")
    return f"(~kg-embed {' '.join(attrs)})"
|
||
|
||
|
||
@_converter("bookmark")
def _bookmark(node: dict) -> str:
    """Render a bookmark card as ``(~kg-bookmark :url "..." ...)``.

    ``title``, ``description``, ``icon``, ``author``, ``publisher`` and
    ``thumbnail`` are each read from the node's ``"metadata"`` dict first
    and from the node itself as a fallback; a field is emitted only when a
    truthy value is found.  The caption, when present, is converted with
    ``html_to_sx`` and emitted last.
    """
    url = node.get("url", "")
    # "metadata" can be present but null (or otherwise not a dict) in stored
    # JSON; treat that the same as a missing key instead of crashing on .get.
    meta = node.get("metadata")
    if not isinstance(meta, dict):
        meta = {}

    parts = [f':url "{_esc(url)}"']
    for field in ("title", "description", "icon", "author", "publisher", "thumbnail"):
        value = meta.get(field, "") or node.get(field, "")
        if value:
            parts.append(f':{field} "{_esc(value)}"')

    caption = node.get("caption", "")
    if caption:
        parts.append(f":caption {html_to_sx(caption)}")

    return "(~kg-bookmark " + " ".join(parts) + ")"
|
||
|
||
|
||
@_converter("callout")
def _callout(node: dict) -> str:
    """Render a callout card as ``(~kg-callout :color "..." ...)``.

    ``:color`` comes from ``"backgroundColor"`` (default ``"grey"``).
    ``:emoji`` and ``:content`` are emitted only when the node has an emoji
    or convertible children respectively.
    """
    background = node.get("backgroundColor", "grey")
    emoji = node.get("calloutEmoji", "")
    body = _convert_children(node.get("children", []))

    attrs = [f':color "{_esc(background)}"']
    if emoji:
        attrs.append(f':emoji "{_esc(emoji)}"')
    if body:
        attrs.append(f":content {body}")
    return f"(~kg-callout {' '.join(attrs)})"
|
||
|
||
|
||
@_converter("button")
def _button(node: dict) -> str:
    """Render a button card as ``(~kg-button :url ... :text ... :alignment ...)``.

    All three attributes are always emitted; alignment defaults to
    ``"center"`` when the node does not specify one.
    """
    label = node.get("buttonText", "")
    target = node.get("buttonUrl", "")
    alignment = node.get("alignment", "center")

    attrs = [
        f':url "{_esc(target)}"',
        f':text "{_esc(label)}"',
        f':alignment "{_esc(alignment)}"',
    ]
    return "(~kg-button " + " ".join(attrs) + ")"
|
||
|
||
|
||
@_converter("toggle")
def _toggle(node: dict) -> str:
    """Render a toggle card as ``(~kg-toggle :heading "..." [:content ...])``.

    ``:content`` is built from the node's children and omitted when none of
    them convert to anything.
    """
    heading = node.get("heading", "")
    body = _convert_children(node.get("children", []))

    attrs = [f':heading "{_esc(heading)}"']
    if body:
        attrs.append(f":content {body}")
    return "(~kg-toggle " + " ".join(attrs) + ")"
|
||
|
||
|
||
@_converter("audio")
def _audio(node: dict) -> str:
    """Render an audio card as ``(~kg-audio :src "..." ... :duration "m:ss" ...)``.

    ``:src`` and ``:duration`` are always emitted; ``:title`` and
    ``:thumbnail`` (from ``"thumbnailSrc"``) only when truthy.  The duration
    is read as a number of seconds and formatted as minutes and zero-padded
    seconds.  A missing, null, non-numeric or negative duration is rendered
    as ``0:00`` rather than raising or producing a negative clock value.
    """
    src = node.get("src", "")
    title = node.get("title", "")
    thumbnail = node.get("thumbnailSrc", "")

    try:
        # `or 0` covers an explicit JSON null as well as a missing key.
        total_seconds = int(node.get("duration") or 0)
    except (TypeError, ValueError, OverflowError):
        # Non-numeric strings, NaN/inf, or unexpected container types.
        total_seconds = 0
    minutes, seconds = divmod(max(total_seconds, 0), 60)
    duration_str = f"{minutes}:{seconds:02d}"

    parts = [f':src "{_esc(src)}"']
    if title:
        parts.append(f':title "{_esc(title)}"')
    parts.append(f':duration "{duration_str}"')
    if thumbnail:
        parts.append(f':thumbnail "{_esc(thumbnail)}"')
    return "(~kg-audio " + " ".join(parts) + ")"
|
||
|
||
|
||
@_converter("video")
def _video(node: dict) -> str:
    """Render a video card as ``(~kg-video :src "..." ...)``.

    Optional attributes, in output order: ``:caption`` (converted with
    ``html_to_sx``), ``:width`` (from ``"cardWidth"``), ``:thumbnail`` (from
    ``"thumbnailSrc"``, else ``"customThumbnailSrc"``) and ``:loop true``
    when the node's ``"loop"`` flag is truthy.
    """
    source = node.get("src", "")
    caption = node.get("caption", "")
    card_width = node.get("cardWidth", "")
    poster = node.get("thumbnailSrc", "") or node.get("customThumbnailSrc", "")
    loops = node.get("loop", False)

    attrs = [f':src "{_esc(source)}"']
    if caption:
        attrs.append(f":caption {html_to_sx(caption)}")
    for key, value in (("width", card_width), ("thumbnail", poster)):
        if value:
            attrs.append(f':{key} "{_esc(value)}"')
    if loops:
        attrs.append(":loop true")
    return f"(~kg-video {' '.join(attrs)})"
|
||
|
||
|
||
@_converter("file")
def _file(node: dict) -> str:
    """Render a file card as ``(~kg-file :src "..." ...)``.

    ``:title`` falls back to the file name when the node has no title.
    ``:filesize`` is derived from ``"fileSize"`` by dividing by 1024: values
    below 1024 KB are shown as whole ``KB``, larger ones as ``MB`` with one
    decimal place; it is omitted when the size is missing or zero.  The
    caption is converted with ``html_to_sx``.
    """
    source = node.get("src", "")
    filename = node.get("fileName", "")
    title = node.get("title", "") or filename
    byte_count = node.get("fileSize", 0)
    caption = node.get("caption", "")

    size_label = ""
    if byte_count:
        kilobytes = byte_count / 1024
        size_label = (
            f"{kilobytes:.0f} KB" if kilobytes < 1024
            else f"{kilobytes / 1024:.1f} MB"
        )

    attrs = [f':src "{_esc(source)}"']
    for key, value in (("filename", filename), ("title", title)):
        if value:
            attrs.append(f':{key} "{_esc(value)}"')
    if size_label:
        attrs.append(f':filesize "{size_label}"')
    if caption:
        attrs.append(f":caption {html_to_sx(caption)}")
    return f"(~kg-file {' '.join(attrs)})"
|
||
|
||
|
||
@_converter("paywall")
def _paywall(_node: dict) -> str:
    """Render a paywall marker; the node's contents are ignored."""
    paywall_sx = "(~kg-paywall)"
    return paywall_sx
|
||
|
||
|
||
@_converter("markdown")
def _markdown(node: dict) -> str:
    """Render a markdown card as ``(~kg-md ...)``.

    The node's ``"markdown"`` text is rendered to HTML with
    ``mistune.html`` and that HTML is then converted with ``html_to_sx``;
    the result becomes the component's children.
    """
    html = mistune.html(node.get("markdown", ""))
    children_sx = html_to_sx(html)
    return f"(~kg-md {children_sx})"
|