Eliminate raw HTML injection: convert ~kg-html/captions to native sx

Add shared/sx/html_to_sx.py (HTMLParser-based HTML→sx converter) and
update lexical_to_sx.py so HTML cards, markdown cards, and captions all
produce native sx expressions instead of opaque HTML strings.

- ~kg-html now wraps native sx children (editor can identify the block)
- New ~kg-md component for markdown card blocks
- Captions are sx expressions, not escaped HTML strings
- kg_cards.sx: replace (raw! caption) with direct caption rendering
- sx-editor.js: htmlToSx() via DOMParser, serializeInline for captions,
  _childrenSx for ~kg-html/~kg-md, new kg-md edit UI
- Migration script (blog/scripts/migrate_sx_html.py) to re-convert
  stored sx_content from lexical source

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-03-02 19:57:27 +00:00
parent 4668c30890
commit 8ceb9aee62
7 changed files with 595 additions and 25 deletions

View File

@@ -5,8 +5,9 @@ import sys
import os
import pytest
# The lexical_to_sx module is standalone (only depends on json).
# Import it directly to avoid pulling in the full blog app.
# Add project root so shared.sx.html_to_sx resolves, plus the ghost dir.
_project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
sys.path.insert(0, _project_root)
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "bp", "blog", "ghost"))
from lexical_to_sx import lexical_to_sx
@@ -176,6 +177,13 @@ class TestCards:
assert ':width "wide"' in result
assert ':caption "Fig 1"' in result
def test_image_html_caption(self):
    """An image card whose caption contains HTML yields an sx caption expression."""
    image_node = {
        "type": "image", "src": "p.jpg", "alt": "",
        "caption": 'Photo by <a href="https://x.com">Author</a>',
    }
    rendered = lexical_to_sx(_doc(image_node))
    # The anchor must appear as a native (a ...) form inside a fragment,
    # not as an escaped HTML string.
    expected_caption = ':caption (<> "Photo by " (a :href "https://x.com" "Author"))'
    assert expected_caption in rendered
def test_bookmark(self):
result = lexical_to_sx(_doc({
"type": "bookmark", "url": "https://example.com",
@@ -214,7 +222,7 @@ class TestCards:
result = lexical_to_sx(_doc({
"type": "html", "html": "<div>custom</div>"
}))
assert "(~kg-html " in result
assert result == '(~kg-html (div "custom"))'
def test_embed(self):
result = lexical_to_sx(_doc({
@@ -224,6 +232,14 @@ class TestCards:
assert "(~kg-embed " in result
assert ':caption "Video"' in result
def test_markdown(self):
    """A markdown card is wrapped in ~kg-md and its content becomes sx elements."""
    markdown_node = {"type": "markdown", "markdown": "**bold** text"}
    rendered = lexical_to_sx(_doc(markdown_node))
    assert rendered.startswith("(~kg-md ")
    # Bold markdown inside a paragraph should surface as (p ...) and (strong ...).
    for fragment in ("(p ", "(strong "):
        assert fragment in rendered
def test_video(self):
result = lexical_to_sx(_doc({
"type": "video", "src": "v.mp4", "cardWidth": "wide"