Files
rose-ash/shared/sx/tests/test_html_to_sx.py
giles 8ceb9aee62 Eliminate raw HTML injection: convert ~kg-html/captions to native sx
Add shared/sx/html_to_sx.py (HTMLParser-based HTML→sx converter) and
update lexical_to_sx.py so HTML cards, markdown cards, and captions all
produce native sx expressions instead of opaque HTML strings.

- ~kg-html now wraps native sx children (editor can identify the block)
- New ~kg-md component for markdown card blocks
- Captions are sx expressions, not escaped HTML strings
- kg_cards.sx: replace (raw! caption) with direct caption rendering
- sx-editor.js: htmlToSx() via DOMParser, serializeInline for captions,
  _childrenSx for ~kg-html/~kg-md, new kg-md edit UI
- Migration script (blog/scripts/migrate_sx_html.py) to re-convert
  stored sx_content from lexical source

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-03-02 19:57:27 +00:00

160 lines
4.8 KiB
Python

"""Unit tests for html_to_sx converter."""
from __future__ import annotations
import pytest
from shared.sx.html_to_sx import html_to_sx
class TestBasicElements:
    """Ordinary (non-void) elements: text children, attributes, nesting."""

    def test_simple_paragraph(self):
        """A lone paragraph is expected to become a tag form with a string child."""
        sx = html_to_sx("<p>Hello</p>")
        assert sx == '(p "Hello")'

    def test_with_class(self):
        """A single attribute is expected as a keyword pair before the text."""
        sx = html_to_sx('<p class="intro">Hi</p>')
        assert sx == '(p :class "intro" "Hi")'

    def test_multiple_attrs(self):
        """Several attributes are expected in source order, ahead of children."""
        assert (
            html_to_sx('<a href="u" class="link">click</a>')
            == '(a :href "u" :class "link" "click")'
        )

    def test_nested_inline(self):
        """Inline markup inside text is expected as a nested form after the text."""
        assert (
            html_to_sx("<p>Hello <em>world</em></p>")
            == '(p "Hello " (em "world"))'
        )

    def test_deeply_nested(self):
        """Three levels of nesting are expected to map to three nested forms."""
        assert (
            html_to_sx("<div><p><strong>bold</strong></p></div>")
            == '(div (p (strong "bold")))'
        )
class TestVoidElements:
    """Void elements (no closing tag) are expected as childless forms."""

    def test_br(self):
        """A bare line break is expected to become an empty form."""
        sx = html_to_sx("<br>")
        assert sx == "(br)"

    def test_img(self):
        """An image is expected to keep its attributes and have no children."""
        assert (
            html_to_sx('<img src="a.jpg" alt="pic">')
            == '(img :src "a.jpg" :alt "pic")'
        )

    def test_hr(self):
        """A horizontal rule is expected to become an empty form."""
        sx = html_to_sx("<hr>")
        assert sx == "(hr)"

    def test_input(self):
        """An input is expected to keep its attributes and have no children."""
        assert (
            html_to_sx('<input type="text" value="hi">')
            == '(input :type "text" :value "hi")'
        )
class TestBooleanAttrs:
    """Valueless HTML attributes are expected to be emitted with the value true."""

    def test_checked(self):
        """A valueless attribute following a valued one on a void element."""
        assert (
            html_to_sx('<input type="checkbox" checked>')
            == '(input :type "checkbox" :checked true)'
        )

    def test_disabled(self):
        """A valueless attribute on an element that also has a text child."""
        assert (
            html_to_sx('<button disabled>No</button>')
            == '(button :disabled true "No")'
        )

    def test_controls(self):
        """A valueless attribute on an element with no children at all."""
        assert (
            html_to_sx('<video controls></video>')
            == '(video :controls true)'
        )
class TestTopLevel:
    """Shape of the overall result for zero, one, or several root nodes."""

    def test_multiple_top_level(self):
        """Sibling roots are expected to be wrapped in a fragment form."""
        assert html_to_sx("<p>A</p><p>B</p>") == '(<> (p "A") (p "B"))'

    def test_single_top_level(self):
        """A single root is expected to be returned without a fragment wrapper."""
        assert html_to_sx("<p>Only</p>") == '(p "Only")'

    def test_text_only(self):
        """Markup-free input is expected to become one string literal."""
        assert html_to_sx("just text") == '"just text"'

    def test_empty(self):
        """Empty input is expected to yield an empty string literal."""
        sx = html_to_sx("")
        assert sx == '""'

    def test_whitespace_only(self):
        """Whitespace-only input is expected to yield an empty string literal."""
        sx = html_to_sx(" \n ")
        assert sx == '""'
class TestWhitespace:
    """Treatment of whitespace that sits between root-level elements."""

    def test_root_whitespace_stripped(self):
        """Newlines around and between root elements are expected to vanish."""
        assert (
            html_to_sx("\n<p>A</p>\n<p>B</p>\n")
            == '(<> (p "A") (p "B"))'
        )
class TestEntities:
    """HTML character references are expected to be decoded in the output."""

    def test_amp(self):
        """An encoded ampersand is expected as a literal ampersand."""
        assert html_to_sx("<p>A &amp; B</p>") == '(p "A & B")'

    def test_lt_gt(self):
        """Encoded angle brackets are expected as literal characters, not tags."""
        assert html_to_sx("<p>&lt;tag&gt;</p>") == '(p "<tag>")'

    def test_nbsp(self):
        """A non-breaking-space entity is expected as the U+00A0 character."""
        assert (
            html_to_sx("<p>hello&nbsp;world</p>")
            == '(p "hello\u00a0world")'
        )
class TestEscaping:
    """Characters special inside sx string literals are expected to be escaped."""

    def test_quotes_in_text(self):
        """Double quotes in text content are expected to be backslash-escaped."""
        assert (
            html_to_sx('<p>He said "hello"</p>')
            == '(p "He said \\"hello\\"")'
        )

    def test_backslash_in_text(self):
        """A single backslash in text content is expected to be doubled."""
        assert html_to_sx("<p>a\\b</p>") == '(p "a\\\\b")'

    def test_quotes_in_attr(self):
        """A quote entity in an attribute value is expected decoded, then escaped."""
        # Attribute values with quotes get escaped
        assert (
            html_to_sx('<div title="a&quot;b">x</div>')
            == '(div :title "a\\"b" "x")'
        )
class TestComments:
    """HTML comments are expected to leave no trace in the output."""

    def test_comment_stripped(self):
        """A comment ahead of a paragraph is expected to be dropped entirely."""
        assert html_to_sx("<!-- comment --><p>hi</p>") == '(p "hi")'
class TestMixedContent:
    """Caption-style fragments: root-level text, optionally mixed with elements."""

    def test_caption_with_link(self):
        """Root text followed by a link is expected inside a fragment form."""
        assert (
            html_to_sx('Photo by <a href="https://x.com">Author</a>')
            == '(<> "Photo by " (a :href "https://x.com" "Author"))'
        )

    def test_caption_plain_text(self):
        """A plain-text caption is expected as a single string literal."""
        assert html_to_sx("Figure 1") == '"Figure 1"'
class TestRoundtrip:
    """Converting HTML to sx, parsing it, and rendering it should reproduce the HTML."""

    def _roundtrip(self, html_in: str) -> str:
        """Run *html_in* through html_to_sx, then parse, then render.

        The parser and renderer are imported inside the method rather than
        at module level, so they are only loaded when a roundtrip test runs.
        """
        from shared.sx.parser import parse
        from shared.sx.html import render

        return render(parse(html_to_sx(html_in)))

    def test_simple(self):
        """A plain paragraph is expected to come back unchanged."""
        html = "<p>Hello</p>"
        assert self._roundtrip(html) == html

    def test_nested(self):
        """Text interleaved with an inline element is expected to be unchanged."""
        html = "<p>A <em>B</em> C</p>"
        assert self._roundtrip(html) == html

    def test_void(self):
        """A void element with an empty attribute value is expected unchanged."""
        html = '<img src="a.jpg" alt="">'
        assert self._roundtrip(html) == html

    def test_link(self):
        """An anchor with an href is expected to come back unchanged."""
        anchor = '<a href="https://example.com">click</a>'
        assert self._roundtrip(anchor) == anchor

    def test_entities_roundtrip(self):
        """An encoded ampersand is expected to be encoded again on output."""
        # Entities get decoded by parser, then re-escaped by render
        html = "<p>A &amp; B</p>"
        assert self._roundtrip(html) == html

    def test_multi_block(self):
        """Two sibling paragraphs are expected to come back unchanged."""
        blocks = "<p>A</p><p>B</p>"
        assert self._roundtrip(blocks) == blocks