Add shared/sx/html_to_sx.py (HTMLParser-based HTML→sx converter) and update lexical_to_sx.py so HTML cards, markdown cards, and captions all produce native sx expressions instead of opaque HTML strings. - ~kg-html now wraps native sx children (editor can identify the block) - New ~kg-md component for markdown card blocks - Captions are sx expressions, not escaped HTML strings - kg_cards.sx: replace (raw! caption) with direct caption rendering - sx-editor.js: htmlToSx() via DOMParser, serializeInline for captions, _childrenSx for ~kg-html/~kg-md, new kg-md edit UI - Migration script (blog/scripts/migrate_sx_html.py) to re-convert stored sx_content from lexical source Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
160 lines
4.8 KiB
Python
160 lines
4.8 KiB
Python
"""Unit tests for html_to_sx converter."""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
from shared.sx.html_to_sx import html_to_sx
|
|
|
|
|
|
class TestBasicElements:
    """Conversion of ordinary elements, their attributes, and nesting."""

    def test_simple_paragraph(self):
        """A paragraph holding only text becomes a ``p`` form with one string."""
        converted = html_to_sx("<p>Hello</p>")
        assert converted == '(p "Hello")'

    def test_with_class(self):
        """An HTML attribute is emitted as a keyword before the children."""
        converted = html_to_sx('<p class="intro">Hi</p>')
        assert converted == '(p :class "intro" "Hi")'

    def test_multiple_attrs(self):
        """Several attributes keep the order in which they were written."""
        markup = '<a href="u" class="link">click</a>'
        expected = '(a :href "u" :class "link" "click")'
        assert html_to_sx(markup) == expected

    def test_nested_inline(self):
        """Text and an inline child element appear as sibling children."""
        markup = "<p>Hello <em>world</em></p>"
        expected = '(p "Hello " (em "world"))'
        assert html_to_sx(markup) == expected

    def test_deeply_nested(self):
        """Each nesting level of elements maps to one nested form."""
        markup = "<div><p><strong>bold</strong></p></div>"
        expected = '(div (p (strong "bold")))'
        assert html_to_sx(markup) == expected
|
|
|
|
|
|
class TestVoidElements:
    """Elements written without a closing tag still produce complete forms."""

    def test_br(self):
        """A lone ``<br>`` becomes an empty ``(br)`` form."""
        converted = html_to_sx("<br>")
        assert converted == "(br)"

    def test_img(self):
        """An ``<img>`` keeps its attributes and has no children."""
        markup = '<img src="a.jpg" alt="pic">'
        expected = '(img :src "a.jpg" :alt "pic")'
        assert html_to_sx(markup) == expected

    def test_hr(self):
        """A lone ``<hr>`` becomes an empty ``(hr)`` form."""
        converted = html_to_sx("<hr>")
        assert converted == "(hr)"

    def test_input(self):
        """An ``<input>`` keeps its attributes and has no children."""
        markup = '<input type="text" value="hi">'
        expected = '(input :type "text" :value "hi")'
        assert html_to_sx(markup) == expected
|
|
|
|
|
|
class TestBooleanAttrs:
    """Valueless HTML attributes are emitted as ``:name true``."""

    def test_checked(self):
        """``checked`` without a value is rendered as ``:checked true``."""
        markup = '<input type="checkbox" checked>'
        expected = '(input :type "checkbox" :checked true)'
        assert html_to_sx(markup) == expected

    def test_disabled(self):
        """``disabled`` precedes the element's text child in the output."""
        markup = '<button disabled>No</button>'
        expected = '(button :disabled true "No")'
        assert html_to_sx(markup) == expected

    def test_controls(self):
        """``controls`` on an element with no children still yields ``true``."""
        markup = '<video controls></video>'
        expected = '(video :controls true)'
        assert html_to_sx(markup) == expected
|
|
|
|
|
|
class TestTopLevel:
    """Shape of the result depending on how many root-level nodes exist."""

    def test_multiple_top_level(self):
        """Two sibling roots are wrapped in a ``<>`` fragment form."""
        markup = "<p>A</p><p>B</p>"
        expected = '(<> (p "A") (p "B"))'
        assert html_to_sx(markup) == expected

    def test_single_top_level(self):
        """A single root element is returned without a fragment wrapper."""
        markup = "<p>Only</p>"
        expected = '(p "Only")'
        assert html_to_sx(markup) == expected

    def test_text_only(self):
        """Plain text with no markup becomes a bare string literal."""
        markup = "just text"
        expected = '"just text"'
        assert html_to_sx(markup) == expected

    def test_empty(self):
        """Empty input yields the empty string literal."""
        converted = html_to_sx("")
        assert converted == '""'

    def test_whitespace_only(self):
        """Input made only of whitespace also yields the empty string literal."""
        converted = html_to_sx(" \n ")
        assert converted == '""'
|
|
|
|
|
|
class TestWhitespace:
    """Handling of whitespace that sits between root-level elements."""

    def test_root_whitespace_stripped(self):
        """Newlines around and between root elements leave no string children."""
        markup = "\n<p>A</p>\n<p>B</p>\n"
        expected = '(<> (p "A") (p "B"))'
        assert html_to_sx(markup) == expected
|
|
|
|
|
|
class TestEntities:
    """HTML character references must be decoded into literal characters.

    Every input here uses an explicit character reference (``&amp;``,
    ``&lt;``/``&gt;``, ``&nbsp;``) so that the decoding path is what is
    actually being exercised; a raw ``&`` or a raw ``<tag>`` in the input
    would test something else entirely.
    """

    def test_amp(self):
        """``&amp;`` in the input is expected as a literal ``&`` in the sx string."""
        result = html_to_sx("<p>A &amp; B</p>")
        assert result == '(p "A & B")'

    def test_lt_gt(self):
        """``&lt;``/``&gt;`` are expected as literal angle brackets in text.

        The escaped form is required in the input: a raw ``<tag>`` would be
        parsed as an element rather than as text.
        """
        result = html_to_sx("<p>&lt;tag&gt;</p>")
        assert result == '(p "<tag>")'

    def test_nbsp(self):
        """``&nbsp;`` is expected as U+00A0 (no-break space) in the sx string."""
        result = html_to_sx("<p>hello&nbsp;world</p>")
        assert result == '(p "hello\u00a0world")'
|
|
|
|
|
|
class TestEscaping:
    """Characters special inside sx string literals must be escaped on output."""

    def test_quotes_in_text(self):
        """Double quotes in text are expected to be backslash-escaped."""
        result = html_to_sx('<p>He said "hello"</p>')
        assert result == '(p "He said \\"hello\\"")'

    def test_backslash_in_text(self):
        """A single backslash in text is expected to be doubled."""
        result = html_to_sx("<p>a\\b</p>")
        assert result == '(p "a\\\\b")'

    def test_quotes_in_attr(self):
        """A double quote inside an attribute value is expected to be escaped.

        The quote is written as ``&quot;`` in the HTML source: a raw double
        quote would terminate the attribute value early, so the value under
        test would never contain a quote at all.
        """
        result = html_to_sx('<div title="a&quot;b">x</div>')
        assert result == '(div :title "a\\"b" "x")'
|
|
|
|
|
|
class TestComments:
    """HTML comments must leave no trace in the converted output."""

    def test_comment_stripped(self):
        """A comment before the only element does not add a node or fragment."""
        markup = "<!-- comment --><p>hi</p>"
        expected = '(p "hi")'
        assert html_to_sx(markup) == expected
|
|
|
|
|
|
class TestMixedContent:
    """Caption-like input: bare text, optionally mixed with inline elements."""

    def test_caption_with_link(self):
        """Root-level text followed by a link is wrapped in a ``<>`` fragment."""
        markup = 'Photo by <a href="https://x.com">Author</a>'
        expected = '(<> "Photo by " (a :href "https://x.com" "Author"))'
        assert html_to_sx(markup) == expected

    def test_caption_plain_text(self):
        """A caption with no markup becomes a bare string literal."""
        markup = "Figure 1"
        expected = '"Figure 1"'
        assert html_to_sx(markup) == expected
|
|
|
|
|
|
class TestRoundtrip:
    """html_to_sx → parse → render should produce equivalent HTML."""

    def _roundtrip(self, html_in: str) -> str:
        """Convert *html_in* to sx source, parse it, and render it back to HTML.

        Args:
            html_in: HTML fragment to push through the full pipeline.

        Returns:
            Whatever ``render`` produces for the parsed expression.
        """
        # Imported inside the helper, so these modules are only loaded
        # when a roundtrip test actually runs.
        from shared.sx.parser import parse
        from shared.sx.html import render

        sx_src = html_to_sx(html_in)
        expr = parse(sx_src)
        return render(expr)

    def test_simple(self):
        """A text-only paragraph comes back unchanged."""
        assert self._roundtrip("<p>Hello</p>") == "<p>Hello</p>"

    def test_nested(self):
        """Text on both sides of an inline element is preserved."""
        assert self._roundtrip("<p>A <em>B</em> C</p>") == "<p>A <em>B</em> C</p>"

    def test_void(self):
        """A void element with an empty attribute value comes back unchanged."""
        assert self._roundtrip('<img src="a.jpg" alt="">') == '<img src="a.jpg" alt="">'

    def test_link(self):
        """An anchor with an absolute URL comes back unchanged."""
        html = '<a href="https://example.com">click</a>'
        assert self._roundtrip(html) == html

    def test_entities_roundtrip(self):
        """An escaped ampersand is expected to come back escaped."""
        # Entities get decoded by parser, then re-escaped by render, so the
        # input and the expected output must both carry the escaped form.
        assert self._roundtrip("<p>A &amp; B</p>") == "<p>A &amp; B</p>"

    def test_multi_block(self):
        """Two sibling root elements come back without any wrapper markup."""
        html = "<p>A</p><p>B</p>"
        assert self._roundtrip(html) == html
|