Files
rose-ash/lib/host/tests/htmlsx.sx
giles 7e2275b90c host: SX-native HTML→SX converter (the radar migrator) + first-class HTML import
lib/host/htmlsx.sx — a pure-SX HTML → SX converter (char-level tokenizer + stack parser):
host/html->sx turns a post's HTML into an (article …) tree that host/blog--decompose! consumes
— img / p / figure+figcaption / iframe / headings / blockquote / lists, inline strong/em/a kept
nested (decompose flattens to text), entities decoded to UTF-8, comments+doctype skipped. This
replaces the one-off external Python converter used for the nt-live-encore import.

import-post! now accepts a raw "html" field (converted via html->sx, serialized to sx_content,
decomposed) alongside "sx_content" — so importing real Ghost HTML is first-class. Wired
htmlsx.sx into conformance.sh + serve.sh module lists (loads in conformance AND live).

New htmlsx suite 8/8 (text/entities/void/nested/figure/iframe/comments + an html→sx→decompose→
typed-cards round-trip); blog 197/197 (+ import-from-html test).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-07-01 15:32:06 +00:00

64 lines
3.4 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
;; lib/host/tests/htmlsx.sx — the pure-SX HTML → SX converter (host/html->sx). Covers text,
;; entities, void/nested tags, attributes, figure/iframe, and an end-to-end import round-trip.
(define host-ht-pass 0)
(define host-ht-fail 0)
(define host-ht-fails (list))
(define host-ht-test
(fn (name actual expected)
(if (= actual expected)
(set! host-ht-pass (+ host-ht-pass 1))
(begin
(set! host-ht-fail (+ host-ht-fail 1))
(append! host-ht-fails {:name name :actual actual :expected expected})))))
;; a paragraph with inline formatting — kept nested (decompose flattens to text later).
(host-ht-test "a <p> with inline <strong> parses to (p \"…\" (strong \"…\") \"…\")"
(str (host/html->sx "<p>Hello <strong>world</strong> now</p>"))
"(article (p \"Hello \" (strong \"world\") \" now\"))")
;; HTML entities decode to UTF-8 (not \\uXXXX).
(host-ht-test "entities decode (&amp; &pound; &#x2019;)"
(str (host/html->sx "<p>Tom &amp; Jerry cost &pound;5 &#x2019;n up</p>"))
"(article (p \"Tom & Jerry cost £5 n up\"))")
;; a void <img> keeps its attributes as keyword attrs.
(host-ht-test "a void <img> keeps :src/:alt attrs"
(str (host/html->sx "<img src=\"a.jpg\" alt=\"a photo\">"))
"(article (img :alt \"a photo\" :src \"a.jpg\"))")
;; a <figure> with an <img> + <figcaption> nests correctly.
(host-ht-test "a <figure> nests an <img> and a <figcaption>"
(str (host/html->sx "<figure><img src=\"y.jpg\" alt=\"y\"><figcaption>a caption</figcaption></figure>"))
"(article (figure (img :alt \"y\" :src \"y.jpg\") (figcaption \"a caption\")))")
;; an <iframe> is a void-ish embed (self-contained token).
(host-ht-test "an <iframe> becomes a leaf with its :src"
(str (host/html->sx "<iframe src=\"https://youtube.com/embed/x\"></iframe>"))
"(article (iframe :src \"https://youtube.com/embed/x\"))")
;; comments + doctype are skipped; whitespace-only text is dropped.
(host-ht-test "comments/doctype are skipped, blank text dropped"
(str (host/html->sx "<!-- hi --> <p>x</p>\n <p>y</p>"))
"(article (p \"x\") (p \"y\"))")
;; headings map through (decompose then turns h2 into card-heading).
(host-ht-test "headings + paragraphs come through in order"
(str (host/html->sx "<h2>Title</h2><p>body</p>"))
"(article (h2 \"Title\") (p \"body\"))")
;; ── END TO END: HTML → SX → decompose → typed card objects ──────────
(host/blog-use-store! (persist/open))
(host/blog-seed-types!)
(host-ht-test "html->sx feeds decompose: a real snippet becomes typed cards"
(begin
(host/blog-put! "htdoc" "HT" "(p)" "published")
(host/blog--decompose! "htdoc"
(host/html->sx "<h2>Heading</h2><p>Some <strong>bold</strong> text.</p><figure><img src=\"p.jpg\" alt=\"a\"><figcaption>cap</figcaption></figure><iframe src=\"https://youtube.com/embed/z\"></iframe>"))
(list (host/blog-is-a? "htdoc__body__b0" "card-heading")
(host/blog-is-a? "htdoc__body__b1" "card-text")
(get (host/blog-field-values-of "htdoc__body__b1") "text") ;; strong flattened to text
(host/blog-is-a? "htdoc__body__b2" "card-image")
(get (host/blog-field-values-of "htdoc__body__b2") "caption")
(host/blog-is-a? "htdoc__body__b3" "card-embed")))
(list true true "Some bold text." true "cap" true))
(define host-ht-tests-run!
(fn ()
{:total (+ host-ht-pass host-ht-fail)
:passed host-ht-pass :failed host-ht-fail :fails host-ht-fails}))