lib/host/htmlsx.sx — a pure-SX HTML → SX converter (char-level tokenizer + stack parser): host/html->sx turns a post's HTML into an (article …) tree that host/blog--decompose! consumes — img / p / figure+figcaption / iframe / headings / blockquote / lists, inline strong/em/a kept nested (decompose flattens to text), entities decoded to UTF-8, comments+doctype skipped. This replaces the one-off external Python converter used for the nt-live-encore import. import-post! now accepts a raw "html" field (converted via html->sx, serialized to sx_content, decomposed) alongside "sx_content" — so importing real Ghost HTML is first-class. Wired htmlsx.sx into conformance.sh + serve.sh module lists (loads in conformance AND live). New htmlsx suite 8/8 (text/entities/void/nested/figure/iframe/comments + an html→sx→decompose→ typed-cards round-trip); blog 197/197 (+ import-from-html test). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
64 lines
3.4 KiB
Plaintext
64 lines
3.4 KiB
Plaintext
;; lib/host/tests/htmlsx.sx — the pure-SX HTML → SX converter (host/html->sx). Covers text,
|
||
;; entities, void/nested tags, attributes, figure/iframe, and an end-to-end import round-trip.
|
||
|
||
(define host-ht-pass 0)
|
||
(define host-ht-fail 0)
|
||
(define host-ht-fails (list))
|
||
(define host-ht-test
|
||
(fn (name actual expected)
|
||
(if (= actual expected)
|
||
(set! host-ht-pass (+ host-ht-pass 1))
|
||
(begin
|
||
(set! host-ht-fail (+ host-ht-fail 1))
|
||
(append! host-ht-fails {:name name :actual actual :expected expected})))))
|
||
|
||
;; a paragraph with inline formatting — kept nested (decompose flattens to text later).
|
||
(host-ht-test "a <p> with inline <strong> parses to (p \"…\" (strong \"…\") \"…\")"
|
||
(str (host/html->sx "<p>Hello <strong>world</strong> now</p>"))
|
||
"(article (p \"Hello \" (strong \"world\") \" now\"))")
|
||
;; HTML entities decode to UTF-8 (not \\uXXXX).
|
||
(host-ht-test "entities decode (& £ ’)"
|
||
(str (host/html->sx "<p>Tom & Jerry cost £5 ’n up</p>"))
|
||
"(article (p \"Tom & Jerry cost £5 ’n up\"))")
|
||
;; a void <img> keeps its attributes as keyword attrs.
|
||
(host-ht-test "a void <img> keeps :src/:alt attrs"
|
||
(str (host/html->sx "<img src=\"a.jpg\" alt=\"a photo\">"))
|
||
"(article (img :alt \"a photo\" :src \"a.jpg\"))")
|
||
;; a <figure> with an <img> + <figcaption> nests correctly.
|
||
(host-ht-test "a <figure> nests an <img> and a <figcaption>"
|
||
(str (host/html->sx "<figure><img src=\"y.jpg\" alt=\"y\"><figcaption>a caption</figcaption></figure>"))
|
||
"(article (figure (img :alt \"y\" :src \"y.jpg\") (figcaption \"a caption\")))")
|
||
;; an <iframe> is a void-ish embed (self-contained token).
|
||
(host-ht-test "an <iframe> becomes a leaf with its :src"
|
||
(str (host/html->sx "<iframe src=\"https://youtube.com/embed/x\"></iframe>"))
|
||
"(article (iframe :src \"https://youtube.com/embed/x\"))")
|
||
;; comments + doctype are skipped; whitespace-only text is dropped.
|
||
(host-ht-test "comments/doctype are skipped, blank text dropped"
|
||
(str (host/html->sx "<!-- hi --> <p>x</p>\n <p>y</p>"))
|
||
"(article (p \"x\") (p \"y\"))")
|
||
;; headings map through (decompose then turns h2 into card-heading).
|
||
(host-ht-test "headings + paragraphs come through in order"
|
||
(str (host/html->sx "<h2>Title</h2><p>body</p>"))
|
||
"(article (h2 \"Title\") (p \"body\"))")
|
||
|
||
;; ── END TO END: HTML → SX → decompose → typed card objects ──────────
|
||
(host/blog-use-store! (persist/open))
|
||
(host/blog-seed-types!)
|
||
(host-ht-test "html->sx feeds decompose: a real snippet becomes typed cards"
|
||
(begin
|
||
(host/blog-put! "htdoc" "HT" "(p)" "published")
|
||
(host/blog--decompose! "htdoc"
|
||
(host/html->sx "<h2>Heading</h2><p>Some <strong>bold</strong> text.</p><figure><img src=\"p.jpg\" alt=\"a\"><figcaption>cap</figcaption></figure><iframe src=\"https://youtube.com/embed/z\"></iframe>"))
|
||
(list (host/blog-is-a? "htdoc__body__b0" "card-heading")
|
||
(host/blog-is-a? "htdoc__body__b1" "card-text")
|
||
(get (host/blog-field-values-of "htdoc__body__b1") "text") ;; strong flattened to text
|
||
(host/blog-is-a? "htdoc__body__b2" "card-image")
|
||
(get (host/blog-field-values-of "htdoc__body__b2") "caption")
|
||
(host/blog-is-a? "htdoc__body__b3" "card-embed")))
|
||
(list true true "Some bold text." true "cap" true))
|
||
|
||
(define host-ht-tests-run!
|
||
(fn ()
|
||
{:total (+ host-ht-pass host-ht-fail)
|
||
:passed host-ht-pass :failed host-ht-fail :fails host-ht-fails}))
|