blogimport: lexical->persist genesis-import + at-rest parity verifier (55/55)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m9s

Implements plans/migration/data-migration.md (the un-started long-pole) and the
data-layer half of slice-01-blog §4. Host-ops migration module composing
content-on-sx + persist public APIs; isolated from lib/host and lib/content.

- lexical.sx: Ghost lexical (as SX dicts) -> content block list, deterministic ids
- import.sx: genesis import into content:<id> op-log, idempotent, + postmeta stream
- verify.sx: replay-and-diff vs row-derived oracle (proves round-trip lossless)

Inline formatting is flattened to plain text (the swap point for Phase-5
formatted runs is isolated in lex-inline-text); the live Postgres source (Q-M4)
and the improved-converter re-import (Q-M5) are flagged in the README.
55/55 conformance: lexical 23, import 21, verify 11.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-30 13:14:30 +00:00
parent 1597eaa4f8
commit a4d93c61cc
10 changed files with 683 additions and 0 deletions

View File

@@ -0,0 +1,62 @@
; lib/blogimport/tests/import.sx — genesis import + idempotency
; Bootstrap calls run before any fixture is built.
; NOTE(review): presumably these register the block/doc classes that the
; assertions further down rely on (blk-send, doc-find, doc-ids) — confirm.
(st-bootstrap-classes!)
(content-bootstrap-blocks!)
(content-bootstrap-doc!)
(content-bootstrap-callout!)
(content-bootstrap-media!)
; p1: post fixture carrying full metadata (:visibility, :tags, :authors).
; Its lexical document is wrapped in :root and holds two nodes, in order:
; an h1 heading ("Hello") and a paragraph ("world").
(define
p1
{:id "post-1" :slug "hello" :title "Hello" :status "published"
:visibility "public" :tags (list "news") :authors (list "u1")
:lexical {:root {:children (list
{:type "heading" :tag "h1" :children (list {:type "text" :text "Hello"})}
{:type "paragraph" :children (list {:type "text" :text "world"})})}}})
; p2: post fixture with only :id/:slug/:title/:status (no :visibility, :tags
; or :authors). Its lexical document uses bare :children with no :root
; wrapper and holds a single paragraph ("second").
(define
p2
{:id "post-2" :slug "two" :title "Two" :status "published"
:lexical {:children (list
{:type "paragraph" :children (list {:type "text" :text "second"})})}})
; ---- genesis-ops ordering ----
; ops1: the ops blogimport/genesis-ops produces for the block list converted
; from p1's lexical document (two nodes in, two ops expected out).
; NOTE(review): bi-test appears to take (label actual expected) — confirm
; against the test harness.
(define ops1 (blogimport/genesis-ops (blogimport/lex-blocks (get p1 :lexical))))
; Every op is expected to have :op "insert".
(bi-test "genesis op kinds" (map (fn (o) (get o :op)) ops1) (list "insert" "insert"))
; The first op is expected to have :after nil; the second is expected to have
; :after "b0", i.e. the ops are chained in document order.
(bi-test "genesis first after nil" (get (nth ops1 0) :after) nil)
(bi-test "genesis second after first id" (get (nth ops1 1) :after) "b0")
; ---- import one ----
; B: store handle from persist/open; it is reused by the re-import tests
; later in this file, so statement order matters.
; NOTE(review): presumably a fresh, empty store — confirm.
(define B (persist/open))
; r1: result of importing p1 into B.
; NOTE(review): the third argument (10) is presumably a timestamp or
; sequence value — confirm against import-post!'s definition.
(define r1 (blogimport/import-post! B p1 10))
; The result dict is expected to report :imported true and :blocks 2
; (p1's lexical document has two nodes).
(bi-test "import imported flag" (get r1 :imported) true)
(bi-test "import block count" (get r1 :blocks) 2)
; The content stream for "post-1" is expected to hold 2 versions after import.
(bi-test "stream version-count" (content/version-count B "post-1") 2)
; The head document is expected to contain blocks "b0" and "b1", in that order.
(bi-test "head ids" (doc-ids (content/head B "post-1")) (list "b0" "b1"))
; "b1" is the paragraph: its "text" (passed through str) must be "world".
(bi-test "head body text"
(str (blk-send (doc-find (content/head B "post-1") "b1") "text")) "world")
; "b0" is the heading: tag "h1" in the fixture must surface as level 1.
(bi-test "head heading level"
(blk-send (doc-find (content/head B "post-1") "b0") "level") 1)
; ---- metadata round-trip ----
; Metadata loaded back from B for "post-1" must equal what
; blogimport/post-meta derives directly from the p1 fixture.
(bi-test "meta round-trip" (blogimport/load-meta B "post-1") (blogimport/post-meta p1))
; Spot checks on individual metadata fields.
(bi-test "meta title" (get (blogimport/load-meta B "post-1") :title) "Hello")
(bi-test "meta tags" (get (blogimport/load-meta B "post-1") :tags) (list "news"))
; ---- idempotent re-import (skip-if-exists, no duplication) ----
; r1b: result of importing p1 into B a second time, with a different third
; argument (99 instead of 10). Depends on p1 already having been imported
; into B earlier in this file.
(define r1b (blogimport/import-post! B p1 99))
; The second import is expected to be skipped: :imported false with
; :reason "exists".
(bi-test "reimport skipped" (get r1b :imported) false)
(bi-test "reimport reason" (get r1b :reason) "exists")
; The stream must be untouched by the skipped import: still 2 versions and
; the same two block ids in the head document.
(bi-test "version-count unchanged after reimport" (content/version-count B "post-1") 2)
(bi-test "head ids unchanged after reimport"
(doc-ids (content/head B "post-1")) (list "b0" "b1"))
; ---- import-all! coverage scoreboard ----
; B2: a second store handle, separate from B, so neither post has been
; imported into it before the first import-all! call below.
(define B2 (persist/open))
; cov1: scoreboard from importing both fixtures in one call.
; NOTE(review): the third argument (5) is presumably the same kind of value
; as import-post!'s third argument — confirm.
(define cov1 (blogimport/import-all! B2 (list p1 p2) 5))
; First run: 2 posts total, both imported, none skipped.
(bi-test "import-all total" (get cov1 :total) 2)
(bi-test "import-all imported" (get cov1 :imported) 2)
(bi-test "import-all skipped" (get cov1 :skipped) 0)
; re-run is fully idempotent
; cov2: same posts, same store, different third argument (6): nothing is
; expected to be imported and both posts are expected to be counted as skipped.
(define cov2 (blogimport/import-all! B2 (list p1 p2) 6))
(bi-test "import-all rerun imported" (get cov2 :imported) 0)
(bi-test "import-all rerun skipped" (get cov2 :skipped) 2)

View File

@@ -0,0 +1,92 @@
; lib/blogimport/tests/lexical.sx — lexical -> content block converter
; Bootstrap calls run before any fixture is built.
; NOTE(review): presumably these register the block classes that blk-send,
; blk-id and blk-type operate on — confirm.
(st-bootstrap-classes!)
(content-bootstrap-blocks!)
(content-bootstrap-doc!)
(content-bootstrap-callout!)
(content-bootstrap-media!)
; ---- a representative lexical document (Ghost editor JSON, as SX dicts) ----
; doc: nine top-level nodes under :root/:children, in this order:
;   0 heading (h2)            5 horizontalrule
;   1 paragraph               6 image
;   2 quote                   7 callout
;   3 list (listType number)  8 twitter (no dedicated mapping is asserted;
;   4 codeblock                 the tests expect it to become an embed)
; The paragraph mixes plain text, a text node with :format 1 (treated as
; bold by the assertions' section comment) and a link node with its own
; text child.
(define
doc
{:root {:children (list
{:type "heading" :tag "h2" :children (list {:type "text" :text "Title"})}
{:type "paragraph" :children (list
{:type "text" :text "plain "}
{:type "text" :text "bold" :format 1}
{:type "text" :text " then "}
{:type "link" :url "/x" :children (list {:type "text" :text "a link"})})}
{:type "quote" :children (list {:type "text" :text "wise words"})}
{:type "list" :listType "number" :children (list
{:type "listitem" :children (list {:type "text" :text "one"})}
{:type "listitem" :children (list {:type "text" :text "two"})})}
{:type "codeblock" :language "python" :code "print(1)"}
{:type "horizontalrule"}
{:type "image" :src "/c.png" :alt "a cat"}
{:type "callout" :backgroundColor "blue" :children (list {:type "text" :text "note!"})}
{:type "twitter" :url "https://t/x"})}})
; blocks: the converted block list that the assertions index into by position.
(define blocks (blogimport/lex-blocks doc))
; ---- structure ----
; One block per top-level lexical node (nine in the `doc` fixture), with ids
; "b0".."b8" assigned by position.
; NOTE(review): bi-test appears to take (label actual expected) — confirm
; against the test harness.
(bi-test "block count" (len blocks) 9)
(bi-test "ids by position" (map blk-id blocks)
(list "b0" "b1" "b2" "b3" "b4" "b5" "b6" "b7" "b8"))
; Expected lexical-type -> block-type mapping, in document order:
; heading->heading, paragraph->text, quote->quote, list->list,
; codeblock->code, horizontalrule->divider, image->image,
; callout->callout, twitter->embed.
(bi-test "types in order" (map blk-type blocks)
(list "heading" "text" "quote" "list" "code" "divider" "image" "callout" "embed"))
; ---- heading ----
; Tag "h2" must surface as level 2.
; NOTE(review): string-valued properties are passed through str before
; comparison throughout this file — presumably blk-send does not return a
; plain string for them; confirm.
(bi-test "heading level" (blk-send (nth blocks 0) "level") 2)
(bi-test "heading text" (str (blk-send (nth blocks 0) "text")) "Title")
; ---- paragraph with inline bold + link, flattened to plain concatenation ----
; The four inline children ("plain ", "bold", " then ", link "a link") are
; expected to collapse into one string; formatting and the link URL are not
; asserted anywhere.
(bi-test "paragraph flattened text"
(str (blk-send (nth blocks 1) "text")) "plain bold then a link")
; ---- quote ----
(bi-test "quote text" (str (blk-send (nth blocks 2) "text")) "wise words")
; ---- ordered list with items ----
; :listType "number" must surface as ordered = true; items are compared as a
; plain list of strings (no str wrapper here).
(bi-test "list ordered" (blk-send (nth blocks 3) "ordered") true)
(bi-test "list items" (blk-send (nth blocks 3) "items") (list "one" "two"))
; ---- code block ----
; The lexical :code field is read back through the block's "text" property.
(bi-test "code language" (str (blk-send (nth blocks 4) "language")) "python")
(bi-test "code text" (str (blk-send (nth blocks 4) "text")) "print(1)")
; Index 5 (the divider) has no property assertions beyond its type above.
; ---- image ----
(bi-test "image src" (str (blk-send (nth blocks 6) "src")) "/c.png")
(bi-test "image alt" (str (blk-send (nth blocks 6) "alt")) "a cat")
; ---- callout ----
; The lexical :backgroundColor value ("blue") is read back as the block's "kind".
(bi-test "callout kind" (str (blk-send (nth blocks 7) "kind")) "blue")
(bi-test "callout text" (str (blk-send (nth blocks 7) "text")) "note!")
; ---- unknown card routed to embed, provider records original type ----
; The embed's :url is not asserted here, only its provider.
(bi-test "unknown -> embed provider" (str (blk-send (nth blocks 8) "provider")) "twitter")
; ---- heading level mapping ----
; NOTE(review): lex-heading-level is called without the blogimport/ prefix
; used by the other entry points in this file — presumably an internal
; helper that is visible to the tests; confirm.
(bi-test "h1 level" (lex-heading-level "h1") 1)
(bi-test "h4 level" (lex-heading-level "h4") 4)
; An unrecognised tag ("hx") is expected to fall back to level 2.
(bi-test "unknown tag default" (lex-heading-level "hx") 2)
; ---- bullet list ----
; bdoc: bare-children document (no :root) with one bullet list of one item.
(define
bdoc
{:children (list {:type "list" :listType "bullet" :children (list
{:type "listitem" :children (list {:type "text" :text "x"})})})})
; :listType "bullet" must surface as ordered = false.
(bi-test "bullet not ordered" (blk-send (nth (blogimport/lex-blocks bdoc) 0) "ordered") false)
; ---- empty doc ----
; A :root with an empty :children list must convert to zero blocks.
(bi-test "empty doc -> no blocks" (len (blogimport/lex-blocks {:root {:children (list)}})) 0)
; ---- bare-children doc (no :root wrapper) ----
; A document with :children directly at the top level must still convert;
; the single paragraph is expected to become one "text" block.
(bi-test "bare children doc"
(map blk-type (blogimport/lex-blocks {:children (list {:type "paragraph" :children (list {:type "text" :text "hi"})})}))
(list "text"))
; ---- linebreak/tab in inline flattening ----
; A "linebreak" node between two text nodes is expected to flatten to a
; newline. Only linebreak is exercised here; no tab node appears in the test.
(bi-test "linebreak flatten"
(str (blk-send (nth (blogimport/lex-blocks
{:children (list {:type "paragraph" :children (list
{:type "text" :text "a"} {:type "linebreak"} {:type "text" :text "b"})})}) 0) "text"))
"a\nb")

View File

@@ -0,0 +1,57 @@
; lib/blogimport/tests/verify.sx — shadow-diff at rest (round-trip parity)
; Bootstrap calls run before any fixture is built.
; NOTE(review): presumably these register the block/doc classes needed by
; import and verification — confirm.
(st-bootstrap-classes!)
(content-bootstrap-blocks!)
(content-bootstrap-doc!)
(content-bootstrap-callout!)
(content-bootstrap-media!)
; p1: post fixture with full metadata and a four-node lexical document under
; :root — an h2 heading, a paragraph with a plain and a :format 1 text node,
; a numbered list with two items, and an image.
(define
p1
{:id "post-1" :slug "hello" :title "Hello" :status "published"
:visibility "public" :tags (list "news") :authors (list "u1")
:lexical {:root {:children (list
{:type "heading" :tag "h2" :children (list {:type "text" :text "Title"})}
{:type "paragraph" :children (list
{:type "text" :text "plain "}
{:type "text" :text "bold" :format 1})}
{:type "list" :listType "number" :children (list
{:type "listitem" :children (list {:type "text" :text "one"})}
{:type "listitem" :children (list {:type "text" :text "two"})})}
{:type "image" :src "/c.png" :alt "cat"})}}})
; px: post fixture that is never passed to import-post! in this file; it is
; used to check that verification fails for a post with no imported stream.
; Its lexical document uses bare :children (no :root wrapper).
(define
px
{:id "post-x" :slug "ghost" :title "Ghost" :status "published"
:lexical {:children (list {:type "paragraph" :children (list {:type "text" :text "never imported"})})}})
; ---- happy path: replayed == oracle ----
; B: store handle; p1 is imported into it and then verified. B is mutated
; again by the corruption test later in this file, so statement order matters.
(define B (persist/open))
(blogimport/import-post! B p1 10)
; v1: verification result for p1 immediately after import. The result dict
; is expected to report overall :ok plus separate :block-ok and :meta-ok
; flags, all true.
(define v1 (blogimport/verify-post B p1))
(bi-test "verify ok" (get v1 :ok) true)
(bi-test "verify block-ok" (get v1 :block-ok) true)
(bi-test "verify meta-ok" (get v1 :meta-ok) true)
; ---- oracle block model is what we expect (inline bold flattened) ----
; orc: the oracle computed from the p1 fixture alone (no store involved in
; this call). Its :blocks entry is expected to expose parallel :types and
; :contents lists.
(define orc (blogimport/oracle p1))
(bi-test "oracle types"
(get (get orc :blocks) :types) (list "heading" "text" "list" "image"))
; Expected per-block content: heading/paragraph -> flattened text
; ("plain " + "bold" -> "plain bold"), list -> list of item strings,
; image -> its :src.
(bi-test "oracle contents"
(get (get orc :blocks) :contents) (list "Title" "plain bold" (list "one" "two") "/c.png"))
; ---- corruption is DETECTED (op-log diverges from oracle) ----
; Commit an extra update op against "post-1" that overwrites block "b1"'s
; "text" with "CORRUPTED", so the stored stream no longer matches what the
; p1 fixture would produce. Depends on p1 having been imported into B earlier.
; NOTE(review): the trailing 100 is presumably the same kind of value as
; import-post!'s third argument — confirm.
(content/commit! B "post-1" (op-update "b1" "text" "CORRUPTED") 100)
; v2: verification after the corrupting commit. Overall :ok and :block-ok
; are both expected to be false; :meta-ok is not asserted here.
(define v2 (blogimport/verify-post B p1))
(bi-test "verify detects corruption" (get v2 :ok) false)
(bi-test "verify corruption is block-level" (get v2 :block-ok) false)
; ---- an un-imported post fails verification (empty replay vs non-empty oracle) ----
; px was never imported into B, so verifying it must report :ok false.
(bi-test "unimported not ok" (get (blogimport/verify-post B px) :ok) false)
; ---- verify-all coverage scoreboard ----
; B3: a separate store handle into which only p1 is imported; px is left out
; on purpose so the scoreboard contains one passing and one failing post.
(define B3 (persist/open))
(blogimport/import-post! B3 p1 10)
; cov: scoreboard from verifying both fixtures against B3.
(define cov (blogimport/verify-all B3 (list p1 px)))
; Expected: 2 posts checked, 1 ok, and :mismatched listing the id of the
; post that failed ("post-x").
(bi-test "verify-all total" (get cov :total) 2)
(bi-test "verify-all ok count" (get cov :ok) 1)
(bi-test "verify-all mismatched" (get cov :mismatched) (list "post-x"))