blogimport: lexical->persist genesis-import + at-rest parity verifier (55/55)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m9s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m9s
Implements plans/migration/data-migration.md (the un-started long-pole) and the data-layer half of slice-01-blog §4. Host-ops migration module composing content-on-sx + persist public APIs; isolated from lib/host and lib/content.

- lexical.sx: Ghost lexical (as SX dicts) -> content block list, deterministic ids
- import.sx: genesis import into content:<id> op-log, idempotent, + postmeta stream
- verify.sx: replay-and-diff vs row-derived oracle (proves round-trip lossless)

Inline formatting flattens to plain text (Phase-5 runs swap-point isolated in lex-inline-text); live Postgres source (Q-M4) + improved-converter re-import (Q-M5) flagged in README.

55/55 conformance: lexical 23, import 21, verify 11.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
73
lib/blogimport/verify.sx
Normal file
73
lib/blogimport/verify.sx
Normal file
@@ -0,0 +1,73 @@
|
||||
; lib/blogimport/verify.sx
|
||||
; Shadow-diff at rest (plans/migration/data-migration.md §6, slice-01-blog.md §4).
|
||||
;
|
||||
; After backfill, replay each content:<id> stream -> materialized doc -> block
|
||||
; model, and diff against the row-derived oracle (lexical->blocks computed directly).
|
||||
; Structural compare with `=` (not equal?). This proves the genesis import + op-log
|
||||
; replay is LOSSLESS — "did the backfill corrupt anything" at rest.
|
||||
;
|
||||
; The oracle here is the in-memory lexical->blocks of the SAME post, so the property
|
||||
; verified is round-trip fidelity through persist. Cross-checking against the LIVE
|
||||
; Python block model (the "does SX match Python" half of Q-D2) is a later wiring
|
||||
; step that needs the Python oracle via the internal-data query (Q-M4) — flagged,
|
||||
; not built. The diff plumbing here is the twin that step reuses.
|
||||
|
||||
; --- salient content per block (normalized; same on both sides) -----------------
|
||||
; ids are deterministic + identical on both sides, so they are kept (not stripped).
|
||||
; blogimport/blk-content: the comparable "content" value of one block `b`,
; selected by the block's type (as reported by blk-type):
;   "image" / "media" -> str of the "src" field
;   "embed"           -> str of the "url" field
;   "list"            -> the "items" field as returned (NOT passed through str)
;   "divider"         -> the empty string
;   anything else     -> str of the "text" field
; Fields are read with blk-send; its exact semantics are defined elsewhere.
(define
  blogimport/blk-content
  (fn (b)
    (let ((kind (blk-type b))
          ; stringified field lookup shared by every branch except list/divider
          (field-str (fn (key) (str (blk-send b key)))))
      (cond
        ((equal? kind "divider") "")
        ((equal? kind "list") (blk-send b "items"))
        ((equal? kind "embed") (field-str "url"))
        ((equal? kind "image") (field-str "src"))
        ((equal? kind "media") (field-str "src"))
        (else (field-str "text"))))))
|
||||
|
||||
; --- block model of a block list ------------------------------------------------
|
||||
; blogimport/blocks-model: project a block list into three parallel lists
; held in one dict:
;   :ids      - blk-id of each block
;   :types    - blk-type of each block
;   :contents - blogimport/blk-content of each block
; Each list is produced by mapping over `blocks`, so all three follow the
; order of the input.
(define
  blogimport/blocks-model
  (fn (blocks)
    (let ((id-col (map blk-id blocks))
          (type-col (map blk-type blocks))
          (content-col (map blogimport/blk-content blocks)))
      {:ids id-col
       :types type-col
       :contents content-col})))
|
||||
|
||||
; --- oracle: lexical->blocks computed directly from the post (no persist) --------
|
||||
; blogimport/oracle: the expected side of the diff, computed from `post`
; alone (no persist argument is taken or used here).
;   :blocks - blocks-model of (blogimport/lex-blocks (get post :lexical))
;   :meta   - (blogimport/post-meta post)
(define
  blogimport/oracle
  (fn (post)
    (let ((direct-blocks (blogimport/lex-blocks (get post :lexical))))
      (let ((expected-model (blogimport/blocks-model direct-blocks)))
        {:blocks expected-model
         :meta (blogimport/post-meta post)}))))
|
||||
|
||||
; --- replayed: from the persisted stream ----------------------------------------
|
||||
; blogimport/replayed: the actual side of the diff, read back through `b`
; for the post identified by `id`.
;   :blocks - blocks-model of (content/blocks (content/head b id))
;   :meta   - (blogimport/load-meta b id)
; `b` is handed unchanged to content/head and blogimport/load-meta;
; presumably it is the persist backend handle - confirm against callers.
(define
  blogimport/replayed
  (fn (b id)
    (let ((head-doc (content/head b id)))
      (let ((stored-blocks (content/blocks head-doc)))
        {:blocks (blogimport/blocks-model stored-blocks)
         :meta (blogimport/load-meta b id)}))))
|
||||
|
||||
; --- verify one post: replayed must equal oracle --------------------------------
|
||||
; blogimport/verify-post: compare the oracle for `post` against what is
; replayed from `b` under the post's :id, and report the outcome.
; Returns a dict:
;   :id       - (get post :id)
;   :block-ok - whether the :blocks of oracle and replay compare `=`
;   :meta-ok  - whether the :meta of oracle and replay compare `=`
;   :ok       - (and block-ok meta-ok)
(define
  blogimport/verify-post
  (fn (b post)
    ; the oracle is built before the replay
    (let ((expected (blogimport/oracle post))
          (actual (blogimport/replayed b (get post :id))))
      (let ((blocks-match (= (get expected :blocks) (get actual :blocks)))
            (meta-match (= (get expected :meta) (get actual :meta))))
        {:id (get post :id)
         :ok (and blocks-match meta-match)
         :block-ok blocks-match
         :meta-ok meta-match}))))
|
||||
|
||||
; --- verify many: coverage scoreboard -------------------------------------------
|
||||
; blogimport/verify-all: run blogimport/verify-post over every post in
; `posts` against `b` and summarise the results.
; Returns a dict:
;   :total      - number of posts checked
;   :ok         - number of results whose :ok is truthy
;   :mismatched - the :id of every result whose :ok is not truthy, in
;                 result order
(define
  blogimport/verify-all
  (fn (b posts)
    (let ((check-one (fn (p) (blogimport/verify-post b p)))
          (passed? (fn (r) (get r :ok))))
      (let ((reports (map check-one posts)))
        {:total (len reports)
         :ok (len (filter passed? reports))
         :mismatched (map (fn (r) (get r :id))
                          (filter (fn (r) (not (passed? r))) reports))}))))
|
||||
Reference in New Issue
Block a user