Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m9s
Implements plans/migration/data-migration.md (the un-started long-pole) and the data-layer half of slice-01-blog §4. Host-ops migration module composing content-on-sx + persist public APIs; isolated from lib/host and lib/content. - lexical.sx: Ghost lexical (as SX dicts) -> content block list, deterministic ids - import.sx: genesis import into content:<id> op-log, idempotent, + postmeta stream - verify.sx: replay-and-diff vs row-derived oracle (proves round-trip lossless) Inline formatting flattens to plain text (Phase-5 runs swap-point isolated in lex-inline-text); live Postgres source (Q-M4) + improved-converter re-import (Q-M5) flagged in README. 55/55 conformance: lexical 23, import 21, verify 11. Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
130 lines
5.4 KiB
Plaintext
130 lines
5.4 KiB
Plaintext
; lib/blogimport/lexical.sx
|
|
; Lexical (Ghost editor JSON, as SX dicts) -> content-on-sx block list.
|
|
;
|
|
; The blog migration's lexical->blocks converter. Lives on the blog/migration
|
|
; side (NOT lib/content, NOT lib/host) per plans/migration/data-migration.md §7.
|
|
;
|
|
; Input shape: a lexical document is an SX dict mirroring the JSON 1:1, e.g.
|
|
; {:root {:children (list
|
|
; {:type "heading" :tag "h2" :children (list {:type "text" :text "Hi"})}
|
|
; {:type "paragraph" :children (list
|
|
; {:type "text" :text "plain "}
|
|
; {:type "text" :text "bold" :format 1}
|
|
; {:type "link" :url "/x" :children (list {:type "text" :text "here"})})})}}
|
|
;
|
|
; Block ids are assigned deterministically by top-level position ("b0","b1",...)
|
|
; so a re-import yields the SAME block sequence (data-migration.md §5 ordering rule).
|
|
;
|
|
; INLINE FORMATTING: architecture's content model holds PLAIN-STRING text
|
|
; (mk-text id text). Phase-5 rich inline runs are not merged here yet, so inline
|
|
; nodes are flattened to their plain concatenation (== asText, drift-proof). The
|
|
; single swap-point for the runs upgrade is `lex-inline-text` below — when
|
|
; content-on-sx Phase 5 lands on architecture, return runs there instead of a
|
|
; string. (slice-01-blog.md Q-B1; "prove the machinery first, then swap".)
|
|
|
|
; Inline format bitmask (lexical): bold=1 italic=2 strikethrough=4 underline=8
|
|
; code=16 subscript=32 superscript=64. Decoding the bitmask into mark keywords is
|
|
; deferred to the Phase-5 runs upgrade (no bitwise prim on architecture, and the
|
|
; active path flattens to plain text anyway). The :format field is read at the
|
|
; swap-point `lex-inline-text` when runs land.
|
|
|
|
; --- inline node -> plain text --------------------------------------------------
|
|
(define
|
|
lex-inline-node-text
|
|
(fn (node)
|
|
(let ((t (get node :type)))
|
|
(cond
|
|
((equal? t "text") (or (get node :text) ""))
|
|
((equal? t "linebreak") "\n")
|
|
((equal? t "tab") "\t")
|
|
((equal? t "link") (lex-inline-text (or (get node :children) (list))))
|
|
((equal? t "autolink") (lex-inline-text (or (get node :children) (list))))
|
|
((equal? t "at-link") (lex-inline-text (or (get node :children) (list))))
|
|
((equal? t "code-highlight") (or (get node :text) ""))
|
|
(else "")))))
|
|
|
|
; flatten a list of inline nodes to one plain string.
|
|
; *** Phase-5 swap-point: return a runs list here once mk-text accepts runs. ***
|
|
(define
|
|
lex-inline-text
|
|
(fn (children)
|
|
(reduce
|
|
(fn (acc n) (str acc (lex-inline-node-text n)))
|
|
""
|
|
children)))
|
|
|
|
; --- helpers --------------------------------------------------------------------
|
|
(define
|
|
lex-heading-level
|
|
(fn (tag)
|
|
(cond
|
|
((equal? tag "h1") 1)
|
|
((equal? tag "h2") 2)
|
|
((equal? tag "h3") 3)
|
|
((equal? tag "h4") 4)
|
|
((equal? tag "h5") 5)
|
|
((equal? tag "h6") 6)
|
|
(else 2))))
|
|
|
|
(define
|
|
lex-listitem-text
|
|
(fn (item)
|
|
(lex-inline-text (or (get item :children) (list)))))
|
|
|
|
; --- one lexical block node -> a content block (id assigned by caller) ----------
|
|
(define
|
|
lex-block
|
|
(fn (node id)
|
|
(let ((t (get node :type)))
|
|
(cond
|
|
((equal? t "paragraph")
|
|
(mk-text id (lex-inline-text (or (get node :children) (list)))))
|
|
((equal? t "extended-text")
|
|
(mk-text id (lex-inline-text (or (get node :children) (list)))))
|
|
((equal? t "heading")
|
|
(mk-heading id (lex-heading-level (get node :tag))
|
|
(lex-inline-text (or (get node :children) (list)))))
|
|
((equal? t "extended-heading")
|
|
(mk-heading id (lex-heading-level (get node :tag))
|
|
(lex-inline-text (or (get node :children) (list)))))
|
|
((equal? t "quote")
|
|
(mk-quote id "" (lex-inline-text (or (get node :children) (list)))))
|
|
((equal? t "extended-quote")
|
|
(mk-quote id "" (lex-inline-text (or (get node :children) (list)))))
|
|
((equal? t "codeblock")
|
|
(mk-code id (or (get node :language) "") (or (get node :code) "")))
|
|
((equal? t "list")
|
|
(mk-list id
|
|
(equal? (get node :listType) "number")
|
|
(map lex-listitem-text (or (get node :children) (list)))))
|
|
((equal? t "horizontalrule") (mk-divider id))
|
|
((equal? t "image")
|
|
(mk-image id (or (get node :src) "") (or (get node :alt) "")))
|
|
((equal? t "callout")
|
|
(mk-callout id (or (get node :backgroundColor) "grey")
|
|
(lex-inline-text (or (get node :children) (list)))))
|
|
((equal? t "video") (mk-media id "video" (or (get node :src) "")))
|
|
((equal? t "audio") (mk-media id "audio" (or (get node :src) "")))
|
|
((equal? t "embed") (mk-embed id (or (get node :url) "") "embed"))
|
|
((equal? t "bookmark") (mk-embed id (or (get node :url) "") "bookmark"))
|
|
; unknown/unsupported card: route to a generic embed tagged by type so
|
|
; nothing is silently dropped (provider records the original node type).
|
|
(else (mk-embed id "" (or t "unknown")))))))
|
|
|
|
; --- doc -> top-level children list ---------------------------------------------
|
|
(define
|
|
lex-doc-children
|
|
(fn (doc)
|
|
(cond
|
|
((not (equal? (get doc :root) nil)) (or (get (get doc :root) :children) (list)))
|
|
((not (equal? (get doc :children) nil)) (get doc :children))
|
|
(else (list)))))
|
|
|
|
; --- doc -> content block list (deterministic ids by position) ------------------
|
|
(define
|
|
blogimport/lex-blocks
|
|
(fn (doc)
|
|
(map-indexed
|
|
(fn (i node) (lex-block node (str "b" i)))
|
|
(lex-doc-children doc))))
|