diff --git a/lib/blogimport/drafts/README.md b/lib/blogimport/drafts/README.md new file mode 100644 index 00000000..7a2c37ca --- /dev/null +++ b/lib/blogimport/drafts/README.md @@ -0,0 +1,88 @@ +# Blog-side draft — the `published-posts` migration query + +The one blog-app change needed to make `lib/blogimport`'s live source (Q-M4) real. +Two parts: an SX **defquery** (`published-posts.sx` in this dir) and a Python +**provider** it binds to. Both go in the **blog app** (production `blog/` tree); they +are drafted here so the importer ships with its dependency spelled out. Apply on the +blog app's branch, not on this migration branch. + +## Why a new query (not reuse post-by-id) + +`blogimport/source.sx` needs, for every published post: `id, slug, title, status, +visibility, tags, authors, lexical`. The existing providers +(`blog/services/__init__.py` `SqlBlogService.get_post_by_*`) return a `PostDTO` whose +`_post_to_dto` exposes `sx_content`/`html` but **not `lexical`** — and the canonical +migration path is lexical→blocks (slice-01-blog Q-B1), not sx_content. So a dedicated +migration provider that returns full rows including the raw lexical body is the +minimal, honest change. One batch call covers both enumeration (Q-D2 corpus) and +bodies. + +## 1. defquery (→ `blog/queries.sx`) + +See `published-posts.sx` in this directory: + +```lisp +(defquery published-posts () + "Enumerate every published, non-page blog post as a full row INCLUDING the raw + lexical body — the SX migration corpus (Q-D2). Read-only ..." + (service "blog" "list-published-posts")) +``` + +Kebab→snake convention (as for `get-post-by-slug` → `get_post_by_slug`) binds +`"list-published-posts"` to the `SqlBlogService.list_published_posts` method below. + +## 2. 
Python provider (→ `blog/services/__init__.py`, in `SqlBlogService`) + +```python +from sqlalchemy.orm import selectinload # add to imports + + async def list_published_posts(self, session: AsyncSession) -> list[dict]: + """Migration corpus: every published, non-page post as a full row INCLUDING + the raw lexical body (Q-D2). Read-only; consumed by the SX blogimport + backfill/verify. Mirrors ghost_db.list_posts() base visibility filters.""" + result = await session.execute( + select(Post) + .where( + Post.deleted_at.is_(None), + Post.status == "published", + Post.is_page.is_(False), + ) + .options(selectinload(Post.tags), selectinload(Post.authors)) + .order_by(Post.published_at.desc().nullslast()) + ) + return [ + { + "id": p.id, + "uuid": p.uuid, + "slug": p.slug, + "title": p.title, + "status": p.status, + "visibility": p.visibility, + "lexical": p.lexical, + "tags": [t.slug for t in p.tags], + "authors": [a.slug for a in p.authors], + } + for p in result.scalars().unique().all() + ] +``` + +**Confirm before applying:** +- The relationship names on `Post` (`tags`, `authors`) — check `blog/models/content.py` + join tables (`post_tags`, `post_authors`); adjust `selectinload` + the comprehensions + if they differ. `.unique()` is optional: `selectinload` never fans out rows (only `joinedload` needs it). +- `Post.uuid` and `Post.lexical` columns exist (`models/content.py` ~lines 61-63). +- Visibility filters match `ghost_db.list_posts()` (drafts excluded, pages excluded) so + the corpus is exactly the published read-path set. + +## 3. Verify the contract + +After applying, the response shape must match `blogimport/parse-row` +(`lib/blogimport/source.sx`): keys `:uuid|:id :slug :title :status :visibility :tags +:authors :lexical`, with `:lexical` a JSON string (parsed via `dream-json-parse`). The +mock in `lib/blogimport/tests/source.sx` is the executable spec of this contract. + +## 4. Then wire the transport (host loop) + +`blogimport/backfill!`/`sync-verify` take an injected `fetch-fn`. 
In production that is +the host's HMAC `fetch_data` wrapper (`GET /internal/data/published-posts`) — wiring +that lives in `lib/host`, not here. diff --git a/lib/blogimport/drafts/published-posts.sx b/lib/blogimport/drafts/published-posts.sx new file mode 100644 index 00000000..2997588c --- /dev/null +++ b/lib/blogimport/drafts/published-posts.sx @@ -0,0 +1,16 @@ +; DRAFT — proposed addition to blog/queries.sx (the blog app's internal-data surface). +; Resolves the one blog-side gap for Q-M4: blogimport needs to enumerate published +; posts AND read their raw lexical bodies. The existing post-by-id/slug/ids queries +; return a PostDTO that carries sx_content/html but NOT lexical, so a dedicated +; migration query that returns full rows (incl. lexical) is the minimal change. +; +; Paste this defquery into blog/queries.sx alongside the others, and add the matching +; `list_published_posts` provider to SqlBlogService (see drafts/README.md). +; +; This file is a DRAFT artifact (not loaded by anything); it is parse-validated only. + +(defquery published-posts () + "Enumerate every published, non-page blog post as a full row INCLUDING the raw + lexical body — the SX migration corpus (Q-D2). Read-only; used by the blogimport + backfill + at-rest verify. Newest-first." 
+ (service "blog" "list-published-posts")) diff --git a/lib/blogimport/scoreboard.json b/lib/blogimport/scoreboard.json index 5870bda1..01c323ce 100644 --- a/lib/blogimport/scoreboard.json +++ b/lib/blogimport/scoreboard.json @@ -3,9 +3,9 @@ "lexical": {"pass": 23, "fail": 0}, "import": {"pass": 21, "fail": 0}, "verify": {"pass": 11, "fail": 0}, - "source": {"pass": 20, "fail": 0} + "source": {"pass": 21, "fail": 0} }, - "total_pass": 75, + "total_pass": 76, "total_fail": 0, - "total": 75 + "total": 76 } diff --git a/lib/blogimport/scoreboard.md b/lib/blogimport/scoreboard.md index 56dbd803..3fee538a 100644 --- a/lib/blogimport/scoreboard.md +++ b/lib/blogimport/scoreboard.md @@ -7,5 +7,5 @@ _Generated by `lib/blogimport/conformance.sh`_ | lexical | 23 | 0 | 23 | | import | 21 | 0 | 21 | | verify | 11 | 0 | 11 | -| source | 20 | 0 | 20 | -| **Total** | **75** | **0** | **75** | +| source | 21 | 0 | 21 | +| **Total** | **76** | **0** | **76** | diff --git a/lib/blogimport/source.sx b/lib/blogimport/source.sx index 143b849e..b1a2abdf 100644 --- a/lib/blogimport/source.sx +++ b/lib/blogimport/source.sx @@ -17,11 +17,14 @@ ; here with dream-json-parse into the SX dict shape blogimport/lex-blocks expects. ; (If a handler returns :lexical already-structured, it is used as-is.) ; -; REQUIRED BLOG-SIDE ADDITION (the one gap): blog/queries.sx exposes fetch-by-id/slug -; but NO enumeration query. The corpus (Q-D2 = every published post) needs a -; `published-posts` query returning the published ids/slugs (Python: list_posts( -; status="published"), blog/bp/blog/ghost_db.py:102). Flagged for the blog app; mocked -; in tests. Until it exists, callers can pass an explicit id list to backfill-ids!. +; REQUIRED BLOG-SIDE ADDITION (the one gap — draft in drafts/published-posts.sx): +; the migration needs a `published-posts` query that returns full published-post ROWS +; INCLUDING the raw `:lexical` body. 
The existing post-by-id/slug providers return a +; PostDTO that carries sx_content/html but NOT lexical (blog/services/__init__.py +; _post_to_dto), so they cannot feed the canonical lexical->blocks converter. One new +; provider (Python list_published_posts, mirroring the filters of list_posts(status="published"), +; blog/bp/blog/ghost_db.py:102) covers both enumeration AND bodies in one batch call. +; Mocked here against that contract; see drafts/ for the paste-ready blog-side change. (define blogimport/dep-json-parse dream-json-parse) @@ -47,45 +50,34 @@ :authors (or (get row :authors) (list)) :lexical (blogimport/parse-lexical (get row :lexical))})) -; --- fetch one post via an internal-data query ---------------------------------- +; --- the published-post rows from the live source (one batch query) ------------- (define - blogimport/fetch-post - (fn (fetch-fn query params) - (blogimport/parse-row (fetch-fn query params)))) - -; --- enumerate published post ids (needs the `published-posts` query) ----------- -(define - blogimport/published-ids + blogimport/source-rows (fn (fetch-fn) (fetch-fn "published-posts" {}))) -; --- fetch all published posts as importer `post` dicts ------------------------- +; --- all published posts as importer `post` dicts ------------------------------- (define blogimport/source-posts - (fn (fetch-fn) - (map - (fn (id) (blogimport/fetch-post fetch-fn "post-by-id" {:id id})) - (blogimport/published-ids fetch-fn)))) - -; --- fetch an explicit id list (fallback before the enumeration query lands) ---- -(define - blogimport/source-posts-by-ids - (fn (fetch-fn ids) - (map (fn (id) (blogimport/fetch-post fetch-fn "post-by-id" {:id id})) ids))) + (fn (fetch-fn) (map blogimport/parse-row (blogimport/source-rows fetch-fn)))) ; --- end-to-end drivers --------------------------------------------------------- -; backfill = enumerate -> fetch -> genesis-import (idempotent). 
Re-runnable as the ; one-way DB->persist sync (data-migration.md Strategy 1). (define blogimport/backfill! (fn (b fetch-fn at) (blogimport/import-all! b (blogimport/source-posts fetch-fn) at))) +; partial backfill: client-side filter to a subset of ids (no extra blog query). (define blogimport/backfill-ids! (fn (b fetch-fn ids at) - (blogimport/import-all! b (blogimport/source-posts-by-ids fetch-fn ids) at))) + (blogimport/import-all! + b + (filter (fn (p) (contains? ids (get p :id))) (blogimport/source-posts fetch-fn)) + at))) -; sync-verify = enumerate -> fetch -> shadow-diff the persisted streams at rest. +; sync-verify = fetch -> shadow-diff the persisted streams at rest. (define blogimport/sync-verify (fn (b fetch-fn) diff --git a/lib/blogimport/tests/source.sx b/lib/blogimport/tests/source.sx index b80f3258..a933e826 100644 --- a/lib/blogimport/tests/source.sx +++ b/lib/blogimport/tests/source.sx @@ -5,7 +5,7 @@ (content-bootstrap-callout!) (content-bootstrap-media!) -; ---- canned service responses (lexical arrives as a JSON STRING, the DB column) ---- +; ---- canned service rows (lexical arrives as a JSON STRING, the DB column) ---- (define lex1 "{\"root\":{\"children\":[{\"type\":\"heading\",\"tag\":\"h2\",\"children\":[{\"type\":\"text\",\"text\":\"Live\"}]},{\"type\":\"paragraph\",\"children\":[{\"type\":\"text\",\"text\":\"from db\"}]}]}}") @@ -19,16 +19,12 @@ :lexical "{\"children\":[{\"type\":\"paragraph\",\"children\":[{\"type\":\"text\",\"text\":\"second\"}]}]}"}) ; ---- mock transport: (fetch-fn query params) -> response ---- +; the `published-posts` migration query returns full rows (incl. lexical) in one batch. (define mock-fetch (fn (query params) (cond - ((equal? query "published-posts") (list "post-1" "post-2")) - ((equal? query "post-by-id") - (cond - ((equal? (get params :id) "post-1") row1) - ((equal? (get params :id) "post-2") row2) - (else nil))) + ((equal? 
query "published-posts") (list row1 row2)) (else nil)))) ; ---- parse-row maps fields + parses the lexical JSON string ---- @@ -49,8 +45,8 @@ (bi-test "parse-row structured lexical used as-is" (map blk-type (blogimport/lex-blocks (get post3 :lexical))) (list "text")) -; ---- enumeration + source-posts ---- -(bi-test "published-ids" (blogimport/published-ids mock-fetch) (list "post-1" "post-2")) +; ---- source-rows / source-posts ---- +(bi-test "source-rows count" (len (blogimport/source-rows mock-fetch)) 2) (bi-test "source-posts ids" (map (fn (p) (get p :id)) (blogimport/source-posts mock-fetch)) (list "post-1" "post-2")) @@ -76,8 +72,9 @@ (bi-test "sync-verify ok" (get sv :ok) 2) (bi-test "sync-verify no mismatch" (get sv :mismatched) (list)) -; ---- explicit-id fallback path (before the enumeration query lands) ---- +; ---- partial backfill: client-side id filter (no extra blog query) ---- (define B2 (persist/open)) (define covx (blogimport/backfill-ids! B2 mock-fetch (list "post-2") 10)) (bi-test "backfill-ids imported" (get covx :imported) 1) (bi-test "backfill-ids post-2 ids" (doc-ids (content/head B2 "post-2")) (list "b0")) +(bi-test "backfill-ids other not imported" (content/version-count B2 "post-1") 0)