From 4d889716a350d4f4a72453e6554a7aecbf444430 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 12:52:34 +0000 Subject: [PATCH] content: in-document prose search via asText (763/763) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit content/search-text + search-text-ids find every block whose (asText b) contains a term — spanning all text-bearing fields by reusing the canonical asText projection, so it can't drift from stats/find-replace. Section wrappers excluded. +7 query tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/content/query.sx | 30 +++++++++++++++++++--- lib/content/scoreboard.json | 6 ++--- lib/content/scoreboard.md | 4 +-- lib/content/tests/query.sx | 51 ++++++++++++++++++++++++++++++++++++- plans/content-on-sx.md | 12 ++++++++- 5 files changed, 93 insertions(+), 10 deletions(-) diff --git a/lib/content/query.sx b/lib/content/query.sx index a312a45b..3f7a6156 100644 --- a/lib/content/query.sx +++ b/lib/content/query.sx @@ -1,10 +1,10 @@ ;; content-on-sx — block query + table of contents. ;; ;; Collect blocks across the whole tree (descending into sections) by predicate -;; or type, and derive a table of contents from headings. Tree detection is -;; inline (class + st-iv-get) so this needs no section.sx. +;; or type, search them by prose, and derive a table of contents from headings. +;; Tree detection is inline (class + st-iv-get) so this needs no section.sx. ;; -;; Requires (loaded by harness): block.sx, doc.sx. +;; Requires (loaded by harness): block.sx, doc.sx, text.sx (asText for search). (define qry-section? @@ -45,6 +45,30 @@ content/select-ids (fn (doc pred) (map (fn (b) (blk-id b)) (content/select doc pred)))) +;; Blocks (tree-wide, excluding section containers) whose own prose contains +;; `term`. 
"Prose" is (asText b), so search covers exactly what every block +;; exposes as text — text/heading/code/quote/callout text, image alt, list +;; items, table headers+cells — with no separate field list to drift from +;; asText / find-replace / stats. Case-sensitive substring match. +(define + content/search-text + (fn + (doc term) + (content/select + doc + (fn + (b) + (and + (not (qry-section? b)) + (>= (index-of (asText b) term) 0)))))) + +;; Same search, returning matching block ids in document order. +(define + content/search-text-ids + (fn + (doc term) + (map (fn (b) (blk-id b)) (content/search-text doc term)))) + ;; table of contents: {:id :level :text} for every heading, in document order. (define content/headings diff --git a/lib/content/scoreboard.json b/lib/content/scoreboard.json index cb2a09cf..30edd305 100644 --- a/lib/content/scoreboard.json +++ b/lib/content/scoreboard.json @@ -14,7 +14,7 @@ "tree-edit": {"pass": 17, "fail": 0}, "move": {"pass": 11, "fail": 0}, "clone": {"pass": 10, "fail": 0}, - "query": {"pass": 13, "fail": 0}, + "query": {"pass": 20, "fail": 0}, "toc": {"pass": 8, "fail": 0}, "anchor": {"pass": 6, "fail": 0}, "outline": {"pass": 14, "fail": 0}, @@ -42,7 +42,7 @@ "md-doc": {"pass": 12, "fail": 0}, "fed": {"pass": 20, "fail": 0} }, - "total_pass": 756, + "total_pass": 763, "total_fail": 0, - "total": 756 + "total": 763 } diff --git a/lib/content/scoreboard.md b/lib/content/scoreboard.md index 14c2c9b7..5ce5087f 100644 --- a/lib/content/scoreboard.md +++ b/lib/content/scoreboard.md @@ -18,7 +18,7 @@ _Generated by `lib/content/conformance.sh`_ | tree-edit | 17 | 0 | 17 | | move | 11 | 0 | 11 | | clone | 10 | 0 | 10 | -| query | 13 | 0 | 13 | +| query | 20 | 0 | 20 | | toc | 8 | 0 | 8 | | anchor | 6 | 0 | 6 | | outline | 14 | 0 | 14 | @@ -45,4 +45,4 @@ _Generated by `lib/content/conformance.sh`_ | md-import | 38 | 0 | 38 | | md-doc | 12 | 0 | 12 | | fed | 20 | 0 | 20 | -| **Total** | **756** | **0** | **756** | +| **Total** | **763** | 
**0** | **763** | diff --git a/lib/content/tests/query.sx b/lib/content/tests/query.sx index 873c82cc..e6695bf5 100644 --- a/lib/content/tests/query.sx +++ b/lib/content/tests/query.sx @@ -1,8 +1,11 @@ -;; Extension — block query + table of contents. +;; Extension — block query + table of contents + prose search. (st-bootstrap-classes!) (content/bootstrap!) +(content-bootstrap-text!) (content-bootstrap-section!) +(content-bootstrap-table!) +(content-bootstrap-callout!) (define d @@ -87,3 +90,49 @@ "deep toc level" (get (first (content/headings deep)) :level) 3) + +;; ── prose search (content/search-text) ── +;; "cat" appears in text, image alt, a list item, a table cell, and a callout +;; — every text-bearing field — so search must find all five via asText. +(define + sd + (doc-append + (doc-append + (doc-append + (doc-append + (doc-append + (doc-empty "sd") + (mk-heading "sh" 1 "Welcome aboard")) + (mk-text "st" "the cat sat")) + (mk-image "si" "/x.png" "a cat photo")) + (mk-list "sl" false (list "first cat" "second dog"))) + (mk-section + "sec" + (list + (mk-table "stb" (list "Animal") (list (list "cat") (list "fish"))) + (mk-callout "sc" "note" "beware of cat"))))) + +(content-test + "search across every text-bearing field" + (content/search-text-ids sd "cat") + (list "st" "si" "sl" "stb" "sc")) +(content-test "search count" (len (content/search-text sd "cat")) 5) +(content-test + "search heading text" + (content/search-text-ids sd "Welcome") + (list "sh")) +(content-test + "search list item only" + (content/search-text-ids sd "dog") + (list "sl")) +(content-test "search no match" (content/search-text-ids sd "zzz") (list)) +;; section containers are excluded — a term living only inside a section's +;; children returns the child, never the section wrapper. 
+(content-test + "search excludes section wrapper" + (content/search-text-ids sd "fish") + (list "stb")) +(content-test + "search returns block objects" + (blk-id (first (content/search-text sd "Welcome"))) + "sh") diff --git a/plans/content-on-sx.md b/plans/content-on-sx.md index d0de3f9a..c36178d4 100644 --- a/plans/content-on-sx.md +++ b/plans/content-on-sx.md @@ -19,7 +19,7 @@ injected adapter, not core. ## Status (rolling) -`bash lib/content/conformance.sh` → **756/756** (Phases 1–4 COMPLETE + ~34 extensions, hardened: HTML/SX escaping, Markdown render + import/export incl. tables & frontmatter (full round-trip), CvRDT flat + nested-tree + durable replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees + deep editing + flatten + relative reorder, doc stats + summary + multi-doc index, table + callout + media blocks, HTML page wrapper + SEO page, doc composition + id-remap, portable data + wire serialization, block query + transforms + find/replace, TOC + anchored headings + outline, normalization) +`bash lib/content/conformance.sh` → **763/763** (Phases 1–4 COMPLETE + ~34 extensions, hardened: HTML/SX escaping, Markdown render + import/export incl. tables & frontmatter (full round-trip), CvRDT flat + nested-tree + durable replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees + deep editing + flatten + relative reorder, doc stats + summary + multi-doc index, table + callout + media blocks, HTML page wrapper + SEO page, doc composition + id-remap, portable data + wire serialization, block query + transforms + find/replace, TOC + anchored headings + outline, normalization) ## Ground rules @@ -113,6 +113,16 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─ ## Progress log +- 2026-06-07 — Feature: in-document prose search. 
`content/search-text` (and + `content/search-text-ids`) return every content block, tree-wide, whose + `(asText b)` contains a term — so search spans text/heading/code/quote/callout + text, image alt, list items and table headers and cells **by construction**: it reuses the + one canonical "prose of a block" projection (asText) rather than re-listing + fields, so it can't drift from stats/find-replace. Section containers are + excluded (a term living only in a section's children returns the child, not the + wrapper). +7 query tests (cross-field match, count, single-field, no-match, + section exclusion, object return). 763/763. + - 2026-06-07 — Consistency: `find-replace` now rewrites **every** text-bearing field, not just `text`. New `fr-rewrite` dispatches per block type — `alt` of image blocks, each item of list blocks, and every header/cell of table blocks