From 49af15452461b6b59f61d84fea3630ff660d3185 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 04:11:48 +0000 Subject: [PATCH] content: document normalization (normalize.sx) + 11 tests (605/605) Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/content/conformance.sh | 3 +- lib/content/normalize.sx | 49 +++++++++++++++++ lib/content/scoreboard.json | 5 +- lib/content/scoreboard.md | 3 +- lib/content/tests/normalize.sx | 99 ++++++++++++++++++++++++++++++++++ plans/content-on-sx.md | 8 ++- 6 files changed, 162 insertions(+), 5 deletions(-) create mode 100644 lib/content/normalize.sx create mode 100644 lib/content/tests/normalize.sx diff --git a/lib/content/conformance.sh b/lib/content/conformance.sh index b2bf0974..e586f27e 100755 --- a/lib/content/conformance.sh +++ b/lib/content/conformance.sh @@ -15,7 +15,7 @@ if [ ! -x "$SX_SERVER" ]; then fi fi -SUITES=(block doc render api meta page page-full markdown text section compose tree-edit clone query toc transform stats table data wire validate store snapshot crdt crdt-store sync md-import md-doc fed) +SUITES=(block doc render api meta page page-full markdown text section compose tree-edit clone query toc transform normalize stats table data wire validate store snapshot crdt crdt-store sync md-import md-doc fed) OUT_JSON="lib/content/scoreboard.json" OUT_MD="lib/content/scoreboard.md" @@ -51,6 +51,7 @@ run_suite() { (load "lib/content/query.sx") (load "lib/content/toc.sx") (load "lib/content/transform.sx") +(load "lib/content/normalize.sx") (load "lib/content/stats.sx") (load "lib/content/table.sx") (load "lib/content/data.sx") diff --git a/lib/content/normalize.sx b/lib/content/normalize.sx new file mode 100644 index 00000000..a27ad514 --- /dev/null +++ b/lib/content/normalize.sx @@ -0,0 +1,49 @@ +;; content-on-sx — document normalization. +;; +;; A cleanup pass: drop empty text blocks and empty sections across the tree. +;; Sections are normalised first, so a section that becomes empty (all children +;; dropped) is itself dropped. For tidying imported/edited documents. Immutable. +;; Inline tree handling (no section.sx dep). +;; +;; Requires (loaded by harness): block.sx, doc.sx. + +(define + norm-section? + (fn (b) (and (st-instance? b) (= (get b :class) "CtSection")))) +(define + norm-empty-text? + (fn (b) (and (= (blk-type b) "text") (= (str (blk-get b "text")) "")))) +(define + norm-empty-section? + (fn + (b) + (and + (norm-section? b) + (let + ((ch (st-iv-get b "children"))) + (or (= ch nil) (= (len ch) 0)))))) + +(define + norm-recurse + (fn + (b) + (if + (norm-section? b) + (let + ((ch (st-iv-get b "children"))) + (if (list? ch) (st-iv-set! b "children" (norm-blocks ch)) b)) + b))) + +(define + norm-keep? + (fn + (b) + (if (norm-empty-text? b) false (if (norm-empty-section? b) false true)))) + +(define + norm-blocks + (fn (blocks) (filter norm-keep? (map norm-recurse blocks)))) + +(define + content/normalize + (fn (doc) (doc-with-blocks doc (norm-blocks (doc-blocks doc))))) diff --git a/lib/content/scoreboard.json b/lib/content/scoreboard.json index 76d76f3f..b1b199de 100644 --- a/lib/content/scoreboard.json +++ b/lib/content/scoreboard.json @@ -16,6 +16,7 @@ "query": {"pass": 13, "fail": 0}, "toc": {"pass": 8, "fail": 0}, "transform": {"pass": 12, "fail": 0}, + "normalize": {"pass": 11, "fail": 0}, "stats": {"pass": 17, "fail": 0}, "table": {"pass": 15, "fail": 0}, "data": {"pass": 21, "fail": 0}, @@ -30,7 +31,7 @@ "md-doc": {"pass": 12, "fail": 0}, "fed": {"pass": 20, "fail": 0} }, - "total_pass": 594, + "total_pass": 605, "total_fail": 0, - "total": 594 + "total": 605 } diff --git a/lib/content/scoreboard.md b/lib/content/scoreboard.md index d1fe2e50..721ad016 100644 --- a/lib/content/scoreboard.md +++ b/lib/content/scoreboard.md @@ -20,6 +20,7 @@ _Generated by `lib/content/conformance.sh`_ | query | 13 | 0 | 13 | | toc | 8 | 0 | 8 | | transform | 12 | 0 | 12 | +| normalize | 11 | 0 | 11 | | stats | 17 | 0 | 17 | | table | 15 | 0 | 15 | | data | 21 | 0 | 21 | @@ -33,4 +34,4 @@ _Generated by `lib/content/conformance.sh`_ | md-import | 38 | 0 | 38 | | md-doc | 12 | 0 | 12 | | fed | 20 | 0 | 20 | -| **Total** | **594** | **0** | **594** | +| **Total** | **605** | **0** | **605** | diff --git a/lib/content/tests/normalize.sx b/lib/content/tests/normalize.sx new file mode 100644 index 00000000..f3cb03a9 --- /dev/null +++ b/lib/content/tests/normalize.sx @@ -0,0 +1,99 @@ +;; Extension — document normalization (drop empty text blocks + empty sections). + +(st-bootstrap-classes!) +(content/bootstrap!) +(content-bootstrap-section!) + +;; ── drop empty text blocks ── +(define + d + (doc-append + (doc-append + (doc-append (doc-empty "d") (mk-heading "h" 1 "Hi")) + (mk-text "empty" "")) + (mk-text "p" "Body"))) +(content-test + "drops empty text" + (doc-ids (content/normalize d)) + (list "h" "p")) +(content-test "normalize immutable" (doc-ids d) (list "h" "empty" "p")) +(content-test + "keeps non-empty text" + (str (blk-send (doc-find (content/normalize d) "p") "text")) + "Body") + +;; ── drop empty sections ── +(define + d2 + (doc-append + (doc-append (doc-empty "d") (mk-text "p" "x")) + (mk-section "empty-sec" (list)))) +(content-test + "drops empty section" + (doc-ids (content/normalize d2)) + (list "p")) + +;; ── section that becomes empty (all children dropped) is itself dropped ── +(define + d3 + (doc-append + (doc-empty "d") + (mk-section "s" (list (mk-text "e1" "") (mk-text "e2" ""))))) +(content-test + "section emptied then dropped" + (doc-ids (content/normalize d3)) + (list)) + +;; ── section with some content keeps surviving children ── +(define + d4 + (doc-append + (doc-empty "d") + (mk-section + "s" + (list (mk-text "e" "") (mk-heading "k" 2 "Keep"))))) +(define n4 (content/normalize d4)) +(content-test "section kept" (doc-ids n4) (list "s")) +(content-test + "empty child dropped, real kept" + (doc-tree-ids n4) + (list "s" "k")) + +;; ── nested: empty deep section removed, content bubbles correctly ── +(define + d5 + (doc-append + (doc-empty "d") + (mk-section + "outer" + (list (mk-text "a" "A") (mk-section "inner" (list (mk-text "x" ""))))))) +(content-test + "nested empty inner dropped" + (doc-tree-ids (content/normalize d5)) + (list "outer" "a")) + +;; ── already-clean doc unchanged ── +(define + clean + (doc-append + (doc-append (doc-empty "d") (mk-heading "h" 1 "T")) + (mk-text "p" "B"))) +(content-test + "clean doc unchanged ids" + (doc-ids (content/normalize clean)) + (list "h" "p")) +(content-test + "clean doc render" + (asHTML (content/normalize clean)) + (asHTML clean)) + +;; ── non-text empties preserved (divider, image with empty alt) ── +(define + d6 + (doc-append + (doc-append (doc-empty "d") (mk-divider "dv")) + (mk-image "i" "/a.png" ""))) +(content-test + "divider + image kept" + (doc-ids (content/normalize d6)) + (list "dv" "i")) diff --git a/plans/content-on-sx.md b/plans/content-on-sx.md index d2cd50a3..34b06654 100644 --- a/plans/content-on-sx.md +++ b/plans/content-on-sx.md @@ -19,7 +19,7 @@ injected adapter, not core. ## Status (rolling) -`bash lib/content/conformance.sh` → **594/594** (Phases 1–4 COMPLETE + extensions: HTML/SX escaping, Markdown render + import/export incl. tables & frontmatter (full round-trip), CRDT replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees + deep tree editing, doc stats, table block, HTML page wrapper + SEO page, doc composition + id-remap, portable data + wire serialization, block query + transforms, TOC rendering) +`bash lib/content/conformance.sh` → **605/605** (Phases 1–4 COMPLETE + extensions: HTML/SX escaping, Markdown render + import/export incl. tables & frontmatter (full round-trip), CRDT replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees + deep tree editing, doc stats, table block, HTML page wrapper + SEO page, doc composition + id-remap, portable data + wire serialization, block query + transforms, TOC rendering, normalization) ## Ground rules @@ -97,11 +97,17 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─ - [x] block query + TOC (`query.sx`: content/select/select-type/count-type/headings) - [x] block transforms (`transform.sx`: content/map-blocks/map-type/set-field-on) - [x] TOC rendering (`toc.sx`: content/toc-markdown + toc-html from headings) +- [x] document normalization (`normalize.sx`: content/normalize, drop empty blocks/sections) - [x] portable data serialization (`data.sx`: content/to-data + from-data, round-trips tree) - [x] wire serialization (`wire.sx`: content/to-wire + from-wire, SX-text on the wire) ## Progress log +- 2026-06-07 — Extension: document normalization (`normalize.sx`). + `content/normalize` drops empty text blocks and empty sections tree-wide; + sections are normalised first so one emptied by the pass is itself removed. + For tidying imported/edited docs; non-text empties (dividers, blank-alt images) + preserved. Inline tree handling; immutable. 11 tests; suite 605/605. - 2026-06-07 — Extension: table-of-contents rendering (`toc.sx`). `content/toc-markdown` produces a Markdown bullet list indented by heading level with `[text](#id)` links; `content/toc-html` produces a `