content: document normalization (normalize.sx) + 11 tests (605/605)
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 19s

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 04:11:48 +00:00
parent fe2475c49d
commit 49af154524
6 changed files with 162 additions and 5 deletions

View File

@@ -15,7 +15,7 @@ if [ ! -x "$SX_SERVER" ]; then
fi
fi
SUITES=(block doc render api meta page page-full markdown text section compose tree-edit clone query toc transform stats table data wire validate store snapshot crdt crdt-store sync md-import md-doc fed)
SUITES=(block doc render api meta page page-full markdown text section compose tree-edit clone query toc transform normalize stats table data wire validate store snapshot crdt crdt-store sync md-import md-doc fed)
OUT_JSON="lib/content/scoreboard.json"
OUT_MD="lib/content/scoreboard.md"
@@ -51,6 +51,7 @@ run_suite() {
(load "lib/content/query.sx")
(load "lib/content/toc.sx")
(load "lib/content/transform.sx")
(load "lib/content/normalize.sx")
(load "lib/content/stats.sx")
(load "lib/content/table.sx")
(load "lib/content/data.sx")

49
lib/content/normalize.sx Normal file
View File

@@ -0,0 +1,49 @@
;; content-on-sx — document normalization.
;;
;; A cleanup pass: drop empty text blocks and empty sections across the tree.
;; Sections are normalised first, so a section that becomes empty (all children
;; dropped) is itself dropped. For tidying imported/edited documents. Immutable.
;; Inline tree handling (no section.sx dep).
;;
;; Requires (loaded by harness): block.sx, doc.sx.
;; norm-section? — true when `b` is an instance whose :class is "CtSection".
;; The instance check runs first so `get` is only applied to instances.
(define
  norm-section?
  (fn
    (b)
    (and
      (st-instance? b)
      (let ((cls (get b :class))) (= cls "CtSection")))))
;; norm-empty-text? — true when `b` has block type "text" and its "text"
;; field stringifies to exactly "". Whitespace-only text is NOT treated as
;; empty: the comparison is against the empty string only.
(define
  norm-empty-text?
  (fn
    (b)
    (and
      (= (blk-type b) "text")
      (let ((body (str (blk-get b "text")))) (= body "")))))
;; norm-empty-section? — true when `b` is a section whose "children" ivar is
;; nil or has length 0. Non-sections short-circuit before the ivar is read.
(define
  norm-empty-section?
  (fn
    (b)
    (and
      (norm-section? b)
      (let
        ((kids (st-iv-get b "children")))
        ;; nil is tested first so `len` is never applied to nil.
        (if (= kids nil) true (= (len kids) 0))))))
;; norm-recurse — normalise the children of a section, leave anything else
;; untouched.
;;   b: a block (section or leaf).
;;   Returns the result of st-iv-set! with the children replaced by
;;   (norm-blocks children) when `b` is a section whose "children" ivar is a
;;   list; otherwise returns `b` unchanged.
;; Mutually recursive with norm-blocks, which is defined further down.
;; NOTE(review): the file header says this pass is immutable — that holds only
;; if st-iv-set! returns an updated copy rather than mutating `b`; confirm.
(define
  norm-recurse
  (fn
    (b)
    (if
      (and (norm-section? b) (list? (st-iv-get b "children")))
      (st-iv-set! b "children" (norm-blocks (st-iv-get b "children")))
      b)))
;; norm-keep? — filter predicate: false for empty text blocks and empty
;; sections, true for every other block. The text check runs first; the
;; section check is only evaluated when the text check fails.
(define
  norm-keep?
  (fn
    (b)
    (if (or (norm-empty-text? b) (norm-empty-section? b)) false true)))
;; norm-blocks — normalise a list of blocks.
;; Each block is first passed through norm-recurse (so section children are
;; cleaned before the section itself is judged), then blocks rejected by
;; norm-keep? are filtered out. That ordering is what lets a section emptied
;; by the pass be dropped in the same pass.
(define
  norm-blocks
  (fn
    (blocks)
    (let
      ((cleaned (map norm-recurse blocks)))
      (filter norm-keep? cleaned))))
;; content/normalize — public entry point.
;;   doc: a document.
;;   Returns (doc-with-blocks doc <normalised blocks>), where the normalised
;;   blocks are the document's blocks run through norm-blocks.
(define
  content/normalize
  (fn
    (doc)
    (let
      ((original (doc-blocks doc)))
      (let
        ((tidied (norm-blocks original)))
        (doc-with-blocks doc tidied)))))

View File

@@ -16,6 +16,7 @@
"query": {"pass": 13, "fail": 0},
"toc": {"pass": 8, "fail": 0},
"transform": {"pass": 12, "fail": 0},
"normalize": {"pass": 11, "fail": 0},
"stats": {"pass": 17, "fail": 0},
"table": {"pass": 15, "fail": 0},
"data": {"pass": 21, "fail": 0},
@@ -30,7 +31,7 @@
"md-doc": {"pass": 12, "fail": 0},
"fed": {"pass": 20, "fail": 0}
},
"total_pass": 594,
"total_pass": 605,
"total_fail": 0,
"total": 594
"total": 605
}

View File

@@ -20,6 +20,7 @@ _Generated by `lib/content/conformance.sh`_
| query | 13 | 0 | 13 |
| toc | 8 | 0 | 8 |
| transform | 12 | 0 | 12 |
| normalize | 11 | 0 | 11 |
| stats | 17 | 0 | 17 |
| table | 15 | 0 | 15 |
| data | 21 | 0 | 21 |
@@ -33,4 +34,4 @@ _Generated by `lib/content/conformance.sh`_
| md-import | 38 | 0 | 38 |
| md-doc | 12 | 0 | 12 |
| fed | 20 | 0 | 20 |
| **Total** | **594** | **0** | **594** |
| **Total** | **605** | **0** | **605** |

View File

@@ -0,0 +1,99 @@
;; Extension — document normalization (drop empty text blocks + empty sections).
;; Bootstrap calls run first, before any fixture below is constructed.
(st-bootstrap-classes!)
(content/bootstrap!)
(content-bootstrap-section!)
;; ── drop empty text blocks ──
;; Fixture: heading "h", a text block "empty" whose text is "", then text "p".
(define
d
(doc-append
(doc-append
(doc-append (doc-empty "d") (mk-heading "h" 1 "Hi"))
(mk-text "empty" ""))
(mk-text "p" "Body")))
;; The empty text block is removed; order of the survivors is preserved.
(content-test
"drops empty text"
(doc-ids (content/normalize d))
(list "h" "p"))
;; Evaluated after the normalize call above: the source doc must still list
;; all three top-level ids. `d` contains no sections, so this does not
;; exercise nested children.
(content-test "normalize immutable" (doc-ids d) (list "h" "empty" "p"))
;; The surviving text block still answers its original text.
(content-test
"keeps non-empty text"
(str (blk-send (doc-find (content/normalize d) "p") "text"))
"Body")
;; ── drop empty sections ──
;; Fixture: text "p" followed by a section built with an empty child list.
(define
d2
(doc-append
(doc-append (doc-empty "d") (mk-text "p" "x"))
(mk-section "empty-sec" (list))))
;; Only "p" remains at top level; the childless section is removed.
(content-test
"drops empty section"
(doc-ids (content/normalize d2))
(list "p"))
;; ── section that becomes empty (all children dropped) is itself dropped ──
;; Fixture: a single section whose two children are both empty text blocks.
(define
d3
(doc-append
(doc-empty "d")
(mk-section "s" (list (mk-text "e1" "") (mk-text "e2" "")))))
;; Both children are dropped, leaving the section empty, so the section goes
;; too and the document ends up with no top-level ids.
(content-test
"section emptied then dropped"
(doc-ids (content/normalize d3))
(list))
;; ── section with some content keeps surviving children ──
;; Fixture: section "s" holding an empty text "e" and a heading "k".
(define
d4
(doc-append
(doc-empty "d")
(mk-section
"s"
(list (mk-text "e" "") (mk-heading "k" 2 "Keep")))))
;; Normalised once and reused by the two assertions below.
(define n4 (content/normalize d4))
;; The section still has a child, so it stays at top level.
(content-test "section kept" (doc-ids n4) (list "s"))
;; Tree-wide ids: "e" is gone, "k" remains under "s".
(content-test
"empty child dropped, real kept"
(doc-tree-ids n4)
(list "s" "k"))
;; ── nested: empty inner section removed, non-empty sibling kept ──
;; Fixture: section "outer" holding text "a" and a section "inner" whose only
;; child is an empty text block.
(define
d5
(doc-append
(doc-empty "d")
(mk-section
"outer"
(list (mk-text "a" "A") (mk-section "inner" (list (mk-text "x" "")))))))
;; "x" is dropped, which empties "inner", which is then dropped from "outer";
;; "outer" survives because "a" remains.
(content-test
"nested empty inner dropped"
(doc-tree-ids (content/normalize d5))
(list "outer" "a"))
;; ── already-clean doc unchanged ──
;; Fixture: a heading and a non-empty text block; nothing to remove.
(define
clean
(doc-append
(doc-append (doc-empty "d") (mk-heading "h" 1 "T"))
(mk-text "p" "B")))
;; Top-level ids are the same after normalization.
(content-test
"clean doc unchanged ids"
(doc-ids (content/normalize clean))
(list "h" "p"))
;; The HTML rendering of the normalised doc equals that of the original.
(content-test
"clean doc render"
(asHTML (content/normalize clean))
(asHTML clean))
;; ── non-text empties preserved (divider, image with empty alt) ──
;; Fixture: a divider and an image whose last argument is "". Neither is a
;; text block nor a section, so normalization must leave both in place.
(define
d6
(doc-append
(doc-append (doc-empty "d") (mk-divider "dv"))
(mk-image "i" "/a.png" "")))
(content-test
"divider + image kept"
(doc-ids (content/normalize d6))
(list "dv" "i"))

View File

@@ -19,7 +19,7 @@ injected adapter, not core.
## Status (rolling)
`bash lib/content/conformance.sh`**594/594** (Phases 14 COMPLETE + extensions: HTML/SX escaping, Markdown render + import/export incl. tables & frontmatter (full round-trip), CRDT replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees + deep tree editing, doc stats, table block, HTML page wrapper + SEO page, doc composition + id-remap, portable data + wire serialization, block query + transforms, TOC rendering)
`bash lib/content/conformance.sh` → **605/605** (Phases 1–4 COMPLETE + extensions: HTML/SX escaping, Markdown render + import/export incl. tables & frontmatter (full round-trip), CRDT replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees + deep tree editing, doc stats, table block, HTML page wrapper + SEO page, doc composition + id-remap, portable data + wire serialization, block query + transforms, TOC rendering, normalization)
## Ground rules
@@ -97,11 +97,17 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─
- [x] block query + TOC (`query.sx`: content/select/select-type/count-type/headings)
- [x] block transforms (`transform.sx`: content/map-blocks/map-type/set-field-on)
- [x] TOC rendering (`toc.sx`: content/toc-markdown + toc-html from headings)
- [x] document normalization (`normalize.sx`: content/normalize, drop empty blocks/sections)
- [x] portable data serialization (`data.sx`: content/to-data + from-data, round-trips tree)
- [x] wire serialization (`wire.sx`: content/to-wire + from-wire, SX-text on the wire)
## Progress log
- 2026-06-07 — Extension: document normalization (`normalize.sx`).
`content/normalize` drops empty text blocks and empty sections tree-wide;
sections are normalised first so one emptied by the pass is itself removed.
For tidying imported/edited docs; non-text empties (dividers, blank-alt images)
preserved. Inline tree handling; immutable. 11 tests; suite 605/605.
- 2026-06-07 — Extension: table-of-contents rendering (`toc.sx`).
`content/toc-markdown` produces a Markdown bullet list indented by heading
level with `[text](#id)` links; `content/toc-html` produces a `<ul>` of escaped