From c5d9e1480dae10031227d575d042b248df93af3d Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 15:29:54 +0000 Subject: [PATCH] content: validation vets list items + table cells element-deep (787/787) validate only checked that list items / table rows-headers ARE lists; a non-string item or non-list/non-string-cell row passed yet crashes asText/ render/find-replace/search. Added ct-all-str?/ct-all-rows? + deepened list/ table branches (guarded against double-reporting). +9 validate tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/content/scoreboard.json | 6 +-- lib/content/scoreboard.md | 4 +- lib/content/tests/validate.sx | 60 +++++++++++++++++++++++++++ lib/content/validate.sx | 76 ++++++++++++++++++++++++++--------- plans/content-on-sx.md | 13 +++++- 5 files changed, 135 insertions(+), 24 deletions(-) diff --git a/lib/content/scoreboard.json b/lib/content/scoreboard.json index 7bb424af..d94a6ce1 100644 --- a/lib/content/scoreboard.json +++ b/lib/content/scoreboard.json @@ -30,7 +30,7 @@ "media": {"pass": 15, "fail": 0}, "data": {"pass": 25, "fail": 0}, "wire": {"pass": 11, "fail": 0}, - "validate": {"pass": 23, "fail": 0}, + "validate": {"pass": 32, "fail": 0}, "store": {"pass": 46, "fail": 0}, "snapshot": {"pass": 20, "fail": 0}, "crdt": {"pass": 34, "fail": 0}, @@ -42,7 +42,7 @@ "md-doc": {"pass": 12, "fail": 0}, "fed": {"pass": 20, "fail": 0} }, - "total_pass": 778, + "total_pass": 787, "total_fail": 0, - "total": 778 + "total": 787 } diff --git a/lib/content/scoreboard.md b/lib/content/scoreboard.md index ca91ab02..8bcc3cc5 100644 --- a/lib/content/scoreboard.md +++ b/lib/content/scoreboard.md @@ -34,7 +34,7 @@ _Generated by `lib/content/conformance.sh`_ | media | 15 | 0 | 15 | | data | 25 | 0 | 25 | | wire | 11 | 0 | 11 | -| validate | 23 | 0 | 23 | +| validate | 32 | 0 | 32 | | store | 46 | 0 | 46 | | snapshot | 20 | 0 | 20 | | crdt | 34 | 0 | 34 | @@ -45,4 +45,4 @@ _Generated by `lib/content/conformance.sh`_ | md-import | 38 | 0 | 38 | 
| md-doc | 12 | 0 | 12 | | fed | 20 | 0 | 20 | -| **Total** | **778** | **0** | **778** | +| **Total** | **787** | **0** | **787** | diff --git a/lib/content/tests/validate.sx b/lib/content/tests/validate.sx index d01f935c..1a6eb870 100644 --- a/lib/content/tests/validate.sx +++ b/lib/content/tests/validate.sx @@ -5,6 +5,7 @@ (content-bootstrap-blocks!) (content-bootstrap-doc!) (content-bootstrap-section!) +(content-bootstrap-table!) ;; ── a fully valid document ── (define @@ -164,3 +165,62 @@ (content/validate dup-tree))) 1) (content-test "tree dup not valid" (content/valid? dup-tree) false) + +;; ── collection blocks vetted ELEMENT-DEEP (items/cells must be strings) ── +;; A list whose items field is a list but holds a non-string would pass the old +;; "is a list" check yet crash asText/render — now caught. +(content-test + "list non-string item flagged" + (content/issue-kinds + (doc-append (doc-empty "d") (mk-list "l" true (list "a" 5)))) + (list "field")) +(content-test + "list all-string items valid" + (content/valid? + (doc-append (doc-empty "d") (mk-list "l" false (list "a" "b" "c")))) + true) +(content-test + "list empty items valid" + (content/valid? (doc-append (doc-empty "d") (mk-list "l" true (list)))) + true) +;; a malformed-list block reports exactly one element issue (not the is-a-list one) +(content-test + "list non-string item single issue" + (len + (content/validate + (doc-append + (doc-empty "d") + (mk-list "l" true (list 1 2))))) + 1) + +(content-test + "valid table ok" + (content/valid? + (doc-append + (doc-empty "d") + (mk-table "t" (list "H1" "H2") (list (list "a" "b") (list "c" "d"))))) + true) +(content-test + "table empty rows valid" + (content/valid? 
+ (doc-append (doc-empty "d") (mk-table "t" (list "H") (list)))) + true) +(content-test + "table non-list row flagged" + (content/issue-kinds + (doc-append (doc-empty "d") (mk-table "t" (list "H") (list "notarow")))) + (list "field")) +(content-test + "table non-string cell flagged" + (content/issue-kinds + (doc-append + (doc-empty "d") + (mk-table "t" (list "H") (list (list "ok") (list 9))))) + (list "field")) +(content-test + "table non-string header flagged" + (content/issue-kinds + (doc-append + (doc-empty "d") + (mk-table "t" (list "H" 2) (list (list "a" "b"))))) + (list "field")) diff --git a/lib/content/validate.sx b/lib/content/validate.sx index 8880a790..6a10ddc1 100644 --- a/lib/content/validate.sx +++ b/lib/content/validate.sx @@ -6,6 +6,11 @@ ;; Tree detection is inline (class + st-iv-get) so this file needs no section.sx. ;; Dispatch on block type is a validation-boundary concern, not core behaviour. ;; +;; Collection blocks are vetted element-deep: list items and table headers must +;; all be strings and table rows must all be lists of strings — exactly what +;; render/asText/find-replace/search assume — so malformed nested collections are +;; caught at the boundary instead of crashing the render layer downstream. +;; ;; Requires (loaded by harness): block.sx, doc.sx. (define ct-issue (fn (id kind detail) {:id id :detail detail :kind kind})) @@ -36,6 +41,28 @@ (define ct-uniq (fn (xs) (ct-uniq-loop xs (list)))) +;; every element a string? / every row a list of strings? (for collection blocks) +(define + ct-all-str? + (fn + (xs) + (if + (= (len xs) 0) + true + (if (string? (first xs)) (ct-all-str? (rest xs)) false)))) + +(define + ct-all-rows? + (fn + (rows) + (if + (= (len rows) 0) + true + (if + (if (list? (first rows)) (ct-all-str? (first rows)) false) + (ct-all-rows? (rest rows)) + false)))) + ;; ── tree flatten (descends into CtSection children; guards malformed children) ── (define ct-section-block? 
@@ -136,30 +163,43 @@ "embed provider must be a string"))) ((= t "divider") (list)) ((= t "list") - (append - (ct-field-issue - id - (boolean? (blk-get b "ordered")) - "list ordered must be a boolean") - (ct-field-issue - id - (list? (blk-get b "items")) - "list items must be a list"))) + (let + ((items (blk-get b "items"))) + (append + (ct-field-issue + id + (boolean? (blk-get b "ordered")) + "list ordered must be a boolean") + (append + (ct-field-issue id (list? items) "list items must be a list") + (ct-field-issue + id + (if (list? items) (ct-all-str? items) true) + "list items must all be strings"))))) ((= t "section") (ct-field-issue id (list? (blk-get b "children")) "section children must be a list")) ((= t "table") - (append - (ct-field-issue - id - (list? (blk-get b "headers")) - "table headers must be a list") - (ct-field-issue - id - (list? (blk-get b "rows")) - "table rows must be a list"))) + (let + ((headers (blk-get b "headers")) (rows (blk-get b "rows"))) + (append + (append + (ct-field-issue + id + (list? headers) + "table headers must be a list") + (ct-field-issue + id + (if (list? headers) (ct-all-str? headers) true) + "table headers must all be strings")) + (append + (ct-field-issue id (list? rows) "table rows must be a list") + (ct-field-issue + id + (if (list? rows) (ct-all-rows? rows) true) + "table rows must all be lists of strings"))))) ((= t "callout") (append (ct-field-issue diff --git a/plans/content-on-sx.md b/plans/content-on-sx.md index bbb65212..cd0e14e1 100644 --- a/plans/content-on-sx.md +++ b/plans/content-on-sx.md @@ -19,7 +19,7 @@ injected adapter, not core. ## Status (rolling) -`bash lib/content/conformance.sh` → **778/778** (Phases 1–4 COMPLETE + ~34 extensions, hardened: HTML/SX escaping, Markdown render + import/export incl. 
tables & frontmatter (full round-trip), CvRDT flat + nested-tree + durable replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees + deep editing + flatten + relative reorder, doc stats + summary + multi-doc index, table + callout + media blocks, HTML page wrapper + SEO page, doc composition + id-remap, portable data + wire serialization, block query + transforms + find/replace, TOC + anchored headings + outline, normalization) +`bash lib/content/conformance.sh` → **787/787** (Phases 1–4 COMPLETE + ~34 extensions, hardened: HTML/SX escaping, Markdown render + import/export incl. tables & frontmatter (full round-trip), CvRDT flat + nested-tree + durable replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees + deep editing + flatten + relative reorder, doc stats + summary + multi-doc index, table + callout + media blocks, HTML page wrapper + SEO page, doc composition + id-remap, portable data + wire serialization, block query + transforms + find/replace, TOC + anchored headings + outline, normalization) ## Ground rules @@ -113,6 +113,17 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─ ## Progress log +- 2026-06-07 — Hardening: validation now vets collection blocks ELEMENT-DEEP. + `validate` previously checked only that list `items` / table `headers`/`rows` + *are lists* — a list holding a non-string, or a table whose rows aren't lists + of strings, passed validation yet crashes asText/render/find-replace/search + (which all assume string items/cells). Added `ct-all-str?`/`ct-all-rows?` and + deepened the list/table branches (guarded so a non-list container reports only + the is-a-list issue, not a spurious element issue). Since validate's job is + guarding imports/federated input, this closes the boundary before the render + layer can fault. 
+9 validate tests (list non-string item, table non-list row / + non-string cell / non-string header, empties stay valid). 787/787. + - 2026-06-07 — Hardening (tree-wide audit): the public facade `content/find` / `content/has?` were top-level-only (`doc-find`/`doc-has?`), so you could `content/edit` an update/delete to a nested block by id (those ops are