From 7836709f910cc548232eb23c7ae4702a1b78a7e5 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 01:25:37 +0000 Subject: [PATCH] content: document validation (validate.sx) + 17 tests (294/294) Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/content/conformance.sh | 3 +- lib/content/scoreboard.json | 5 +- lib/content/scoreboard.md | 3 +- lib/content/tests/validate.sx | 116 +++++++++++++++++++++++++ lib/content/validate.sx | 156 ++++++++++++++++++++++++++++++++++ plans/content-on-sx.md | 10 ++- 6 files changed, 288 insertions(+), 5 deletions(-) create mode 100644 lib/content/tests/validate.sx create mode 100644 lib/content/validate.sx diff --git a/lib/content/conformance.sh b/lib/content/conformance.sh index 768d833c..b74c3590 100755 --- a/lib/content/conformance.sh +++ b/lib/content/conformance.sh @@ -15,7 +15,7 @@ if [ ! -x "$SX_SERVER" ]; then fi fi -SUITES=(block doc render api markdown store crdt crdt-store sync fed) +SUITES=(block doc render api markdown validate store crdt crdt-store sync fed) OUT_JSON="lib/content/scoreboard.json" OUT_MD="lib/content/scoreboard.md" @@ -43,6 +43,7 @@ run_suite() { (load "lib/content/render.sx") (load "lib/content/api.sx") (load "lib/content/markdown.sx") +(load "lib/content/validate.sx") (load "lib/content/store.sx") (load "lib/content/crdt.sx") (load "lib/content/crdt-store.sx") diff --git a/lib/content/scoreboard.json b/lib/content/scoreboard.json index ba8f3941..6fce4919 100644 --- a/lib/content/scoreboard.json +++ b/lib/content/scoreboard.json @@ -5,13 +5,14 @@ "render": {"pass": 42, "fail": 0}, "api": {"pass": 26, "fail": 0}, "markdown": {"pass": 20, "fail": 0}, + "validate": {"pass": 17, "fail": 0}, "store": {"pass": 29, "fail": 0}, "crdt": {"pass": 34, "fail": 0}, "crdt-store": {"pass": 14, "fail": 0}, "sync": {"pass": 14, "fail": 0}, "fed": {"pass": 20, "fail": 0} }, - "total_pass": 277, + "total_pass": 294, "total_fail": 0, - "total": 277 + "total": 294 } diff --git a/lib/content/scoreboard.md b/lib/content/scoreboard.md index 5249a8bb..2a3fcb5c 100644 --- a/lib/content/scoreboard.md +++ b/lib/content/scoreboard.md @@ -9,9 +9,10 @@ _Generated by `lib/content/conformance.sh`_ | render | 42 | 0 | 42 | | api | 26 | 0 | 26 | | markdown | 20 | 0 | 20 | +| validate | 17 | 0 | 17 | | store | 29 | 0 | 29 | | crdt | 34 | 0 | 34 | | crdt-store | 14 | 0 | 14 | | sync | 14 | 0 | 14 | | fed | 20 | 0 | 20 | -| **Total** | **277** | **0** | **277** | +| **Total** | **294** | **0** | **294** | diff --git a/lib/content/tests/validate.sx b/lib/content/tests/validate.sx new file mode 100644 index 00000000..0cd113be --- /dev/null +++ b/lib/content/tests/validate.sx @@ -0,0 +1,116 @@ +;; Extension — document integrity validation. + +(st-bootstrap-classes!) +(content-bootstrap-blocks!) +(content-bootstrap-doc!) + +;; ── a fully valid document ── +(define + good + (doc-append + (doc-append + (doc-append (doc-empty "d") (mk-heading "h" 1 "Title")) + (mk-text "p" "Body")) + (mk-list "l" true (list "a" "b")))) +(content-test "valid doc is valid" (content/valid? good) true) +(content-test "valid doc no issues" (content/validate good) (list)) + +;; ── bad field types ── +(content-test + "heading bad level" + (content/issue-kinds + (doc-append (doc-empty "d") (mk-heading "h" "notnum" "T"))) + (list "field")) +(content-test + "text bad type" + (content/issue-kinds + (doc-append (doc-empty "d") (mk-text "p" 42))) + (list "field")) +(content-test + "image two bad attrs" + (len + (content/validate + (doc-append (doc-empty "d") (mk-image "i" 1 2)))) + 2) +(content-test + "list bad ordered + items" + (len + (content/validate + (doc-append (doc-empty "d") (mk-list "l" "yes" "nope")))) + 2) +(content-test + "valid image ok" + (content/valid? + (doc-append (doc-empty "d") (mk-image "i" "/a.png" "alt"))) + true) + +;; ── id checks ── +(content-test + "blank id" + (content/issue-kinds (doc-append (doc-empty "d") (mk-text "" "x"))) + (list "id")) +(content-test + "nil id" + (content/issue-kinds + (doc-append (doc-empty "d") (blk-set (mk-text "x" "y") "id" nil))) + (list "id")) + +;; ── duplicate ids ── +(define + dup + (doc-append + (doc-append (doc-empty "d") (mk-text "x" "a")) + (mk-text "x" "b"))) +(content-test + "duplicate id detected" + (content/issue-kinds dup) + (list "duplicate")) +(content-test + "duplicate reported once" + (len + (filter (fn (i) (= (get i :kind) "duplicate")) (content/validate dup))) + 1) +(content-test "duplicate not valid" (content/valid? dup) false) + +;; ── unknown block type (raw base instance) ── +(define raw (st-iv-set! (st-make-instance "CtBlock") "id" "z")) +(content-test + "unknown type flagged" + (content/issue-kinds (doc-append (doc-empty "d") raw)) + (list "type")) + +;; ── issue carries id + detail ── +(define + iss + (first + (content/validate + (doc-append (doc-empty "d") (mk-text "bad" 9))))) +(content-test "issue has id" (get iss :id) "bad") +(content-test "issue has detail" (string? (get iss :detail)) true) + +;; ── multiple issues across blocks accumulate ── +(define + messy + (doc-append + (doc-append (doc-empty "d") (mk-heading "h" "x" "ok")) + (mk-text "" 5))) +(content-test + "issues accumulate" + (> (len (content/validate messy)) 2) + true) + +;; ── all block types valid when well-formed ── +(define + allgood + (doc-append + (doc-append + (doc-append + (doc-append + (doc-append + (doc-append (doc-empty "d") (mk-code "c" "sx" "(+ 1 2)")) + (mk-quote "q" "Ada" "to err")) + (mk-embed "e" "https://v" "vimeo")) + (mk-divider "dv")) + (mk-heading "hh" 2 "H")) + (mk-text "tt" "T"))) +(content-test "all well-formed types valid" (content/valid? allgood) true) diff --git a/lib/content/validate.sx b/lib/content/validate.sx new file mode 100644 index 00000000..3a37cefc --- /dev/null +++ b/lib/content/validate.sx @@ -0,0 +1,156 @@ +;; content-on-sx — document integrity validation. +;; +;; Guards imports, edits and federated input: checks each block's id and the +;; required fields/types for its kind, plus document-level duplicate ids. Returns +;; a list of issue dicts {:id :kind :detail}; an empty list means valid. Dispatch +;; on block type is a validation-boundary concern, not core behaviour. +;; +;; Requires (loaded by harness): block.sx, doc.sx. + +(define ct-issue (fn (id kind detail) {:id id :detail detail :kind kind})) + +(define + ct-flatmap + (fn + (f xs) + (if + (= (len xs) 0) + (list) + (append (f (first xs)) (ct-flatmap f (rest xs)))))) + +(define ct-count-in (fn (x xs) (len (filter (fn (y) (= y x)) xs)))) + +;; dedup, order-preserving (keep first occurrence) +(define + ct-uniq-loop + (fn + (xs seen) + (if + (= (len xs) 0) + (reverse seen) + (if + (> (ct-count-in (first xs) seen) 0) + (ct-uniq-loop (rest xs) seen) + (ct-uniq-loop (rest xs) (cons (first xs) seen)))))) + +(define ct-uniq (fn (xs) (ct-uniq-loop xs (list)))) + +;; ── id checks ── +(define + content/-id-issues + (fn + (b) + (let + ((id (blk-id b))) + (if + (and (string? id) (> (len id) 0)) + (list) + (list (ct-issue id "id" "block id must be a non-empty string")))))) + +(define + ct-field-issue + (fn (id ok? what) (if ok? (list) (list (ct-issue id "field" what))))) + +;; ── per-type field checks ── +(define + content/-field-issues + (fn + (b) + (let + ((t (blk-type b)) (id (blk-id b))) + (cond + ((= t "heading") + (append + (ct-field-issue + id + (number? (blk-get b "level")) + "heading level must be a number") + (ct-field-issue + id + (string? (blk-get b "text")) + "heading text must be a string"))) + ((= t "text") + (ct-field-issue + id + (string? (blk-get b "text")) + "text must be a string")) + ((= t "code") + (append + (ct-field-issue + id + (string? (blk-get b "language")) + "code language must be a string") + (ct-field-issue + id + (string? (blk-get b "text")) + "code text must be a string"))) + ((= t "quote") + (ct-field-issue + id + (string? (blk-get b "text")) + "quote text must be a string")) + ((= t "image") + (append + (ct-field-issue + id + (string? (blk-get b "src")) + "image src must be a string") + (ct-field-issue + id + (string? (blk-get b "alt")) + "image alt must be a string"))) + ((= t "embed") + (append + (ct-field-issue + id + (string? (blk-get b "url")) + "embed url must be a string") + (ct-field-issue + id + (string? (blk-get b "provider")) + "embed provider must be a string"))) + ((= t "divider") (list)) + ((= t "list") + (append + (ct-field-issue + id + (boolean? (blk-get b "ordered")) + "list ordered must be a boolean") + (ct-field-issue + id + (list? (blk-get b "items")) + "list items must be a list"))) + (else (list (ct-issue id "type" (str "unknown block type: " t)))))))) + +(define + content/-block-issues + (fn (b) (append (content/-id-issues b) (content/-field-issues b)))) + +;; ── document-level: duplicate ids ── +(define + content/-dup-issues + (fn + (doc) + (let + ((ids (doc-ids doc))) + (map + (fn (id) (ct-issue id "duplicate" (str "duplicate block id: " id))) + (ct-uniq + (filter (fn (id) (> (ct-count-in id ids) 1)) ids)))))) + +;; ── public ── +(define + content/validate + (fn + (doc) + (append + (content/-dup-issues doc) + (ct-flatmap content/-block-issues (doc-blocks doc))))) + +(define + content/valid? + (fn (doc) (= (len (content/validate doc)) 0))) + +(define + content/issue-kinds + (fn (doc) (map (fn (i) (get i :kind)) (content/validate doc)))) diff --git a/plans/content-on-sx.md b/plans/content-on-sx.md index 8c526778..bc103f1f 100644 --- a/plans/content-on-sx.md +++ b/plans/content-on-sx.md @@ -19,7 +19,7 @@ injected adapter, not core. ## Status (rolling) -`bash lib/content/conformance.sh` → **277/277** (Phases 1–4 COMPLETE + extensions: HTML/SX escaping, Markdown, durable CRDT replication) +`bash lib/content/conformance.sh` → **294/294** (Phases 1–4 COMPLETE + extensions: HTML/SX escaping, Markdown, durable CRDT replication, validation) ## Ground rules @@ -80,9 +80,17 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─ - [x] asSx wire string-escaping (`String>>sxEscaped`: \ and " in SX literals) - [x] Markdown render mode (`asMarkdown:` / `content/render doc "md"`) - [x] durable CRDT replication (`crdt-store.sx`: ops on persist, replay + converge) +- [x] document validation (`validate.sx`: ids, per-type fields, duplicate ids) ## Progress log +- 2026-06-07 — Extension: document validation (`validate.sx`). `content/validate` + returns issue dicts `{:id :kind :detail}` (empty = valid); `content/valid?` + and `content/issue-kinds` convenience. Checks block id (non-empty string), + per-type required fields/types (heading level number, image src/alt strings, + list ordered boolean + items list, etc.), unknown block types, and + document-level duplicate ids. Guards imports/edits/federated input. 17 tests; + suite 294/294. - 2026-06-07 — Extension: durable CRDT replication (`crdt-store.sx`), uniting Phase 2 (persist) + Phase 3 (CvRDT). Each replica appends its CRDT ops to its own stream (`crdt::`); `crdt/replay` folds one log into a state,