From 7791867bbcc39a693e832ad107f4536ee42e8944 Mon Sep 17 00:00:00 2001
From: giles
Date: Sun, 7 Jun 2026 02:09:17 +0000
Subject: [PATCH] content: document statistics (stats.sx) + 17 tests (433/433)

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 lib/content/conformance.sh  |  3 +-
 lib/content/scoreboard.json |  5 +--
 lib/content/scoreboard.md   |  3 +-
 lib/content/stats.sx        | 49 ++++++++++++++++++++++++++
 lib/content/tests/stats.sx  | 68 +++++++++++++++++++++++++++++++++++++
 plans/content-on-sx.md      |  8 ++++-
 6 files changed, 131 insertions(+), 5 deletions(-)
 create mode 100644 lib/content/stats.sx
 create mode 100644 lib/content/tests/stats.sx

diff --git a/lib/content/conformance.sh b/lib/content/conformance.sh
index 411b9394..ada93017 100755
--- a/lib/content/conformance.sh
+++ b/lib/content/conformance.sh
@@ -15,7 +15,7 @@ if [ ! -x "$SX_SERVER" ]; then
   fi
 fi
 
-SUITES=(block doc render api meta markdown text section validate store snapshot crdt crdt-store sync md-import fed)
+SUITES=(block doc render api meta markdown text section stats validate store snapshot crdt crdt-store sync md-import fed)
 
 OUT_JSON="lib/content/scoreboard.json"
 OUT_MD="lib/content/scoreboard.md"
@@ -45,6 +45,7 @@ run_suite() {
 (load "lib/content/meta.sx")
 (load "lib/content/text.sx")
 (load "lib/content/section.sx")
+(load "lib/content/stats.sx")
 (load "lib/content/markdown.sx")
 (load "lib/content/validate.sx")
 (load "lib/content/store.sx")
diff --git a/lib/content/scoreboard.json b/lib/content/scoreboard.json
index 12236831..1c41185f 100644
--- a/lib/content/scoreboard.json
+++ b/lib/content/scoreboard.json
@@ -8,6 +8,7 @@
     "markdown": {"pass": 20, "fail": 0},
     "text": {"pass": 20, "fail": 0},
     "section": {"pass": 25, "fail": 0},
+    "stats": {"pass": 17, "fail": 0},
     "validate": {"pass": 23, "fail": 0},
     "store": {"pass": 29, "fail": 0},
     "snapshot": {"pass": 20, "fail": 0},
@@ -17,7 +18,7 @@
     "md-import": {"pass": 24, "fail": 0},
     "fed": {"pass": 20, "fail": 0}
   },
-  "total_pass": 416,
+  "total_pass": 433,
   "total_fail": 0,
-  "total": 416
+  "total": 433
 }
diff --git a/lib/content/scoreboard.md b/lib/content/scoreboard.md
index d311d047..17ab7aec 100644
--- a/lib/content/scoreboard.md
+++ b/lib/content/scoreboard.md
@@ -12,6 +12,7 @@ _Generated by `lib/content/conformance.sh`_
 | markdown | 20 | 0 | 20 |
 | text | 20 | 0 | 20 |
 | section | 25 | 0 | 25 |
+| stats | 17 | 0 | 17 |
 | validate | 23 | 0 | 23 |
 | store | 29 | 0 | 29 |
 | snapshot | 20 | 0 | 20 |
@@ -20,4 +21,4 @@ _Generated by `lib/content/conformance.sh`_
 | sync | 14 | 0 | 14 |
 | md-import | 24 | 0 | 24 |
 | fed | 20 | 0 | 20 |
-| **Total** | **416** | **0** | **416** |
+| **Total** | **433** | **0** | **433** |
diff --git a/lib/content/stats.sx b/lib/content/stats.sx
new file mode 100644
index 00000000..783ced6b
--- /dev/null
+++ b/lib/content/stats.sx
@@ -0,0 +1,49 @@
+;; content-on-sx — document statistics (word/char/block counts, reading time).
+;;
+;; Counts derive from the plain-text projection (asText, tree-accurate via
+;; section recursion) and a tree block count (inline class check, so this needs
+;; no section.sx). Reading time uses 200 wpm, rounded up.
+;;
+;; Requires (loaded by harness): block.sx, doc.sx, text.sx (asText).
+
+;; Words of s: split on single spaces, dropping the "" left by repeated spaces.
+;; NOTE(review): only " " separates words — confirm asText never emits tabs/newlines.
+(define ct-words
+  (fn (s) (filter (fn (w) (if (= w "") false true)) (split s " "))))
+
+;; Integer ceiling of a / b.
+(define ct-ceil-div (fn (a b) (quotient (+ a (- b 1)) b)))
+
+;; Whole minutes needed to read w words at 200 wpm; zero words take zero minutes.
+(define ct-stat-minutes (fn (w) (if (= w 0) 0 (ct-ceil-div w 200))))
+
+;; True when b is an instance whose :class is "CtSection".
+(define ct-stat-section?
+  (fn (b) (and (st-instance? b) (= (get b :class) "CtSection"))))
+
+;; Count every block in the list, descending into each section's "children";
+;; a section whose children value is not a list contributes only itself.
+(define ct-stat-count
+  (fn (blocks)
+    (if (= (len blocks) 0)
+      0
+      (let ((b (first blocks)))
+        (+ (+ 1
+              (if (ct-stat-section? b)
+                (let ((ch (st-iv-get b "children")))
+                  (if (list? ch) (ct-stat-count ch) 0))
+                0))
+           (ct-stat-count (rest blocks)))))))
+
+(define content/word-count (fn (doc) (len (ct-words (asText doc)))))
+(define content/char-count (fn (doc) (string-length (asText doc))))
+(define content/block-count (fn (doc) (ct-stat-count (doc-blocks doc))))
+(define content/reading-minutes
+  (fn (doc) (ct-stat-minutes (content/word-count doc))))
+
+;; Every statistic in one dict; asText and the word split each run once here.
+(define content/stats
+  (fn (doc)
+    (let ((text (asText doc)))
+      (let ((w (len (ct-words text))))
+        {:blocks (content/block-count doc) :reading-minutes (ct-stat-minutes w) :words w :chars (string-length text)}))))
diff --git a/lib/content/tests/stats.sx b/lib/content/tests/stats.sx
new file mode 100644
index 00000000..73d13590
--- /dev/null
+++ b/lib/content/tests/stats.sx
@@ -0,0 +1,68 @@
+;; Extension — document statistics.
+
+(st-bootstrap-classes!)
+(content/bootstrap!)
+(content-bootstrap-text!)
+(content-bootstrap-section!)
+
+;; ── empty doc ──
+(define e (doc-empty "e"))
+(content-test "empty words" (content/word-count e) 0)
+(content-test "empty chars" (content/char-count e) 0)
+(content-test "empty blocks" (content/block-count e) 0)
+(content-test "empty reading" (content/reading-minutes e) 0)
+(content-test "empty stats" (content/stats e) {:blocks 0 :reading-minutes 0 :words 0 :chars 0})
+
+;; ── simple doc ──
+(define
+  d
+  (doc-append
+    (doc-append (doc-empty "d") (mk-heading "h" 1 "Hello World"))
+    (mk-text "p" "one two three")))
+(content-test "word count" (content/word-count d) 5)
+(content-test
+  "char count"
+  (content/char-count d)
+  (string-length "Hello World one two three"))
+(content-test "block count" (content/block-count d) 2)
+(content-test "reading rounds up" (content/reading-minutes d) 1)
+
+;; ── reading time at 0 vs 1 word ──
+(content-test
+  "one word one minute"
+  (content/reading-minutes (doc-append (doc-empty "d") (mk-text "p" "hi")))
+  1)
+
+;; ── block count includes nested section children ──
+(define
+  nested
+  (doc-append
+    (doc-empty "d")
+    (mk-section
+      "s"
+      (list (mk-heading "nh" 1 "A") (mk-text "np" "b c")))))
+(content-test
+  "block count counts section + children"
+  (content/block-count nested)
+  3)
+(content-test
+  "word count descends into section"
+  (content/word-count nested)
+  3)
+
+;; ── deep nesting ──
+(define
+  deep
+  (doc-append
+    (doc-empty "d")
+    (mk-section
+      "o"
+      (list (mk-text "a" "x") (mk-section "i" (list (mk-text "b" "y z")))))))
+(content-test "deep block count" (content/block-count deep) 4)
+(content-test "deep word count" (content/word-count deep) 3)
+
+;; ── stats dict shape ──
+(define s (content/stats d))
+(content-test "stats words" (get s :words) 5)
+(content-test "stats blocks" (get s :blocks) 2)
+(content-test "stats has reading" (get s :reading-minutes) 1)
diff --git a/plans/content-on-sx.md b/plans/content-on-sx.md
index 4e89ba25..38ee01e0 100644
--- a/plans/content-on-sx.md
+++ b/plans/content-on-sx.md
@@ -19,7 +19,7 @@ injected adapter, not core.
 
 ## Status (rolling)
 
-`bash lib/content/conformance.sh` → **416/416** (Phases 1–4 COMPLETE + 10 extensions: HTML/SX escaping, Markdown render+import, CRDT replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees)
+`bash lib/content/conformance.sh` → **433/433** (Phases 1–4 COMPLETE + 11 extensions: HTML/SX escaping, Markdown render+import, CRDT replication, tree-aware validation, snapshot cache, doc metadata, plain-text render, nested block trees, doc stats)
 
 ## Ground rules
 
@@ -86,9 +86,15 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─
 - [x] document metadata (`meta.sx`: title/slug/tags + Ghost title plumbing)
 - [x] plain-text render + excerpt (`text.sx`: asText, content/excerpt)
 - [x] nested block trees (`section.sx`: CtSection container, recursive render, deep-find)
+- [x] document statistics (`stats.sx`: word/char/block counts, reading time)
 
 ## Progress log
 
+- 2026-06-07 — Extension: document statistics (`stats.sx`). `content/stats`
+  returns `{:words :chars :blocks :reading-minutes}`; word/char counts derive
+  from the tree-accurate `asText` projection, block count from an inline tree
+  walk (no section.sx dep), reading time at 200 wpm rounded up. Counts descend
+  into nested sections. 17 tests; suite 433/433.
 - 2026-06-07 — Refinement: tree-aware validation. `validate.sx` now flattens
   the whole block tree (descending into `CtSection` children, guarding malformed
   non-list children) so field checks and duplicate-id detection cover nested