From 2c1d8c8064ee87cbe12b62dd5f63f5723998d3a2 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 00:53:06 +0000 Subject: [PATCH] content: HTML escaping at render boundary (String>>htmlEscaped) + 8 tests (238/238) Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/content/render.sx | 26 +++++++++++++++--------- lib/content/scoreboard.json | 6 +++--- lib/content/scoreboard.md | 4 ++-- lib/content/tests/render.sx | 40 ++++++++++++++++++++++++++++++++++++- plans/content-on-sx.md | 14 ++++++++++++- 5 files changed, 74 insertions(+), 16 deletions(-) diff --git a/lib/content/render.sx b/lib/content/render.sx index e939ebb7..124aeffb 100644 --- a/lib/content/render.sx +++ b/lib/content/render.sx @@ -6,40 +6,48 @@ ;; children's renderings, so (asHTML doc) / (asSx doc) are pure polymorphic ;; sends with no type dispatch in the SX layer. ;; -;; NOTE: no HTML escaping yet — text is emitted verbatim. Escaping is a boundary -;; concern to add before any untrusted content reaches render. +;; HTML escaping happens HERE, at the boundary: text and attribute values are +;; passed through String>>htmlEscaped (& < > "), so untrusted content cannot +;; break out of its element. asSx wire output is not yet string-escaped (next). (define content-bootstrap-render! (fn () (begin + (ct-def-method! + "String" + "htmlEscaped" + "htmlEscaped | out i n c | out := ''. n := self size. i := 1. [i <= n] whileTrue: [c := self at: i. (c = $&) ifTrue: [out := out , '&'] ifFalse: [(c = $<) ifTrue: [out := out , '<'] ifFalse: [(c = $>) ifTrue: [out := out , '>'] ifFalse: [(c = $\") ifTrue: [out := out , '"'] ifFalse: [out := out , c asString]]]]. i := i + 1]. ^ out") (ct-def-method! "CtHeading" "asHTML" - "asHTML | t | t := level printString. ^ '' , text , ''") - (ct-def-method! "CtText" "asHTML" "asHTML ^ '

' , text , '

'") + "asHTML | t | t := level printString. ^ '' , text htmlEscaped , ''") + (ct-def-method! + "CtText" + "asHTML" + "asHTML ^ '

' , text htmlEscaped , '

'") (ct-def-method! "CtCode" "asHTML" - "asHTML ^ '
' , text , '
'") + "asHTML ^ '
' , text htmlEscaped , '
'") (ct-def-method! "CtQuote" "asHTML" - "asHTML ^ '
' , text , '
'") + "asHTML ^ '
' , text htmlEscaped , '
'") (ct-def-method! "CtImage" "asHTML" - "asHTML ^ '\"''") + "asHTML ^ '\"''") (ct-def-method! "CtEmbed" "asHTML" - "asHTML ^ ''") + "asHTML ^ ''") (ct-def-method! "CtDivider" "asHTML" "asHTML ^ '
'") (ct-def-method! "CtList" "asHTML" - "asHTML | tag | tag := ordered ifTrue: ['ol'] ifFalse: ['ul']. ^ '<' , tag , '>' , (items inject: '' into: [:a :x | a , '
  • ' , x , '
  • ']) , ''") + "asHTML | tag | tag := ordered ifTrue: ['ol'] ifFalse: ['ul']. ^ '<' , tag , '>' , (items inject: '' into: [:a :x | a , '
  • ' , x htmlEscaped , '
  • ']) , ''") (ct-def-method! "CtDoc" "asHTML" diff --git a/lib/content/scoreboard.json b/lib/content/scoreboard.json index fcd1eda0..5f5b647a 100644 --- a/lib/content/scoreboard.json +++ b/lib/content/scoreboard.json @@ -2,14 +2,14 @@ "suites": { "block": {"pass": 38, "fail": 0}, "doc": {"pass": 40, "fail": 0}, - "render": {"pass": 29, "fail": 0}, + "render": {"pass": 37, "fail": 0}, "api": {"pass": 26, "fail": 0}, "store": {"pass": 29, "fail": 0}, "crdt": {"pass": 34, "fail": 0}, "sync": {"pass": 14, "fail": 0}, "fed": {"pass": 20, "fail": 0} }, - "total_pass": 230, + "total_pass": 238, "total_fail": 0, - "total": 230 + "total": 238 } diff --git a/lib/content/scoreboard.md b/lib/content/scoreboard.md index a84b6913..8b66364d 100644 --- a/lib/content/scoreboard.md +++ b/lib/content/scoreboard.md @@ -6,10 +6,10 @@ _Generated by `lib/content/conformance.sh`_ |-------|-----:|-----:|------:| | block | 38 | 0 | 38 | | doc | 40 | 0 | 40 | -| render | 29 | 0 | 29 | +| render | 37 | 0 | 37 | | api | 26 | 0 | 26 | | store | 29 | 0 | 29 | | crdt | 34 | 0 | 34 | | sync | 14 | 0 | 14 | | fed | 20 | 0 | 20 | -| **Total** | **230** | **0** | **230** | +| **Total** | **238** | **0** | **238** | diff --git a/lib/content/tests/render.sx b/lib/content/tests/render.sx index f7ade85a..b81d0a72 100644 --- a/lib/content/tests/render.sx +++ b/lib/content/tests/render.sx @@ -1,5 +1,5 @@ ;; Phase 1 — render boundary. asHTML / asSx are polymorphic message sends on -;; blocks and the document. +;; blocks and the document. HTML escaping happens at the boundary. (st-bootstrap-classes!) (content-bootstrap-blocks!) @@ -71,3 +71,41 @@ "render after delete" (asHTML (doc-delete d "p")) "

    Title


    ") + +;; ── HTML escaping at the boundary ── +(define xh (mk-heading "xh" 2 "A < B & \"C\"")) +(define xp (mk-text "xp" "")) +(define xi (mk-image "xi" "/a.png?x=1&y=2" "tag ")) +(define xl (mk-list "xl" false (list "a<1" "b&2"))) +(content-test + "escape heading text" + (asHTML xh) + "

    A < B & "C"

    ") +(content-test + "escape paragraph" + (asHTML xp) + "

    <script>alert(1)</script>

    ") +(content-test + "escape image attrs" + (asHTML xi) + "\"tag") +(content-test + "escape list items" + (asHTML xl) + "
    • a<1
    • b&2
    ") +(content-test + "escape ampersand once" + (asHTML (mk-text "amp" "a & b")) + "

    a & b

    ") +(content-test + "escape in document" + (asHTML (doc-append (doc-empty "e") xp)) + "

    <script>alert(1)</script>

    ") +(content-test + "no over-escape plain" + (asHTML (mk-text "plain" "hello world")) + "

    hello world

    ") +(content-test + "escape code body" + (asHTML (mk-code "xc" "html" "
    &
    ")) + "
    <div> & </div>
    ") diff --git a/plans/content-on-sx.md b/plans/content-on-sx.md index 5be0b222..9413fb09 100644 --- a/plans/content-on-sx.md +++ b/plans/content-on-sx.md @@ -19,7 +19,7 @@ injected adapter, not core. ## Status (rolling) -`bash lib/content/conformance.sh` → **230/230** (Phases 1–4 COMPLETE: blocks, doc, render, api, persist op log, CRDT merge, Ghost sync, federation) +`bash lib/content/conformance.sh` → **238/238** (Phases 1–4 COMPLETE + extensions: HTML escaping) ## Ground rules @@ -75,8 +75,20 @@ lib/content/api.sx ── (content/edit) (content/render) (content/history) ─ - [x] federated documents (peer-authored blocks) — trust-gated stub - [x] tests: round-trip import/export, conflict on concurrent external edit +## Extensions (post-roadmap) +- [x] HTML escaping at the render boundary (`String>>htmlEscaped`: & < > ") +- [ ] asSx wire string-escaping (" and \ in SX string literals) + ## Progress log +- 2026-06-07 — Extension: HTML escaping at the render boundary. Added + `String>>htmlEscaped` (iterative char walk escaping & < > ", order-safe so & + isn't double-escaped) and routed every `asHTML` text/attr through it — heading, + text, code body + language, quote, image src/alt, embed url, list items. + Render stays fully polymorphic in Smalltalk; escaping lives at the boundary. + +8 render tests (incl. `