host: universal content-address (CID) on every post

Every object (content/type/relation post) now carries a stable :cid = hash of its
canonical, key-sorted content. The runtime has no hash primitive, so host/blog--canon
(recursive, sorts keys -> identical across processes regardless of dict insertion order)
and a tail-recursive double-hash (host/blog--hash-go / host/blog--cid-of) are built in SX.
The slug (a name) and any prior :cid are excluded -> the CID hashes content only.
git-shaped: slug = mutable name -> CID = immutable content identity.

Single choke point host/blog--write! stamps the CID on every record write; routed all
three write sites (put!, set-schema!, seed-rel!) through it. Accessors host/blog-cid and
host/blog-by-cid (reverse lookup). +6 conformance tests (blog suite 134/134). Plan: new
'Content-addressability is universal' section (CID model, git-shape, federation: types
flow across fed-sx as shared content-addressed vocabulary; structure/behaviour trust-split).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-30 10:14:44 +00:00
parent 99d8527d30
commit ad86f3051e
3 changed files with 103 additions and 4 deletions

View File

@@ -22,6 +22,41 @@
(define host/blog-use-store! (fn (b) (set! host/blog-store b)))
(define host/blog--key (fn (slug) (str "blog:" slug)))
;; ── content-addressing: a universal CID over the canonical form ─────
;; Every object (content/type/relation/constraint post) carries a stable :cid =
;; hash of its CONTENT. The runtime has no hash primitive, so the canon serializer
;; and a tail-recursive double-hash are built here. Canon SORTS keys, so the CID is
;; identical across processes regardless of dict insertion / hash-seed order. The
;; :slug (a mutable name) and any prior :cid are excluded — the CID hashes content
;; only. git-shaped: slug = mutable name -> CID = immutable content identity.
(define host/blog--canon
(fn (v)
(let ((t (type-of v)))
(cond
((= t "dict")
(str "{" (join "|"
(map (fn (k) (str k "=" (host/blog--canon (get v k))))
(filter (fn (k) (and (not (= k "slug")) (not (= k "cid"))))
(sort (keys v))))) "}"))
((= t "list") (str "[" (join "|" (map host/blog--canon v)) "]"))
((= t "nil") "~")
(else (str v))))))
(define host/blog--hash-go
(fn (s i n h1 h2)
(if (>= i n)
(str h1 "-" h2)
(let ((c (char-code (substr s i 1))))
(host/blog--hash-go s (+ i 1) n
(mod (+ (* h1 131) c) 1000000007)
(mod (+ (* h2 137) c) 998244353))))))
(define host/blog--cid-of
(fn (rec) (let ((s (host/blog--canon rec))) (str "z" (host/blog--hash-go s 0 (len s) 7 11)))))
;; the single choke point for every record write: stamps the content CID, then puts.
(define host/blog--write!
(fn (slug rec)
(persist/backend-kv-put host/blog-store (host/blog--key slug)
(merge rec {:cid (host/blog--cid-of rec)}))))
;; slug from a title: lowercase, words joined by '-'. (Punctuation kept simple.)
(define host/blog-slugify
(fn (title)
@@ -38,7 +73,7 @@
(define host/blog-put!
(fn (slug title sx-content status)
(let ((prev (host/blog-get slug)))
(persist/backend-kv-put host/blog-store (host/blog--key slug)
(host/blog--write! slug
(merge (if prev prev {})
{:slug slug :title title :sx-content sx-content :status status})))))
(define host/blog-delete!
@@ -63,6 +98,15 @@
{:slug slug :title (get r :title) :status (get r :status)}))
(host/blog-slugs))))
;; a post's content CID — its global, location-independent identity (nil if unknown).
(define host/blog-cid (fn (slug) (get (host/blog-get slug) :cid)))
;; reverse lookup: a slug whose record has this CID (nil if none). Scan; not for renders.
(define host/blog-by-cid
(fn (cid)
(reduce
(fn (acc slug) (if acc acc (if (= (host/blog-cid slug) cid) slug acc)))
nil (host/blog-slugs))))
;; ── render ──────────────────────────────────────────────────────────
;; A post's sx_content is SX element markup -> HTML via render-page (which supplies
;; the server env so components resolve + keyword attrs are kept).
@@ -358,8 +402,7 @@
(fn (slug schema)
(let ((r (host/blog-get slug)))
(when r
(persist/backend-kv-put host/blog-store (host/blog--key slug)
(merge r {:schema schema}))))))
(host/blog--write! slug (merge r {:schema schema}))))))
;; every element tag in a parsed content tree, recursively (the heads of nested
;; lists) — so "requires h1" matches an h1 even inside an article/section wrapper.
@@ -397,7 +440,7 @@
(fn (slug title symmetric label inverse-label)
(begin
(when (not (host/blog-exists? slug))
(persist/backend-kv-put host/blog-store (host/blog--key slug)
(host/blog--write! slug
{:slug slug :title title
:sx-content (str "(article (h1 \"" title "\") (p \"A relation — posts link to each other through it. Its symmetry and labels live on this post.\"))")
:status "published"

View File

@@ -659,6 +659,30 @@
(contains? (dream-resp-body (host-bl-oapp (host-bl-req "/open-post/"))) "<p>o</p>")
true)
;; ── content-addressing: every object carries a stable CID ───────────
;; A CID is the hash of the canonical (key-sorted) content; the slug (a name) and
;; any prior :cid are excluded. Same content -> same CID, across slugs and processes.
(host/blog-use-store! (persist/open))
(host/blog-put! "cid-a" "Same Body" "(p \"same\")" "published")
(host/blog-put! "cid-b" "Same Body" "(p \"same\")" "published")
(host-bl-test "put! stamps a non-nil CID"
(and (not (nil? (host/blog-cid "cid-a"))) (> (len (host/blog-cid "cid-a")) 1)) true)
(host-bl-test "content-addressed: identical content -> identical CID (slug excluded)"
(= (host/blog-cid "cid-a") (host/blog-cid "cid-b")) true)
(host-bl-test "CID changes when content changes"
(let ((before (host/blog-cid "cid-a")))
(host/blog-put! "cid-a" "Same Body" "(p \"different now\")" "published")
(not (= before (host/blog-cid "cid-a"))))
true)
(host-bl-test "canon excludes :slug and :cid"
(= (host/blog--canon {:slug "x" :cid "old" :title "T"})
(host/blog--canon {:title "T"}))
true)
(host-bl-test "by-cid reverse lookup finds a slug with that CID"
(not (nil? (host/blog-by-cid (host/blog-cid "cid-b")))) true)
(host-bl-test "by-cid of an unknown CID is nil"
(host/blog-by-cid "znope-nope") nil)
(define
host-bl-tests-run!
(fn ()

View File

@@ -11,6 +11,38 @@ itself in its own graph.
Supersedes the hardcoded `:candidates "types"/"tags"/"all"` field of `host/blog-rel-kinds`.
## Content-addressability is universal (foundational)
**Every object carries a content-address (CID) — content-posts, type-posts, relation-posts,
constraint-posts, all of them.** A CID is the hash of the object's *canonical* form: a recursive,
**key-sorted** serialization (so insertion order, and any process-seed-dependent dict ordering, is
irrelevant — identical content always yields an identical CID). The runtime has no hash primitive,
so the canon serializer + a tail-recursive double-hash are built in SX (`host/blog--canon`,
`host/blog--cid-of`); the slug is excluded from the hash (it's a *name*, not content).
The model is **git-shaped**: the **slug is a mutable name → CID** (a branch pointing at a commit);
the **CID is the immutable content identity** (the commit). Editing a post mints a new CID; the slug
follows. Type evolution is the same — a type *version* is content-addressed, instances reference the
version they were created against. Two objects with identical content *are* the same object (same
CID) — correct content-addressing semantics.
**Why it's foundational (federation).** A CID is a **global, location-independent identity**, so:
- **Types flow across `fed-sx`.** The same type *definition* on any node has the same CID → a
**shared, content-addressed vocabulary**. Federated *instances* reference type CIDs, so a receiving
node can *interpret* them. This is linked-data/RDF realised on the post graph, and it generalises
ActivityPub itself: AP has a *fixed* type vocabulary (Note/Article/Person, Create/Follow/Like) —
the metamodel makes that vocabulary **extensible and user-defined**.
- **Structure / behaviour trust-split** (the federation boundary): type **structure** (schema,
relations, signatures) is declarative and federates *freely* — sharing a definition is sharing a
hash. **Behaviour** (Slice 9 lifecycles/effects) does **not** federate naively: you never run a
remote node's lifecycle with *your* effect primitives (their "ship" could `charge-card`).
Behaviour federates only under high trust, with the effects **re-bound** to local, audited
primitives (their orchestration, your effects). `fed-sx` is already trust-gated — that's the lever.
Build order: stamp a stable CID on every object first (additive — slug-addressing stays the working
key), then a `cid → slug` index, then migrate references / type versioning, then federation.
## North star — the metamodel as a system-construction kit
The destination this is all heading toward: the host stops being "a blog" and becomes a