host: universal content-address (CID) on every post

Every object (content/type/relation post) now carries a stable :cid = hash of its
canonical, key-sorted content. The runtime has no hash primitive, so host/blog--canon
(recursive, sorts keys -> identical across processes regardless of dict insertion order)
and a tail-recursive double-hash (host/blog--hash-go / host/blog--cid-of) are built in SX.
The slug (a name) and any prior :cid are excluded -> the CID hashes content only.
git-shaped: slug = mutable name -> CID = immutable content identity.

Single choke point host/blog--write! stamps the CID on every record write; routed all
three write sites (put!, set-schema!, seed-rel!) through it. Accessors host/blog-cid and
host/blog-by-cid (reverse lookup). +6 conformance tests (blog suite 134/134). Plan: new
'Content-addressability is universal' section (CID model, git-shape, federation: types
flow across fed-sx as shared content-addressed vocabulary; structure/behaviour trust-split).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-30 10:14:44 +00:00
parent 99d8527d30
commit ad86f3051e
3 changed files with 103 additions and 4 deletions

View File

@@ -22,6 +22,41 @@
(define host/blog-use-store! (fn (b) (set! host/blog-store b)))
(define host/blog--key (fn (slug) (str "blog:" slug)))
;; ── content-addressing: a universal CID over the canonical form ─────
;; Every object (content/type/relation/constraint post) carries a stable :cid =
;; hash of its CONTENT. The runtime has no hash primitive, so the canon serializer
;; and a tail-recursive double-hash are built here. Canon SORTS keys, so the CID is
;; identical across processes regardless of dict insertion / hash-seed order. The
;; :slug (a mutable name) and any prior :cid are excluded — the CID hashes content
;; only. git-shaped: slug = mutable name -> CID = immutable content identity.
(define host/blog--canon
(fn (v)
(let ((t (type-of v)))
(cond
((= t "dict")
(str "{" (join "|"
(map (fn (k) (str k "=" (host/blog--canon (get v k))))
(filter (fn (k) (and (not (= k "slug")) (not (= k "cid"))))
(sort (keys v))))) "}"))
((= t "list") (str "[" (join "|" (map host/blog--canon v)) "]"))
((= t "nil") "~")
(else (str v))))))
(define host/blog--hash-go
(fn (s i n h1 h2)
(if (>= i n)
(str h1 "-" h2)
(let ((c (char-code (substr s i 1))))
(host/blog--hash-go s (+ i 1) n
(mod (+ (* h1 131) c) 1000000007)
(mod (+ (* h2 137) c) 998244353))))))
(define host/blog--cid-of
(fn (rec) (let ((s (host/blog--canon rec))) (str "z" (host/blog--hash-go s 0 (len s) 7 11)))))
;; the single choke point for every record write: stamps the content CID, then puts.
(define host/blog--write!
(fn (slug rec)
(persist/backend-kv-put host/blog-store (host/blog--key slug)
(merge rec {:cid (host/blog--cid-of rec)}))))
;; slug from a title: lowercase, words joined by '-'. (Punctuation kept simple.)
(define host/blog-slugify
(fn (title)
@@ -38,7 +73,7 @@
(define host/blog-put!
(fn (slug title sx-content status)
(let ((prev (host/blog-get slug)))
(persist/backend-kv-put host/blog-store (host/blog--key slug)
(host/blog--write! slug
(merge (if prev prev {})
{:slug slug :title title :sx-content sx-content :status status})))))
(define host/blog-delete!
@@ -63,6 +98,15 @@
{:slug slug :title (get r :title) :status (get r :status)}))
(host/blog-slugs))))
;; a post's content CID — its global, location-independent identity (nil if unknown).
(define host/blog-cid (fn (slug) (get (host/blog-get slug) :cid)))
;; reverse lookup: a slug whose record has this CID (nil if none). Scan; not for renders.
(define host/blog-by-cid
(fn (cid)
(reduce
(fn (acc slug) (if acc acc (if (= (host/blog-cid slug) cid) slug acc)))
nil (host/blog-slugs))))
;; ── render ──────────────────────────────────────────────────────────
;; A post's sx_content is SX element markup -> HTML via render-page (which supplies
;; the server env so components resolve + keyword attrs are kept).
@@ -358,8 +402,7 @@
(fn (slug schema)
(let ((r (host/blog-get slug)))
(when r
(persist/backend-kv-put host/blog-store (host/blog--key slug)
(merge r {:schema schema}))))))
(host/blog--write! slug (merge r {:schema schema}))))))
;; every element tag in a parsed content tree, recursively (the heads of nested
;; lists) — so "requires h1" matches an h1 even inside an article/section wrapper.
@@ -397,7 +440,7 @@
(fn (slug title symmetric label inverse-label)
(begin
(when (not (host/blog-exists? slug))
(persist/backend-kv-put host/blog-store (host/blog--key slug)
(host/blog--write! slug
{:slug slug :title title
:sx-content (str "(article (h1 \"" title "\") (p \"A relation — posts link to each other through it. Its symmetry and labels live on this post.\"))")
:status "published"

View File

@@ -659,6 +659,30 @@
(contains? (dream-resp-body (host-bl-oapp (host-bl-req "/open-post/"))) "<p>o</p>")
true)
;; ── content-addressing: every object carries a stable CID ───────────
;; A CID is the hash of the canonical (key-sorted) content; the slug (a name) and
;; any prior :cid are excluded. Same content -> same CID, across slugs and processes.
(host/blog-use-store! (persist/open))
(host/blog-put! "cid-a" "Same Body" "(p \"same\")" "published")
(host/blog-put! "cid-b" "Same Body" "(p \"same\")" "published")
(host-bl-test "put! stamps a non-nil CID"
(and (not (nil? (host/blog-cid "cid-a"))) (> (len (host/blog-cid "cid-a")) 1)) true)
(host-bl-test "content-addressed: identical content -> identical CID (slug excluded)"
(= (host/blog-cid "cid-a") (host/blog-cid "cid-b")) true)
(host-bl-test "CID changes when content changes"
(let ((before (host/blog-cid "cid-a")))
(host/blog-put! "cid-a" "Same Body" "(p \"different now\")" "published")
(not (= before (host/blog-cid "cid-a"))))
true)
(host-bl-test "canon excludes :slug and :cid"
(= (host/blog--canon {:slug "x" :cid "old" :title "T"})
(host/blog--canon {:title "T"}))
true)
(host-bl-test "by-cid reverse lookup finds a slug with that CID"
(not (nil? (host/blog-by-cid (host/blog-cid "cid-b")))) true)
(host-bl-test "by-cid of an unknown CID is nil"
(host/blog-by-cid "znope-nope") nil)
(define
host-bl-tests-run!
(fn ()