Add canonical serialization and content identity spec

spec/canonical.sx defines:
- canonical-serialize: deterministic s-expression serialization
  (sorted dict keys, normalized numbers, minimal escaping)
- content-id: SHA3-256 hash of the canonical form, used as the CID of any
  s-expression (content-id-short is its first 16 characters)
- Bytecode module format: (sxbc version source-hash (code ...))
- Provenance records linking source CID → bytecode CID → compiler CID,
  together with a timestamp

The CID is the identity model for SX. A component, a bytecode module,
a test suite — anything expressed as an s-expression — is addressable
by content hash. Annotation layers (source maps, variable names, test
results, documentation) reference CIDs without polluting the artifacts.

Requires host primitives: sha3-256, sort. Tests in test-canonical.sx.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-27 14:35:33 +00:00
parent e0070041d6
commit e6d7a08f8c
2 changed files with 208 additions and 0 deletions

117
spec/canonical.sx Normal file
View File

@@ -0,0 +1,117 @@
;; canonical-serialize: turn a value into its canonical text form.
;;
;; Dispatches on the string returned by (type-of val):
;;   nil      -> "nil"
;;   boolean  -> "true" / "false"
;;   number   -> delegated to canonical-number
;;   string   -> double-quoted, contents passed through escape-string
;;   symbol   -> its name
;;   keyword  -> ":" followed by its name
;;   list     -> "(" + space-separated canonical forms of the items + ")"
;;   dict     -> delegated to canonical-dict
;; Any other type falls back to (str val).
(define
  canonical-serialize
  :effects ()
  (fn
    (val)
    (let
      ((kind (type-of val)))
      (case
        kind
        "nil" "nil"
        "boolean" (if val "true" "false")
        "number" (canonical-number val)
        "string" (str "\"" (escape-string val) "\"")
        "symbol" (symbol-name val)
        "keyword" (str ":" (keyword-name val))
        "list"
        ;; Serialize every element first, then wrap the joined text in parens.
        (let
          ((items (map canonical-serialize val)))
          (str "(" (join " " items) ")"))
        "dict" (canonical-dict val)
        :else (str val)))))
;; canonical-number: normalize the textual form of a number.
;;
;; Starts from (str n) and applies these rules:
;;   * no "." in the text              -> returned unchanged
;;   * text contains an exponent mark  -> returned unchanged; its trailing
;;     zeros belong to the exponent ("1.5e+30"), so trimming them would
;;     change the value
;;   * otherwise trailing zeros of the fraction are removed, and a fraction
;;     that becomes empty is dropped together with its "." so that "2.0",
;;     "2.00" and "2." all normalize to "2"
;;   * a result of "-0" is rewritten to "0" so negative zero and zero share
;;     one canonical spelling
;;
;; NOTE(review): assumes (trim-right s "0") strips only trailing "0"
;; characters, as the original implementation also relied on — confirm.
(define
  canonical-number
  :effects ()
  (fn
    (n)
    (let
      ((s (str n)))
      (let
        ((text
           (if
             (contains-char? s ".")
             (if
               (or (contains-char? s "e") (contains-char? s "E"))
               ;; Exponent notation: leave the host's rendering untouched.
               s
               (let
                 ((trimmed (trim-right s "0")))
                 (if
                   (ends-with? trimmed ".")
                   ;; The whole fraction was zeros: drop the dangling ".".
                   (slice trimmed 0 (- (len trimmed) 1))
                   trimmed)))
             s)))
        (if (= text "-0") "0" text)))))
;; canonical-dict: serialize a dict with its keys in sorted order.
;;
;; Output shape: "{" + ":key value" pairs separated by single spaces + "}".
;; Each key is written verbatim after a ":" (no escaping is applied to it);
;; each value is rendered by canonical-serialize. An empty dict yields "{}".
(define
  canonical-dict
  :effects ()
  (fn
    (d)
    (let
      ((ordered (sort (keys d))))
      (let
        ((pairs
           (map
             (fn
               (key)
               ;; One ":key value" fragment per entry.
               (str ":" key " " (canonical-serialize (dict-get d key))))
             ordered)))
        (str "{" (join " " pairs) "}")))))
;; content-id: content hash (CID) of an expression.
;;
;; Serializes expr with canonical-serialize and passes that text to the
;; sha3-256 primitive. The tests accompanying this spec expect the result
;; to be a 64-character string.
(define
  content-id
  :effects ()
  (fn
    (expr)
    (let
      ((canonical-text (canonical-serialize expr)))
      (sha3-256 canonical-text))))
;; content-id-short: abbreviated CID.
;;
;; Returns the first 16 characters of (content-id expr).
(define
  content-id-short
  :effects ()
  (fn
    (expr)
    (let
      ((full-id (content-id expr)))
      (slice full-id 0 16))))
;; make-bytecode-module: build a bytecode module record.
;;
;; Result is the four-element list (sxbc version source-hash code): the
;; symbol sxbc as a tag followed by the three arguments in order.
(define
  make-bytecode-module
  :effects ()
  (fn
    (version source-hash code)
    (let
      ((tag (list (quote sxbc)))
       (fields (list version source-hash code)))
      (concat tag fields))))
;; bytecode-module?: predicate for bytecode module records.
;;
;; True when expr is a list with at least four elements whose first
;; element is the symbol sxbc.
(define
  bytecode-module?
  :effects ()
  (fn
    (expr)
    (and
      (list? expr)
      (let
        ((size (len expr)))
        ;; More than three elements: tag, version, source hash and code.
        (and (> size 3) (= (first expr) (quote sxbc)))))))
;; bytecode-module-version: element at index 1 of a module record, the
;; position where make-bytecode-module stores the version.
(define
  bytecode-module-version
  :effects ()
  (fn
    (m)
    (let ((version-slot 1)) (nth m version-slot))))
;; bytecode-module-source-hash: element at index 2 of a module record, the
;; position where make-bytecode-module stores the source hash.
(define
  bytecode-module-source-hash
  :effects ()
  (fn
    (m)
    (let ((source-hash-slot 2)) (nth m source-hash-slot))))
;; bytecode-module-code: element at index 3 of a module record, the
;; position where make-bytecode-module stores the code.
(define
  bytecode-module-code
  :effects ()
  (fn
    (m)
    (let ((code-slot 3)) (nth m code-slot))))
;; make-code-object: build a code object record.
;;
;; Result is a list starting with the symbol code, followed by keyword/value
;; pairs in this order:
;;   :arity arity                  (only when arity > 0)
;;   :upvalue-count upvalue-count  (only when upvalue-count > 0)
;;   :bytecode bytecode
;;   :constants constants
(define
  make-code-object
  :effects ()
  (fn
    (arity upvalue-count bytecode constants)
    (let
      ((head (list (quote code)))
       ;; Optional fragments collapse to an empty list when omitted.
       (arity-part (if (> arity 0) (list :arity arity) (list)))
       (upvalue-part
         (if (> upvalue-count 0) (list :upvalue-count upvalue-count) (list)))
       (tail (list :bytecode bytecode :constants constants)))
      (concat (concat (concat head arity-part) upvalue-part) tail))))
;; make-provenance: build a provenance record.
;;
;; Result is the nine-element list
;;   (provenance :source-cid S :bytecode-cid B :compiler-cid C :timestamp T)
;; with the arguments stored after their keywords in that order.
(define
  make-provenance
  :effects ()
  (fn
    (source-cid bytecode-cid compiler-cid timestamp)
    (let
      ((tag (list (quote provenance)))
       (fields
         (list
           :source-cid source-cid
           :bytecode-cid bytecode-cid
           :compiler-cid compiler-cid
           :timestamp timestamp)))
      (concat tag fields))))

View File

@@ -0,0 +1,91 @@
;; Tests for canonical-serialize: one test per value type, plus nesting and
;; dict key ordering. The fractional-number and empty-dict cases cover the
;; number normalization and dict paths that the integer-only and non-empty
;; cases leave unexercised.
(test-group
  "canonical-serialize"
  (test "nil" (assert= (canonical-serialize nil) "nil"))
  (test
    "booleans"
    (assert= (canonical-serialize true) "true")
    (assert= (canonical-serialize false) "false"))
  (test
    "integers"
    (assert= (canonical-serialize 0) "0")
    (assert= (canonical-serialize 42) "42")
    (assert= (canonical-serialize -7) "-7"))
  (test
    "fractional numbers"
    (assert= (canonical-serialize 1.5) "1.5")
    (assert= (canonical-serialize 0.25) "0.25"))
  (test
    "strings"
    (assert= (canonical-serialize "hello") "\"hello\"")
    (assert= (canonical-serialize "") "\"\"")
    (assert= (canonical-serialize "a\"b") "\"a\\\"b\""))
  (test
    "symbols"
    (assert= (canonical-serialize (quote deref)) "deref")
    (assert= (canonical-serialize (quote swap!)) "swap!"))
  (test
    "keywords"
    (assert= (canonical-serialize :class) ":class")
    (assert= (canonical-serialize :arity) ":arity"))
  (test "empty list" (assert= (canonical-serialize (list)) "()"))
  (test "flat list" (assert= (canonical-serialize (list 1 2 3)) "(1 2 3)"))
  (test
    "nested list"
    (assert=
      (canonical-serialize
        (list (quote div) :class "flex" (list (quote h2) "title")))
      "(div :class \"flex\" (h2 \"title\"))"))
  (test "empty dict" (assert= (canonical-serialize (dict)) "{}"))
  (test
    "dict keys sorted"
    (let
      ((d (dict "zebra" 1 "alpha" 2 "middle" 3)))
      (assert= (canonical-serialize d) "{:alpha 2 :middle 3 :zebra 1}")))
  (test
    "dict with nested values"
    (let
      ((d (dict "a" (list 1 2) "b" "hello")))
      (assert= (canonical-serialize d) "{:a (1 2) :b \"hello\"}"))))
;; Tests for content-id and content-id-short: determinism, sensitivity to
;; content, result shape, and independence from dict insertion order (the
;; property that sorted-key serialization exists to provide).
(test-group
  "content-id"
  (test
    "same expression same CID"
    (assert= (content-id (list 1 2 3)) (content-id (list 1 2 3))))
  (test
    "different expression different CID"
    (assert
      (not (= (content-id (list 1 2 3)) (content-id (list 1 2 4))))))
  (test
    "dict key order does not affect CID"
    (assert=
      (content-id (dict "a" 1 "b" 2))
      (content-id (dict "b" 2 "a" 1))))
  (test
    "CID is a hex string"
    (let
      ((cid (content-id 42)))
      (assert (string? cid))
      (assert= (len cid) 64)))
  (test
    "short CID is 16 chars"
    (let ((cid (content-id-short 42))) (assert= (len cid) 16)))
  (test
    "short CID is prefix of full CID"
    (let
      ((full (content-id 42)) (short (content-id-short 42)))
      (assert= short (slice full 0 16)))))
;; Tests for the bytecode module constructor, predicate and accessors, and
;; for make-code-object. Beyond the basic round trip they cover the code
;; accessor, the predicate's length requirement, and the omission of
;; zero-valued :arity / :upvalue-count fields from code objects.
(test-group
  "bytecode-module"
  (test
    "make and query"
    (let
      ((m (make-bytecode-module 1 "abc123" (list (quote code) :bytecode (list 1 2 3)))))
      (assert (bytecode-module? m))
      (assert= (bytecode-module-version m) 1)
      (assert= (bytecode-module-source-hash m) "abc123")
      (assert= (first (bytecode-module-code m)) (quote code))))
  (test
    "non-module fails predicate"
    (assert (not (bytecode-module? (list 1 2 3))))
    (assert (not (bytecode-module? "hello")))
    ;; Correct tag but too few elements.
    (assert (not (bytecode-module? (list (quote sxbc) 1)))))
  (test
    "code object omits zero arity and upvalue-count"
    (let
      ((c (make-code-object 0 0 (list 1 2 3) (list))))
      (assert= (first c) (quote code))
      (assert= (len c) 5)))
  (test
    "code object keeps positive arity and upvalue-count"
    (let
      ((c (make-code-object 2 1 (list 1 2 3) (list))))
      (assert= (len c) 9)
      (assert= (nth c 2) 2)
      (assert= (nth c 4) 1))))
;; Tests for make-provenance: the tag symbol, all four stored fields at
;; their positions after the keywords, and the overall record length.
(test-group
  "provenance"
  (test
    "make provenance record"
    (let
      ((p (make-provenance "src-cid" "bc-cid" "compiler-cid" "2026-03-27T00:00:00Z")))
      (assert= (first p) (quote provenance))
      (assert= (len p) 9)
      (assert= (nth p 2) "src-cid")
      (assert= (nth p 4) "bc-cid")
      (assert= (nth p 6) "compiler-cid")
      (assert= (nth p 8) "2026-03-27T00:00:00Z"))))