Add canonical serialization and content identity spec
spec/canonical.sx defines:

- canonical-serialize: deterministic s-expression serialization (sorted dict keys, normalized numbers, minimal escaping)
- content-id: SHA3-256 of the canonical form = CID of any s-expression
- Bytecode module format: (sxbc version source-hash (code ...))
- Provenance records linking source CID → bytecode CID → compiler CID

The CID is the identity model for SX. A component, a bytecode module, a test suite — anything expressed as an s-expression — is addressable by content hash. Annotation layers (source maps, variable names, test results, documentation) reference CIDs without polluting the artifacts.

Requires host primitives: sha3-256, sort. Tests are in spec/tests/test-canonical.sx.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
117
spec/canonical.sx
Normal file
117
spec/canonical.sx
Normal file
@@ -0,0 +1,117 @@
|
||||
;; canonical-serialize: render a value as its canonical text form.
;; Dispatches on the type name reported by type-of:
;;   nil / boolean  -> the literal words nil, true, false
;;   number         -> delegated to canonical-number
;;   string         -> double-quoted, body passed through escape-string
;;   symbol         -> its name; keyword -> ":" followed by its name
;;   list           -> "(" + space-separated serialized elements + ")"
;;   dict           -> delegated to canonical-dict
;; Any other type falls back to (str val).
(define
  canonical-serialize
  :effects ()
  (fn
    (val)
    (let
      ((kind (type-of val)))
      (case
        kind
        "nil" "nil"
        "boolean" (if val "true" "false")
        "number" (canonical-number val)
        "string" (str "\"" (escape-string val) "\"")
        "symbol" (symbol-name val)
        "keyword" (str ":" (keyword-name val))
        "list"
        (let
          ((items (map canonical-serialize val)))
          (str "(" (join " " items) ")"))
        "dict" (canonical-dict val)
        :else (str val)))))
|
||||
|
||||
;; canonical-number: canonical text form of a number.
;; Starts from (str n) and removes a redundant fractional part so that equal
;; values always serialize to the same text:
;;   - text containing an exponent marker (e / E) is returned untouched;
;;     trimming trailing zeros there would eat exponent digits and make
;;     different numbers collide ("1.5e+30" must not become "1.5e+3")
;;   - a trailing ".0" is removed                      ("42.0" -> "42")
;;   - trailing zeros after the decimal point are cut  ("1.50" -> "1.5")
;;   - if that leaves a dangling ".", the dot is dropped too ("1.00" -> "1"),
;;     matching the ".0" case instead of producing a second spelling "1.0"
;;   - text without a decimal point is returned unchanged
;; NOTE(review): the exact text produced by (str n) is host-defined; confirm
;; that every host prints the same digits for the same number.
(define
  canonical-number
  :effects ()
  (fn
    (n)
    (let
      ((s (str n)))
      (if
        (or (contains-char? s "e") (contains-char? s "E"))
        s
        (if
          (ends-with? s ".0")
          (slice s 0 (- (len s) 2))
          (if
            (contains-char? s ".")
            (let
              ((trimmed (trim-right s "0")))
              (if
                (ends-with? trimmed ".")
                (slice trimmed 0 (- (len trimmed) 1))
                trimmed))
            s))))))
|
||||
|
||||
;; canonical-dict: canonical text form of a dict.
;; Keys are visited in the order returned by (sort (keys d)). Each entry is
;; written as ":" + key, a space, then the canonical form of its value; the
;; entries are joined with single spaces and wrapped in braces.
(define
  canonical-dict
  :effects ()
  (fn
    (d)
    (let
      ((entry-text
         (fn
           (key)
           (str ":" key " " (canonical-serialize (dict-get d key))))))
      (str "{" (join " " (map entry-text (sort (keys d)))) "}"))))
|
||||
|
||||
;; content-id: content identifier (CID) of an expression.
;; Serializes expr with canonical-serialize and returns the result of the
;; host sha3-256 primitive applied to that text.
(define
  content-id
  :effects ()
  (fn
    (expr)
    (let
      ((canonical-text (canonical-serialize expr)))
      (sha3-256 canonical-text))))
|
||||
|
||||
;; content-id-short: abbreviated CID.
;; Returns the first 16 characters of (content-id expr).
(define
  content-id-short
  :effects ()
  (fn
    (expr)
    (let
      ((full-cid (content-id expr)))
      (slice full-cid 0 16))))
|
||||
|
||||
;; make-bytecode-module: build a bytecode module record.
;; The result is the four-element list (sxbc version source-hash code),
;; tagged with the symbol sxbc in first position.
(define
  make-bytecode-module
  :effects ()
  (fn
    (version source-hash code)
    (let
      ((tag (quote sxbc)))
      (list tag version source-hash code))))
|
||||
|
||||
;; bytecode-module?: true when expr is shaped like a bytecode module.
;; Requires, in this order: expr is a list, it has at least 4 elements, and
;; its first element is the symbol sxbc. The length check runs before first
;; is called, so an empty list is rejected without inspecting elements.
(define
  bytecode-module?
  :effects ()
  (fn
    (expr)
    (if
      (list? expr)
      (if
        (>= (len expr) 4)
        (= (first expr) (quote sxbc))
        false)
      false)))
|
||||
|
||||
;; bytecode-module-version: element at index 1 of module list m
;; (the version slot filled by make-bytecode-module).
(define
  bytecode-module-version
  :effects ()
  (fn
    (m)
    (nth m 1)))
|
||||
|
||||
;; bytecode-module-source-hash: element at index 2 of module list m
;; (the source-hash slot filled by make-bytecode-module).
(define
  bytecode-module-source-hash
  :effects ()
  (fn
    (m)
    (nth m 2)))
|
||||
|
||||
;; bytecode-module-code: element at index 3 of module list m
;; (the code slot filled by make-bytecode-module).
(define
  bytecode-module-code
  :effects ()
  (fn
    (m)
    (nth m 3)))
|
||||
|
||||
;; make-code-object: build a (code ...) record as a flat keyword/value list.
;; Layout: the symbol code, then :arity <n> only when arity > 0, then
;; :upvalue-count <n> only when upvalue-count > 0, then always
;; :bytecode <bytecode> :constants <constants>.
;; Zero-valued optional fields are omitted entirely rather than written as 0.
(define
  make-code-object
  :effects ()
  (fn
    (arity upvalue-count bytecode constants)
    (let
      ((arity-part (if (> arity 0) (list :arity arity) (list)))
       (upvalue-part
         (if (> upvalue-count 0) (list :upvalue-count upvalue-count) (list)))
       (tail (list :bytecode bytecode :constants constants)))
      ;; Segments are appended in the fixed order header, arity, upvalues, tail.
      (concat
        (concat (concat (list (quote code)) arity-part) upvalue-part)
        tail))))
|
||||
|
||||
;; make-provenance: build a provenance record.
;; Result layout (flat list, 9 elements):
;;   (provenance :source-cid <s> :bytecode-cid <b> :compiler-cid <c> :timestamp <t>)
;; Arguments are stored as given; nothing is validated or hashed here.
(define
  make-provenance
  :effects ()
  (fn
    (source-cid bytecode-cid compiler-cid timestamp)
    (let
      ((artifact-fields
         (list :source-cid source-cid :bytecode-cid bytecode-cid))
       (build-fields
         (list :compiler-cid compiler-cid :timestamp timestamp)))
      (concat (list (quote provenance)) (concat artifact-fields build-fields)))))
|
||||
91
spec/tests/test-canonical.sx
Normal file
91
spec/tests/test-canonical.sx
Normal file
@@ -0,0 +1,91 @@
|
||||
;; canonical-serialize: one test per value type, then compound values.
(test-group
  "canonical-serialize"

  ;; --- atoms ---
  (test "nil" (assert= (canonical-serialize nil) "nil"))
  (test
    "booleans"
    (assert= (canonical-serialize true) "true")
    (assert= (canonical-serialize false) "false"))
  (test
    "integers"
    (assert= (canonical-serialize 0) "0")
    (assert= (canonical-serialize 42) "42")
    (assert= (canonical-serialize -7) "-7"))
  ;; Strings are wrapped in double quotes; an embedded quote is escaped.
  (test
    "strings"
    (assert= (canonical-serialize "hello") "\"hello\"")
    (assert= (canonical-serialize "") "\"\"")
    (assert= (canonical-serialize "a\"b") "\"a\\\"b\""))
  (test
    "symbols"
    (assert= (canonical-serialize (quote deref)) "deref")
    (assert= (canonical-serialize (quote swap!)) "swap!"))
  (test
    "keywords"
    (assert= (canonical-serialize :class) ":class")
    (assert= (canonical-serialize :arity) ":arity"))

  ;; --- lists ---
  (test "empty list" (assert= (canonical-serialize (list)) "()"))
  (test "flat list" (assert= (canonical-serialize (list 1 2 3)) "(1 2 3)"))
  (test
    "nested list"
    (assert=
      (canonical-serialize
        (list (quote div) :class "flex" (list (quote h2) "title")))
      "(div :class \"flex\" (h2 \"title\"))"))

  ;; --- dicts: output order is sorted by key, not insertion order ---
  (test
    "dict keys sorted"
    (let
      ((d (dict "zebra" 1 "alpha" 2 "middle" 3)))
      (assert= (canonical-serialize d) "{:alpha 2 :middle 3 :zebra 1}")))
  (test
    "dict with nested values"
    (let
      ((d (dict "a" (list 1 2) "b" "hello")))
      (assert= (canonical-serialize d) "{:a (1 2) :b \"hello\"}"))))
|
||||
|
||||
;; content-id: determinism, sensitivity to content, and output shape.
(test-group
  "content-id"

  ;; Structurally equal expressions must hash to the same CID.
  (test
    "same expression same CID"
    (assert= (content-id (list 1 2 3)) (content-id (list 1 2 3))))

  ;; Changing one element must change the CID.
  (test
    "different expression different CID"
    (assert
      (not (= (content-id (list 1 2 3)) (content-id (list 1 2 4))))))

  ;; The full CID is a 64-character string.
  (test
    "CID is a hex string"
    (let
      ((digest (content-id 42)))
      (assert (string? digest))
      (assert= (len digest) 64)))

  ;; The short form is 16 characters long ...
  (test
    "short CID is 16 chars"
    (let
      ((abbreviated (content-id-short 42)))
      (assert= (len abbreviated) 16)))

  ;; ... and is exactly the leading 16 characters of the full CID.
  (test
    "short CID is prefix of full CID"
    (let
      ((full (content-id 42)) (short (content-id-short 42)))
      (assert= short (slice full 0 16)))))
|
||||
|
||||
;; bytecode-module: constructor, predicate and all three accessors.
(test-group
  "bytecode-module"

  ;; Round-trip: every field passed to the constructor is readable back,
  ;; including the code payload via bytecode-module-code.
  (test
    "make and query"
    (let
      ((m (make-bytecode-module 1 "abc123" (list (quote code) :bytecode (list 1 2 3)))))
      (assert (bytecode-module? m))
      (assert= (bytecode-module-version m) 1)
      (assert= (bytecode-module-source-hash m) "abc123")
      (assert= (first (bytecode-module-code m)) (quote code))))

  ;; The predicate rejects untagged lists, non-lists, and lists that carry
  ;; the sxbc tag but are shorter than the four-element module layout.
  (test
    "non-module fails predicate"
    (assert (not (bytecode-module? (list 1 2 3))))
    (assert (not (bytecode-module? "hello")))
    (assert (not (bytecode-module? (list (quote sxbc) 1))))))
|
||||
|
||||
;; provenance: the record is a flat list of a tag plus four keyword/value
;; pairs; every tag and every value is checked at its expected index.
(test-group
  "provenance"
  (test
    "make provenance record"
    (let
      ((p (make-provenance "src-cid" "bc-cid" "compiler-cid" "2026-03-27T00:00:00Z")))
      (assert= (first p) (quote provenance))
      (assert= (nth p 1) :source-cid)
      (assert= (nth p 2) "src-cid")
      (assert= (nth p 3) :bytecode-cid)
      (assert= (nth p 4) "bc-cid")
      (assert= (nth p 5) :compiler-cid)
      (assert= (nth p 6) "compiler-cid")
      (assert= (nth p 7) :timestamp)
      (assert= (nth p 8) "2026-03-27T00:00:00Z"))))
|
||||
Reference in New Issue
Block a user