sx-git Phase 1: blob/tree/commit/tag as content-addressed typed objects (TDD)

Objects are plain dicts over persist kv, addressed by sx1:<sha256> of the
artdag/canon canonical form (sorted dict keys) — native CIDs, extensible
fields participate in identity. 38/38.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-03 12:01:11 +00:00
parent f561deede3
commit 9a85b52d1a
5 changed files with 453 additions and 0 deletions

115
lib/git/conformance.sh Executable file
View File

@@ -0,0 +1,115 @@
#!/usr/bin/env bash
# lib/git/conformance.sh — run sx-git test suites, emit scoreboard.json + scoreboard.md.
set -uo pipefail
cd "$(git rev-parse --show-toplevel)"
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [ ! -x "$SX_SERVER" ]; then
echo "ERROR: sx_server.exe not found." >&2
exit 1
fi
SUITES=(object)
OUT_JSON="lib/git/scoreboard.json"
OUT_MD="lib/git/scoreboard.md"
run_suite() {
local suite=$1
local file="lib/git/tests/${suite}.sx"
local TMP
TMP=$(mktemp)
cat > "$TMP" << EPOCHS
(epoch 1)
(load "spec/stdlib.sx")
(load "lib/r7rs.sx")
(load "lib/persist/event.sx")
(load "lib/persist/backend.sx")
(load "lib/persist/log.sx")
(load "lib/persist/kv.sx")
(load "lib/artdag/dag.sx")
(load "lib/git/object.sx")
(epoch 2)
(eval "(define git-test-pass 0)")
(eval "(define git-test-fail 0)")
(eval "(define git-test-failures (list))")
(eval "(define git-test (fn (name got expected) (if (equal? got expected) (set! git-test-pass (+ git-test-pass 1)) (begin (set! git-test-fail (+ git-test-fail 1)) (set! git-test-failures (append git-test-failures (list (list name (inspect got) (inspect expected)))))))))")
(epoch 3)
(load "${file}")
(epoch 4)
(eval "(list git-test-pass git-test-fail)")
EPOCHS
local OUTPUT
OUTPUT=$(timeout 300 "$SX_SERVER" < "$TMP" 2>/dev/null)
rm -f "$TMP"
local LINE
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 4 / {getline; print; exit}')
if [ -z "$LINE" ]; then
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 4 \([0-9]+ [0-9]+\)\)' | tail -1 \
| sed -E 's/^\(ok 4 //; s/\)$//')
fi
local P F
P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/')
F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/')
P=${P:-0}
F=${F:-0}
echo "${P} ${F}"
}
declare -A SUITE_PASS
declare -A SUITE_FAIL
TOTAL_PASS=0
TOTAL_FAIL=0
echo "Running sx-git conformance suite..." >&2
for s in "${SUITES[@]}"; do
read -r p f < <(run_suite "$s")
SUITE_PASS[$s]=$p
SUITE_FAIL[$s]=$f
TOTAL_PASS=$((TOTAL_PASS + p))
TOTAL_FAIL=$((TOTAL_FAIL + f))
printf " %-12s %d/%d\n" "$s" "$p" "$((p+f))" >&2
done
# scoreboard.json
{
printf '{\n'
printf ' "suites": {\n'
first=1
for s in "${SUITES[@]}"; do
if [ $first -eq 0 ]; then printf ',\n'; fi
printf ' "%s": {"pass": %d, "fail": %d}' "$s" "${SUITE_PASS[$s]}" "${SUITE_FAIL[$s]}"
first=0
done
printf '\n },\n'
printf ' "total_pass": %d,\n' "$TOTAL_PASS"
printf ' "total_fail": %d,\n' "$TOTAL_FAIL"
printf ' "total": %d\n' "$((TOTAL_PASS + TOTAL_FAIL))"
printf '}\n'
} > "$OUT_JSON"
# scoreboard.md
{
printf '# sx-git Conformance Scoreboard\n\n'
printf '_Generated by `lib/git/conformance.sh`_\n\n'
printf '| Suite | Pass | Fail | Total |\n'
printf '|-------|-----:|-----:|------:|\n'
for s in "${SUITES[@]}"; do
p=${SUITE_PASS[$s]}
f=${SUITE_FAIL[$s]}
printf '| %s | %d | %d | %d |\n' "$s" "$p" "$f" "$((p+f))"
done
printf '| **Total** | **%d** | **%d** | **%d** |\n' "$TOTAL_PASS" "$TOTAL_FAIL" "$((TOTAL_PASS + TOTAL_FAIL))"
} > "$OUT_MD"
echo "Wrote $OUT_JSON and $OUT_MD" >&2
echo "Total: $TOTAL_PASS pass, $TOTAL_FAIL fail" >&2
[ "$TOTAL_FAIL" -eq 0 ]

81
lib/git/object.sx Normal file
View File

@@ -0,0 +1,81 @@
; lib/git/object.sx — sx-git Phase 1: blob/tree/commit/tag as content-addressed
; TYPED objects over the persist kv store. Identity = host digest of the
; canonical serialization (artdag/canon: sorted dict keys, escaped strings) —
; native CIDs, NOT git wire bytes. Objects are plain dicts: typed, extensible;
; unknown fields round-trip and participate in the CID.
; Requires: lib/persist/backend.sx, lib/persist/kv.sx, lib/artdag/dag.sx.
; ---- canonical form + content id ----
(define git/canon (fn (obj) (artdag/canon obj)))
(define
git/cid
(fn (obj) (str "sx1:" (crypto-sha256 (artdag/canon obj)))))
; ---- repo handle: a persist backend + key prefix (many repos per db) ----
(define git/repo-named (fn (db name) {:prefix name :db db}))
(define git/repo (fn (db) (git/repo-named db "git")))
(define git/repo-db (fn (repo) (get repo :db)))
(define git/obj-key (fn (repo cid) (str (get repo :prefix) "/obj/" cid)))
; ---- constructors ----
(define git/blob (fn (data) {:data data :type "blob"}))
; entries: dict of name -> entry, entry = {:kind "blob"|"tree" :cid cid ...}
(define git/tree (fn (entries) {:type "tree" :entries entries}))
(define git/tree-entry (fn (kind cid) {:kind kind :cid cid}))
; meta: open dict (:author :message :time ... anything); protected keys win
(define git/commit (fn (tree parents meta) (merge meta {:type "commit" :tree tree :parents parents})))
(define git/tag (fn (target name meta) (merge meta {:name name :type "tag" :target target})))
; ---- predicates / accessors ----
(define git/object-type (fn (obj) (get obj :type)))
(define
git/blob?
(fn (obj) (and (dict? obj) (equal? (get obj :type) "blob"))))
(define
git/tree?
(fn (obj) (and (dict? obj) (equal? (get obj :type) "tree"))))
(define
git/commit?
(fn (obj) (and (dict? obj) (equal? (get obj :type) "commit"))))
(define
git/tag?
(fn (obj) (and (dict? obj) (equal? (get obj :type) "tag"))))
(define git/blob-data (fn (obj) (get obj :data)))
(define git/tree-entries (fn (obj) (get obj :entries)))
(define git/tree-entry-for (fn (obj name) (get (get obj :entries) name)))
(define
git/tree-names
(fn (obj) (artdag/sort-strings (keys (get obj :entries)))))
(define git/entry-cid (fn (entry) (get entry :cid)))
(define git/entry-kind (fn (entry) (get entry :kind)))
(define git/commit-tree (fn (obj) (get obj :tree)))
(define git/commit-parents (fn (obj) (get obj :parents)))
(define git/commit-author (fn (obj) (get obj :author)))
(define git/commit-message (fn (obj) (get obj :message)))
(define git/tag-target (fn (obj) (get obj :target)))
(define git/tag-name (fn (obj) (get obj :name)))
; ---- object store: write/read/has, keyed by cid ----
(define
git/write
(fn
(repo obj)
(let
((cid (git/cid obj)))
(begin (persist/kv-put (get repo :db) (git/obj-key repo cid) obj) cid))))
(define
git/read
(fn (repo cid) (persist/kv-get (get repo :db) (git/obj-key repo cid))))
(define
git/has?
(fn (repo cid) (persist/kv-has? (get repo :db) (git/obj-key repo cid))))
; convenience: write a blob straight from data
(define git/write-blob (fn (repo data) (git/write repo (git/blob data))))

8
lib/git/scoreboard.json Normal file
View File

@@ -0,0 +1,8 @@
{
"suites": {
"object": {"pass": 38, "fail": 0}
},
"total_pass": 38,
"total_fail": 0,
"total": 38
}

8
lib/git/scoreboard.md Normal file
View File

@@ -0,0 +1,8 @@
# sx-git Conformance Scoreboard
_Generated by `lib/git/conformance.sh`_
| Suite | Pass | Fail | Total |
|-------|-----:|-----:|------:|
| object | 38 | 0 | 38 |
| **Total** | **38** | **0** | **38** |

241
lib/git/tests/object.sx Normal file
View File

@@ -0,0 +1,241 @@
; Phase 1 — object: blob/tree/commit/tag as content-addressed typed objects.
; Fixture repo: blobs a/b/c, nested tree (a.txt b.txt sub/c.txt), two commits
; (c2 modifies a.txt, parent c1), tag v1 -> c1. Reused as the assertion target.
(define git-fix-db (persist/mem-backend))
(define git-fix (git/repo git-fix-db))
(define git-fix-blob-a (git/write-blob git-fix "hello\n"))
(define git-fix-blob-b (git/write-blob git-fix "world\n"))
(define git-fix-blob-c (git/write-blob git-fix "sub\n"))
(define git-fix-blob-a2 (git/write-blob git-fix "hello2\n"))
(define
git-fixt-entries3
(fn
(acid bcid subcid)
(assoc
(assoc
(assoc {} "a.txt" (git/tree-entry "blob" acid))
"b.txt"
(git/tree-entry "blob" bcid))
"sub"
(git/tree-entry "tree" subcid))))
(define
git-fix-subtree-cid
(git/write
git-fix
(git/tree
(assoc {} "c.txt" (git/tree-entry "blob" git-fix-blob-c)))))
(define
git-fix-tree1-cid
(git/write
git-fix
(git/tree
(git-fixt-entries3 git-fix-blob-a git-fix-blob-b git-fix-subtree-cid))))
(define
git-fix-tree2-cid
(git/write
git-fix
(git/tree
(git-fixt-entries3 git-fix-blob-a2 git-fix-blob-b git-fix-subtree-cid))))
(define
git-fix-commit1-cid
(git/write git-fix (git/commit git-fix-tree1-cid (list) {:message "c1" :time 1 :author "ada"})))
(define
git-fix-commit2-cid
(git/write
git-fix
(git/commit git-fix-tree2-cid (list git-fix-commit1-cid) {:message "c2" :time 2 :author "ada"})))
(define
git-fix-tag-cid
(git/write git-fix (git/tag git-fix-commit1-cid "v1" {:message "first" :tagger "ada"})))
; ---- constructors + types ----
(git-test "blob is typed" (git/object-type (git/blob "x")) "blob")
(git-test "blob? true on blob" (git/blob? (git/blob "x")) true)
(git-test
"blob? false on commit"
(git/blob? (git/commit "t" (list) {}))
false)
(git-test "tree? true on tree" (git/tree? (git/tree {})) true)
(git-test
"commit? true on commit"
(git/commit? (git/commit "t" (list) {}))
true)
(git-test "tag? true on tag" (git/tag? (git/tag "c" "v" {})) true)
(git-test "blob-data reads back" (git/blob-data (git/blob "hi")) "hi")
; ---- cid: deterministic structural identity ----
(git-test
"cid deterministic"
(equal? (git/cid (git/blob "same")) (git/cid (git/blob "same")))
true)
(git-test
"cid differs by content"
(equal? (git/cid (git/blob "a")) (git/cid (git/blob "b")))
false)
(git-test
"cid ignores dict insertion order"
(equal?
(git/cid (assoc (assoc {} :type "blob") :data "x"))
(git/cid (assoc (assoc {} :data "x") :type "blob")))
true)
(git-test
"cid carries the native scheme prefix"
(starts-with? (git/cid (git/blob "x")) "sx1:")
true)
; ---- write / read / has ----
(git-test
"write returns the object cid"
(equal? git-fix-blob-a (git/cid (git/blob "hello\n")))
true)
(git-test
"read round-trips blob data"
(git/blob-data (git/read git-fix git-fix-blob-a))
"hello\n")
(git-test
"read round-trips structurally"
(equal? (git/read git-fix git-fix-blob-a) (git/blob "hello\n"))
true)
(git-test "has? true after write" (git/has? git-fix git-fix-blob-a) true)
(git-test "has? false for unknown cid" (git/has? git-fix "sx1:nope") false)
(git-test "read unknown cid gives nil" (git/read git-fix "sx1:nope") nil)
(git-test
"rewrite is idempotent, same cid"
(equal? (git/write git-fix (git/blob "hello\n")) git-fix-blob-a)
true)
; ---- structural identity across separately built objects ----
(git-test
"separately built identical tree shares the cid"
(equal?
(git/write
git-fix
(git/tree
(git-fixt-entries3 git-fix-blob-a git-fix-blob-b git-fix-subtree-cid)))
git-fix-tree1-cid)
true)
(git-test
"changed entry changes the tree cid"
(equal? git-fix-tree1-cid git-fix-tree2-cid)
false)
; ---- tree accessors ----
(git-test
"tree entry lookup by name"
(git/entry-cid
(git/tree-entry-for (git/read git-fix git-fix-tree1-cid) "a.txt"))
git-fix-blob-a)
(git-test
"tree entry kind"
(git/entry-kind
(git/tree-entry-for (git/read git-fix git-fix-tree1-cid) "sub"))
"tree")
(git-test
"tree-names sorted"
(=
(git/tree-names (git/read git-fix git-fix-tree1-cid))
(list "a.txt" "b.txt" "sub"))
true)
; ---- commit accessors ----
(git-test
"commit tree cid"
(git/commit-tree (git/read git-fix git-fix-commit1-cid))
git-fix-tree1-cid)
(git-test
"root commit has no parents"
(git/commit-parents (git/read git-fix git-fix-commit1-cid))
(list))
(git-test
"child commit records its parent"
(git/commit-parents (git/read git-fix git-fix-commit2-cid))
(list git-fix-commit1-cid))
(git-test
"commit author round-trips"
(git/commit-author (git/read git-fix git-fix-commit1-cid))
"ada")
(git-test
"commit message round-trips"
(git/commit-message (git/read git-fix git-fix-commit2-cid))
"c2")
(git-test
"commit cids differ across history"
(equal? git-fix-commit1-cid git-fix-commit2-cid)
false)
; ---- typed extensibility (the reason for native CID) ----
(git-test
"extra commit field round-trips"
(get
(git/read
git-fix
(git/write git-fix (git/commit "t" (list) {:message "m" :co-authored-by "claude"})))
:co-authored-by)
"claude")
(git-test
"extra field changes the cid"
(equal?
(git/cid (git/commit "t" (list) {:m 1}))
(git/cid (git/commit "t" (list) {})))
false)
(git-test
"protected keys win over meta"
(git/commit-tree (git/commit "t" (list) {:tree "evil"}))
"t")
; ---- tag ----
(git-test
"tag target"
(git/tag-target (git/read git-fix git-fix-tag-cid))
git-fix-commit1-cid)
(git-test "tag name" (git/tag-name (git/read git-fix git-fix-tag-cid)) "v1")
(git-test
"tag? on read-back"
(git/tag? (git/read git-fix git-fix-tag-cid))
true)
; ---- full walk: commit -> tree -> subtree -> blob ----
(git-test
"walk commit to nested blob"
(git/blob-data
(git/read
git-fix
(git/entry-cid
(git/tree-entry-for
(git/read
git-fix
(git/entry-cid
(git/tree-entry-for
(git/read
git-fix
(git/commit-tree (git/read git-fix git-fix-commit1-cid)))
"sub")))
"c.txt"))))
"sub\n")
; ---- repos are namespaced within one backend ----
(git-test
"objects are invisible across repo namespaces"
(let
((db (persist/mem-backend)))
(let
((ra (git/repo-named db "a")) (rb (git/repo-named db "b")))
(let ((cid (git/write-blob ra "x"))) (git/has? rb cid))))
false)
(git-test
"same content, same cid in any repo"
(let
((db (persist/mem-backend)))
(equal?
(git/write-blob (git/repo-named db "a") "x")
(git/write-blob (git/repo-named db "b") "x")))
true)