diff --git a/lib/git/conformance.sh b/lib/git/conformance.sh index 3a382afb..81103d0e 100755 --- a/lib/git/conformance.sh +++ b/lib/git/conformance.sh @@ -13,7 +13,7 @@ if [ ! -x "$SX_SERVER" ]; then exit 1 fi -SUITES=(object ref dag worktree diff merge porcelain) +SUITES=(object ref dag worktree diff merge porcelain export) OUT_JSON="lib/git/scoreboard.json" OUT_MD="lib/git/scoreboard.md" @@ -49,6 +49,8 @@ run_suite() { (load "lib/git/diff.sx") (load "lib/git/merge.sx") (load "lib/git/porcelain.sx") +(load "lib/git/sha1.sx") +(load "lib/git/export.sx") (epoch 2) (eval "(define git-test-pass 0)") (eval "(define git-test-fail 0)") diff --git a/lib/git/export.sx b/lib/git/export.sx new file mode 100644 index 00000000..dae89647 --- /dev/null +++ b/lib/git/export.sx @@ -0,0 +1,225 @@ +; lib/git/export.sx — git-wire EXPORT ADAPTER (flagged extension). +; Converts native sx-git objects into byte-exact git loose-object payloads: +; " \0" with real git SHA-1 identity, golden-verified +; against git CLI output. This is an adapter at the edge — the base model +; stays native-CID (sx1:/sha256, typed extensible dicts). zlib/packfiles are +; host-side concerns and stay out of scope: the adapter emits type+bytes+sha1. +; Field mapping (commit/tag idents): :author/:email/:time/:tz with committer +; overrides :committer/:committer-email/:committer-time/:committer-tz; +; defaults email="@sx", time=0, tz="+0000". Messages get a trailing +; newline if missing. Extra native fields do NOT survive export (git wire +; has nowhere to put them) — that loss is the point of native-first. +; Requires: lib/git/object.sx, lib/git/sha1.sx, lib/artdag/dag.sx. + +(define git/export-nul (list->string (list (integer->char 0)))) + +; ---- tree entry details ---- +; raw tree bytes use "40000" for subtrees (no leading zero); files default +; to 100644 unless the entry carries an explicit :mode (e.g. "100755") +(define + git/export-mode + (fn + (e) + (if + (has-key? e :mode) + (get e :mode) + (if (equal? (get e :kind) "tree") "40000" "100644")))) + +; git sorts tree entries by name bytes with directories keyed as "name/" +(define + git/export-sortkey + (fn + (tree name) + (if + (equal? (git/entry-kind (git/tree-entry-for tree name)) "tree") + (str name "/") + name))) + +(define + git/export-ins + (fn + (tree sorted n) + (cond + ((empty? sorted) (list n)) + ((artdag/str " + (git/export-or (get obj :time) 0) + " " + (git/export-or (get obj :tz) "+0000"))))) + +(define + git/export-committer-ident + (fn + (obj) + (let + ((name (git/export-or (get obj :committer) (git/export-or (get obj :author) "sx")))) + (str + name + " <" + (git/export-or + (get obj :committer-email) + (git/export-or (get obj :email) (str name "@sx"))) + "> " + (git/export-or + (get obj :committer-time) + (git/export-or (get obj :time) 0)) + " " + (git/export-or + (get obj :committer-tz) + (git/export-or (get obj :tz) "+0000")))))) + +(define + git/export-message + (fn + (obj) + (let + ((m (git/export-or (get obj :message) ""))) + (let + ((n (string-length m))) + (if + (and + (> n 0) + (equal? (substring m (- n 1) n) "\n")) + m + (str m "\n")))))) + +; ---- object payloads ---- +(define + git/export-entry + (fn + (type body) + (let + ((full (str type " " (string-length body) git/export-nul body))) + {:type type :sha1 (git/sha1-hex full) :bytes full}))) + +(define + git/export-tree-body + (fn + (tree table) + (reduce + (fn + (acc name) + (let + ((e (git/tree-entry-for tree name))) + (str + acc + (git/export-mode e) + " " + name + git/export-nul + (git/hex->raw (get (get table (git/entry-cid e)) :sha1))))) + "" + (git/export-sort-names tree (git/tree-names tree))))) + +(define + git/export-commit-body + (fn + (obj table) + (str + "tree " + (get (get table (git/commit-tree obj)) :sha1) + "\n" + (reduce + (fn (acc p) (str acc "parent " (get (get table p) :sha1) "\n")) + "" + (git/commit-parents obj)) + "author " + (git/export-author-ident obj) + "\n" + "committer " + (git/export-committer-ident obj) + "\n" + "\n" + (git/export-message obj)))) + +(define + git/export-tag-body + (fn + (obj table) + (str + "object " + (get (get table (git/tag-target obj)) :sha1) + "\n" + "type " + (get (get table (git/tag-target obj)) :type) + "\n" + "tag " + (git/tag-name obj) + "\n" + "tagger " + (git/export-author-ident obj) + "\n" + "\n" + (git/export-message obj)))) + +; ---- recursive closure export: table cid -> {:type :bytes :sha1} ---- +(define + git/export-into + (fn + (repo cid table) + (if + (has-key? table cid) + table + (let + ((obj (git/read repo cid))) + (cond + ((git/blob? obj) + (assoc table cid (git/export-entry "blob" (git/blob-data obj)))) + ((git/tree? obj) + (let + ((t2 (reduce (fn (tb name) (git/export-into repo (git/entry-cid (git/tree-entry-for obj name)) tb)) table (git/tree-names obj)))) + (assoc + t2 + cid + (git/export-entry "tree" (git/export-tree-body obj t2))))) + ((git/commit? obj) + (let + ((t2 (git/export-into repo (git/commit-tree obj) (reduce (fn (tb p) (git/export-into repo p tb)) table (git/commit-parents obj))))) + (assoc + t2 + cid + (git/export-entry "commit" (git/export-commit-body obj t2))))) + ((git/tag? obj) + (let + ((t2 (git/export-into repo (git/tag-target obj) table))) + (assoc + t2 + cid + (git/export-entry "tag" (git/export-tag-body obj t2))))) + (else table)))))) + +(define + git/export-closure + (fn (repo cid) (git/export-into repo cid {}))) + +; export one object (with its dependencies): {:type :bytes :sha1} +(define git/export (fn (repo cid) (get (git/export-closure repo cid) cid))) + +(define git/export-sha (fn (repo cid) (get (git/export repo cid) :sha1))) + +; host-writable set: {:head :objects {sha1 -> {:type :bytes}}} +(define + git/export-set + (fn (repo cid) (let ((table (git/export-closure repo cid))) {:head (get (get table cid) :sha1) :objects (reduce (fn (acc c) (let ((e (get table c))) (assoc acc (get e :sha1) {:type (get e :type) :bytes (get e :bytes)}))) {} (keys table))}))) diff --git a/lib/git/scoreboard.json b/lib/git/scoreboard.json index e88a6853..82a82845 100644 --- a/lib/git/scoreboard.json +++ b/lib/git/scoreboard.json @@ -6,9 +6,10 @@ "worktree": {"pass": 26, "fail": 0}, "diff": {"pass": 27, "fail": 0}, "merge": {"pass": 28, "fail": 0}, - "porcelain": {"pass": 40, "fail": 0} + "porcelain": {"pass": 40, "fail": 0}, + "export": {"pass": 25, "fail": 0} }, - "total_pass": 227, + "total_pass": 252, "total_fail": 0, - "total": 227 + "total": 252 } diff --git a/lib/git/scoreboard.md b/lib/git/scoreboard.md index 4eb78db4..86db6a66 100644 --- a/lib/git/scoreboard.md +++ b/lib/git/scoreboard.md @@ -11,4 +11,5 @@ _Generated by `lib/git/conformance.sh`_ | diff | 27 | 0 | 27 | | merge | 28 | 0 | 28 | | porcelain | 40 | 0 | 40 | -| **Total** | **227** | **0** | **227** | +| export | 25 | 0 | 25 | +| **Total** | **252** | **0** | **252** | diff --git a/lib/git/sha1.sx b/lib/git/sha1.sx new file mode 100644 index 00000000..78cc7917 --- /dev/null +++ b/lib/git/sha1.sx @@ -0,0 +1,299 @@ +; lib/git/sha1.sx — SHA-1 in pure SX (host bitwise prims, no deps). +; Exists ONLY for the git-wire export adapter: native sx-git identity stays +; sx1:/sha256 (object.sx); SHA-1 is what the exported byte format demands. +; Strings are treated as byte strings (char-code on 1-byte substrings). + +(define git/sha1-mask 4294967295) +(define git/u32 (fn (x) (bitwise-and x git/sha1-mask))) + +(define + git/rotl + (fn + (x n) + (git/u32 + (bitwise-or + (arithmetic-shift x n) + (arithmetic-shift x (- n 32)))))) + +; ---- byte plumbing ---- +(define + git/sha1-take + (fn + (xs n) + (if + (or (= n 0) (empty? xs)) + (list) + (cons (first xs) (git/sha1-take (rest xs) (- n 1)))))) + +(define + git/sha1-drop + (fn + (xs n) + (if + (or (= n 0) (empty? xs)) + xs + (git/sha1-drop (rest xs) (- n 1))))) + +(define + git/sha1-sb + (fn + (s i n acc) + (if + (>= i n) + (reverse acc) + (git/sha1-sb + s + (+ i 1) + n + (cons (char-code (substring s i (+ i 1))) acc))))) + +(define + git/sha1-str-bytes + (fn (s) (git/sha1-sb s 0 (string-length s) (list)))) + +(define + git/sha1-zeros + (fn + (k) + (if + (= k 0) + (list) + (cons 0 (git/sha1-zeros (- k 1)))))) + +(define + git/sha1-be8 + (fn + (v) + (map + (fn + (sh) + (bitwise-and (arithmetic-shift v (- 0 sh)) 255)) + (list + 56 + 48 + 40 + 32 + 24 + 16 + 8 + 0)))) + +; append 0x80, zero-pad to 56 mod 64, then the 64-bit big-endian bit length +(define + git/sha1-pad + (fn + (bytes) + (let + ((n (len bytes))) + (let + ((zeros (remainder (+ (- 56 (remainder (+ n 1) 64)) 64) 64))) + (append + bytes + (append + (cons 128 (git/sha1-zeros zeros)) + (git/sha1-be8 (* n 8)))))))) + +; ---- message schedule: w as an index-keyed dict ---- +(define + git/sha1-w-init-go + (fn + (bs j w) + (if + (= j 16) + w + (git/sha1-w-init-go + (git/sha1-drop bs 4) + (+ j 1) + (assoc + w + (str j) + (bitwise-or + (arithmetic-shift (nth bs 0) 24) + (bitwise-or + (arithmetic-shift (nth bs 1) 16) + (bitwise-or + (arithmetic-shift (nth bs 2) 8) + (nth bs 3))))))))) + +(define + git/sha1-w-expand + (fn + (w t) + (if + (= t 80) + w + (git/sha1-w-expand + (assoc + w + (str t) + (git/rotl + (bitwise-xor + (bitwise-xor + (get w (str (- t 3))) + (get w (str (- t 8)))) + (bitwise-xor + (get w (str (- t 14))) + (get w (str (- t 16))))) + 1)) + (+ t 1))))) + +; ---- rounds ---- +(define + git/sha1-f + (fn + (t b c d) + (cond + ((< t 20) + (bitwise-or + (bitwise-and b c) + (bitwise-and (bitwise-and (bitwise-not b) git/sha1-mask) d))) + ((< t 40) (bitwise-xor (bitwise-xor b c) d)) + ((< t 60) + (bitwise-or + (bitwise-or (bitwise-and b c) (bitwise-and b d)) + (bitwise-and c d))) + (else (bitwise-xor (bitwise-xor b c) d))))) + +(define + git/sha1-k + (fn + (t) + (cond + ((< t 20) 1518500249) + ((< t 40) 1859775393) + ((< t 60) 2400959708) + (else 3395469782)))) + +(define + git/sha1-rounds + (fn + (w t a b c d e) + (if + (= t 80) + (list a b c d e) + (git/sha1-rounds + w + (+ t 1) + (git/u32 + (+ + (+ + (+ (+ (git/rotl a 5) (git/sha1-f t b c d)) e) + (git/sha1-k t)) + (get w (str t)))) + a + (git/rotl b 30) + c + d)))) + +(define + git/sha1-blocks + (fn + (bs hs) + (if + (empty? bs) + hs + (let + ((w (git/sha1-w-expand (git/sha1-w-init-go (git/sha1-take bs 64) 0 {}) 16))) + (let + ((r (git/sha1-rounds w 0 (nth hs 0) (nth hs 1) (nth hs 2) (nth hs 3) (nth hs 4)))) + (git/sha1-blocks + (git/sha1-drop bs 64) + (list + (git/u32 (+ (nth hs 0) (nth r 0))) + (git/u32 (+ (nth hs 1) (nth r 1))) + (git/u32 (+ (nth hs 2) (nth r 2))) + (git/u32 (+ (nth hs 3) (nth r 3))) + (git/u32 (+ (nth hs 4) (nth r 4)))))))))) + +(define + git/sha1-words + (fn + (s) + (git/sha1-blocks + (git/sha1-pad (git/sha1-str-bytes s)) + (list 1732584193 4023233417 2562383102 271733878 3285377520)))) + +; ---- digest forms ---- +(define + git/sha1-word-bytes + (fn + (v) + (list + (bitwise-and (arithmetic-shift v -24) 255) + (bitwise-and (arithmetic-shift v -16) 255) + (bitwise-and (arithmetic-shift v -8) 255) + (bitwise-and v 255)))) + +(define + git/sha1-digest-bytes + (fn + (s) + (reduce + (fn (acc v) (append acc (git/sha1-word-bytes v))) + (list) + (git/sha1-words s)))) + +(define git/sha1-hexd "0123456789abcdef") +(define + git/sha1-byte-hex + (fn + (b) + (str + (substring + git/sha1-hexd + (quotient b 16) + (+ (quotient b 16) 1)) + (substring + git/sha1-hexd + (remainder b 16) + (+ (remainder b 16) 1))))) + +(define + git/sha1-hex + (fn + (s) + (reduce + (fn (acc b) (str acc (git/sha1-byte-hex b))) + "" + (git/sha1-digest-bytes s)))) + +(define + git/sha1-raw + (fn + (s) + (list->string + (map (fn (b) (integer->char b)) (git/sha1-digest-bytes s))))) + +; hex string -> raw bytes (tree entries embed 20 raw sha bytes) +(define + git/hex-digit-val + (fn + (c) + (let + ((v (char-code c))) + (if (< v 58) (- v 48) (- v 87))))) + +(define + git/hex->raw-go + (fn + (h i n acc) + (if + (>= i n) + (list->string (reverse acc)) + (git/hex->raw-go + h + (+ i 2) + n + (cons + (integer->char + (+ + (* + 16 + (git/hex-digit-val (substring h i (+ i 1)))) + (git/hex-digit-val + (substring h (+ i 1) (+ i 2))))) + acc))))) + +(define + git/hex->raw + (fn (h) (git/hex->raw-go h 0 (string-length h) (list)))) diff --git a/lib/git/tests/export.sx b/lib/git/tests/export.sx new file mode 100644 index 00000000..60df24d3 --- /dev/null +++ b/lib/git/tests/export.sx @@ -0,0 +1,202 @@ +; Extension — git-wire export adapter. Golden values generated with real git +; (hash-object/mktree/commit-tree with pinned idents) — the adapter must +; reproduce byte-exact payloads and SHA-1s. + +; ---- SHA-1 vectors ---- +(git-test + "sha1 of empty" + (git/sha1-hex "") + "da39a3ee5e6b4b0d3255bfef95601890afd80709") +(git-test + "sha1 of abc" + (git/sha1-hex "abc") + "a9993e364706816aba3e25717850c26c9cd0d89d") +(git-test + "sha1 crossing the padding boundary" + (git/sha1-hex "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq") + "84983e441c3bd26ebaae4aa1f95129e5e54670f1") +(define + gex-rep + (fn + (s n) + (if (= n 0) "" (str s (gex-rep s (- n 1)))))) +(git-test + "sha1 over multiple blocks" + (git/sha1-hex (gex-rep "a" 200)) + "e61cfffe0d9195a525fc6cf06ca2d77119c24a40") +(git-test + "raw digest is 20 bytes" + (= (string-length (git/sha1-raw "abc")) 20) + true) +(git-test + "hex->raw round-trips through byte codes" + (char-code (substring (git/hex->raw "ff00a1") 1 2)) + 0) + +; ---- fixture (mirrors the golden git repo exactly) ---- +(define gex-db (persist/mem-backend)) +(define gex (git/repo gex-db)) +(define gex-hello (git/write-blob gex "hello\n")) +(define gex-sub (git/write-blob gex "sub\n")) +(define gex-hello2 (git/write-blob gex "hello2\n")) +(define gex-subtxt (git/write-blob gex "not a dir\n")) +(define + gex-tsub + (git/write + gex + (git/tree (assoc {} "c.txt" (git/tree-entry "blob" gex-sub))))) +(define + gex-t1 + (git/write + gex + (git/tree + (assoc + (assoc {} "a.txt" (git/tree-entry "blob" gex-hello)) + "sub" + (git/tree-entry "tree" gex-tsub))))) +(define + gex-t2 + (git/write + gex + (git/tree + (assoc + (assoc {} "a.txt" (git/tree-entry "blob" gex-hello2)) + "sub" + (git/tree-entry "tree" gex-tsub))))) +(define + gex-t3 + (git/write + gex + (git/tree + (assoc + (assoc + (assoc {} "a.txt" (git/tree-entry "blob" gex-hello)) + "sub" + (git/tree-entry "tree" gex-tsub)) + "sub.txt" + (git/tree-entry "blob" gex-subtxt))))) +(define + gex-t4 + (git/write + gex + (git/tree + (assoc + {} + "x.sh" + (merge (git/tree-entry "blob" gex-hello) {:mode "100755"}))))) +(define gex-c1 (git/write gex (git/commit gex-t1 (list) {:message "c1" :tz "+0000" :email "ada@sx" :time 1700000000 :author "ada"}))) +(define gex-c2 (git/write gex (git/commit gex-t2 (list gex-c1) {:message "c2" :tz "+0000" :email "ada@sx" :time 1700000100 :author "ada"}))) +(define gex-tag (git/write gex (git/tag gex-c1 "v1" {:message "first" :tz "+0000" :email "ada@sx" :time 1700000200 :author "ada"}))) + +; ---- blob payloads ---- +(git-test + "blob export matches git hash-object" + (git/export-sha gex gex-hello) + "ce013625030ba8dba906f756967f9e9ca394464a") +(git-test + "empty blob is git's famous e69de29" + (git/export-sha gex (git/write-blob gex "")) + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391") +(git-test + "blob payload bytes are ' NUL'" + (equal? + (get (git/export gex gex-hello) :bytes) + (str "blob 6" git/export-nul "hello\n")) + true) + +; ---- tree payloads (git sort rules, raw child shas) ---- +(git-test + "leaf tree matches git mktree" + (git/export-sha gex gex-tsub) + "2282cb13a4b7999406280aac69e4fc45260fb909") +(git-test + "nested tree matches git mktree" + (git/export-sha gex gex-t1) + "77918032f1f02d785d3bc222ab29b4969cd83854") +(git-test + "modified tree matches git mktree" + (git/export-sha gex gex-t2) + "aaf1022f60da7b14837d52232a75fa3fc2d3e3a7") +(git-test + "directories sort as 'name/': sub.txt before dir sub" + (git/export-sha gex gex-t3) + "58c19e599aa988a4ee6fba065f8801c700777a3d") +(git-test + "entry :mode override (100755) is honored" + (git/export-sha gex gex-t4) + "54925a269ee97325d7da275bda4250d83b338e65") +(git-test + "the empty tree is git's 4b825dc" + (git/export-sha gex (git/write gex (git/tree {}))) + "4b825dc642cb6eb9a060e54bf8d69288fbee4904") + +; ---- commit payloads ---- +(git-test + "root commit matches git commit-tree" + (git/export-sha gex gex-c1) + "baeeb137fc255e62c1e3b980e70b2d8dd2be6e83") +(git-test + "child commit matches git commit-tree -p" + (git/export-sha gex gex-c2) + "a8eac1e101bf4f6b7d614a4384592a981a67ce92") +(git-test + "commit body is byte-exact" + (get (git/export gex gex-c1) :bytes) + (str + "commit 127" + git/export-nul + "tree 77918032f1f02d785d3bc222ab29b4969cd83854\n" + "author ada 1700000000 +0000\n" + "committer ada 1700000000 +0000\n" + "\n" + "c1\n")) + +; ---- tag payload ---- +(git-test + "annotated tag matches git hash-object -t tag" + (git/export-sha gex gex-tag) + "b28ebfd4213f10fd0c2127d1d14a26179fcf12e0") +(git-test + "tag body names the target type" + (contains? (get (git/export gex gex-tag) :bytes) "type commit") + true) + +; ---- closure + set ---- +(git-test + "closure of the child commit covers all 8 reachable objects" + (len (keys (git/export-closure gex gex-c2))) + 8) +(git-test + "closure of the tag reaches through the commit" + (len (keys (git/export-closure gex gex-tag))) + 6) +(git-test + "export-set is keyed by git sha with the head marked" + (let + ((s (git/export-set gex gex-c2))) + (list + (get s :head) + (get + (get (get s :objects) "baeeb137fc255e62c1e3b980e70b2d8dd2be6e83") + :type))) + (list "a8eac1e101bf4f6b7d614a4384592a981a67ce92" "commit")) + +; ---- defaults + trailing newline rule ---- +(git-test + "idents default deterministically" + (contains? + (get + (git/export gex (git/write gex (git/commit gex-t1 (list) {:message "m\n"}))) + :bytes) + "author sx 0 +0000") + true) +(git-test + "message gains a trailing newline when missing" + (let + ((e1 (git/export gex (git/write gex (git/commit gex-t1 (list) {:message "m"})))) + (e2 + (git/export + gex + (git/write gex (git/commit gex-t1 (list) {:message "m\n" :x 1}))))) + (equal? (get e1 :sha1) (get e2 :sha1))) + true)