diff --git a/lib/git/conformance.sh b/lib/git/conformance.sh index 81103d0e..3de3c7e0 100755 --- a/lib/git/conformance.sh +++ b/lib/git/conformance.sh @@ -13,7 +13,7 @@ if [ ! -x "$SX_SERVER" ]; then exit 1 fi -SUITES=(object ref dag worktree diff merge porcelain export) +SUITES=(object ref dag worktree diff merge porcelain export import) OUT_JSON="lib/git/scoreboard.json" OUT_MD="lib/git/scoreboard.md" @@ -51,6 +51,7 @@ run_suite() { (load "lib/git/porcelain.sx") (load "lib/git/sha1.sx") (load "lib/git/export.sx") +(load "lib/git/import.sx") (epoch 2) (eval "(define git-test-pass 0)") (eval "(define git-test-fail 0)") diff --git a/lib/git/import.sx b/lib/git/import.sx new file mode 100644 index 00000000..43fdaf4a --- /dev/null +++ b/lib/git/import.sx @@ -0,0 +1,244 @@ +; lib/git/import.sx — git-wire IMPORT (inverse of export.sx). +; Parses loose-object payloads " \0" back into native +; objects, bottom-up over an export-set-shaped table {sha1 -> {:bytes ...}}. +; Wire round-trip (bytes -> native -> bytes) is byte-exact: messages kept +; verbatim, non-default tree modes preserved as entry :mode, committer +; fields stored only when they differ from the author (so export's defaults +; regenerate identical bytes). Native cids of imported blobs/trees with +; default modes equal the originals'. +; Requires: lib/git/object.sx, lib/git/sha1.sx, lib/git/export.sx. + +; ---- scanning ---- +(define + git/import-index-of + (fn + (s ch i n) + (cond + ((>= i n) -1) + ((equal? (substring s i (+ i 1)) ch) i) + (else (git/import-index-of s ch (+ i 1) n))))) + +(define + git/import-find + (fn (s ch from) (git/import-index-of s ch from (string-length s)))) + +(define + git/import-rfind + (fn + (s ch i) + (cond + ((< i 0) -1) + ((equal? (substring s i (+ i 1)) ch) i) + (else (git/import-rfind s ch (- i 1)))))) + +(define + git/import-find2 + (fn + (s i n) + (cond + ((> (+ i 2) n) -1) + ((equal? (substring s i (+ i 2)) "\n\n") i) + (else (git/import-find2 s (+ i 1) n))))) + +(define + git/raw->hex-go + (fn + (raw i n acc) + (if + (>= i n) + acc + (git/raw->hex-go + raw + (+ i 1) + n + (str + acc + (git/sha1-byte-hex (char-code (substring raw i (+ i 1))))))))) + +(define + git/raw->hex + (fn (raw) (git/raw->hex-go raw 0 (string-length raw) ""))) + +; ---- payload header ---- +(define + git/import-payload + (fn + (bytes) + (let + ((sp (git/import-find bytes " " 0)) + (z (git/import-find bytes git/export-nul 0))) + {:type (substring bytes 0 sp) :body (substring bytes (+ z 1) (string-length bytes))}))) + +; ---- tree body: " \0<20 raw sha>" sequence ---- +(define + git/import-tree-entries + (fn + (body i n acc) + (if + (>= i n) + acc + (let + ((sp (git/import-find body " " i))) + (let + ((z (git/import-find body git/export-nul sp))) + (git/import-tree-entries + body + (+ z 21) + n + (append acc (list {:name (substring body (+ sp 1) z) :sha (git/raw->hex (substring body (+ z 1) (+ z 21))) :mode (substring body i sp)})))))))) + +; ---- ident: "name time tz" ---- +(define + git/import-ident + (fn + (line) + (let + ((lt (git/import-rfind line "<" (- (string-length line) 1))) + (gt (git/import-rfind line ">" (- (string-length line) 1)))) + (let + ((rest (substring line (+ gt 2) (string-length line)))) + (let ((sp (git/import-find rest " " 0))) {:name (substring line 0 (max 0 (- lt 1))) :tz (substring rest (+ sp 1) (string-length rest)) :email (substring line (+ lt 1) gt) :time (parse-int (substring rest 0 sp))}))))) + +; ---- commit / tag bodies ---- +(define + git/import-headers + (fn + (body) + (let + ((cut (git/import-find2 body 0 (string-length body)))) + {:message (substring body (+ cut 2) (string-length body)) :lines (split (substring body 0 cut) "\n")}))) + +(define + git/import-commit-parse + (fn + (body) + (let + ((h (git/import-headers body))) + (reduce + (fn + (acc line) + (cond + ((starts-with? line "tree ") + (assoc + acc + :tree (substring line 5 (string-length line)))) + ((starts-with? line "parent ") + (assoc + acc + :parents (append + (get acc :parents) + (list (substring line 7 (string-length line)))))) + ((starts-with? line "author ") + (assoc + acc + :author (git/import-ident + (substring line 7 (string-length line))))) + ((starts-with? line "committer ") + (assoc + acc + :committer (git/import-ident + (substring line 10 (string-length line))))) + (else acc))) + {:message (get h :message) :parents (list)} + (get h :lines))))) + +(define + git/import-tag-parse + (fn + (body) + (let + ((h (git/import-headers body))) + (reduce + (fn + (acc line) + (cond + ((starts-with? line "object ") + (assoc + acc + :object (substring line 7 (string-length line)))) + ((starts-with? line "tag ") + (assoc + acc + :tag (substring line 4 (string-length line)))) + ((starts-with? line "tagger ") + (assoc + acc + :tagger (git/import-ident + (substring line 7 (string-length line))))) + (else acc))) + {:message (get h :message)} + (get h :lines))))) + +; ident dict -> native commit meta fields +(define git/import-author-meta (fn (a) {:tz (get a :tz) :email (get a :email) :time (get a :time) :author (get a :name)})) + +(define + git/import-commit-meta + (fn + (p) + (let + ((a (get p :author)) (c (get p :committer))) + (merge + (merge (git/import-author-meta a) {:message (get p :message)}) + (if (or (equal? c nil) (= a c)) {} {:committer (get c :name) :committer-tz (get c :tz) :committer-email (get c :email) :committer-time (get c :time)}))))) + +; ---- bottom-up import over a sha->{:bytes} table; memo: sha -> native cid ---- +(define + git/import-sha + (fn + (repo objects sha memo) + (if + (has-key? memo sha) + memo + (let + ((p (git/import-payload (get (get objects sha) :bytes)))) + (cond + ((equal? (get p :type) "blob") + (assoc memo sha (git/write-blob repo (get p :body)))) + ((equal? (get p :type) "tree") + (let + ((res (reduce (fn (acc pe) (let ((m2 (git/import-sha repo objects (get pe :sha) (first acc)))) (list m2 (assoc (nth acc 1) (get pe :name) (let ((base {:kind (if (equal? (get pe :mode) "40000") "tree" "blob") :cid (get m2 (get pe :sha))})) (if (or (equal? (get pe :mode) "40000") (equal? (get pe :mode) "100644")) base (merge base {:mode (get pe :mode)}))))))) (list memo {}) (git/import-tree-entries (get p :body) 0 (string-length (get p :body)) (list))))) + (assoc + (first res) + sha + (git/write repo (git/tree (nth res 1)))))) + ((equal? (get p :type) "commit") + (let + ((cp (git/import-commit-parse (get p :body)))) + (let + ((m2 (git/import-sha repo objects (get cp :tree) (reduce (fn (mm ps) (git/import-sha repo objects ps mm)) memo (get cp :parents))))) + (assoc + m2 + sha + (git/write + repo + (git/commit + (get m2 (get cp :tree)) + (map (fn (ps) (get m2 ps)) (get cp :parents)) + (git/import-commit-meta cp))))))) + ((equal? (get p :type) "tag") + (let + ((tp (git/import-tag-parse (get p :body)))) + (let + ((m2 (git/import-sha repo objects (get tp :object) memo))) + (assoc + m2 + sha + (git/write + repo + (git/tag + (get m2 (get tp :object)) + (get tp :tag) + (merge + (git/import-author-meta (get tp :tagger)) + {:message (get tp :message)}))))))) + (else memo)))))) + +; import a {:head :objects} set; returns the native cid of the head object +(define + git/import-set + (fn + (repo set) + (get + (git/import-sha repo (get set :objects) (get set :head) {}) + (get set :head)))) diff --git a/lib/git/scoreboard.json b/lib/git/scoreboard.json index 82a82845..7d2ddc5c 100644 --- a/lib/git/scoreboard.json +++ b/lib/git/scoreboard.json @@ -7,9 +7,10 @@ "diff": {"pass": 27, "fail": 0}, "merge": {"pass": 28, "fail": 0}, "porcelain": {"pass": 40, "fail": 0}, - "export": {"pass": 25, "fail": 0} + "export": {"pass": 25, "fail": 0}, + "import": {"pass": 15, "fail": 0} }, - "total_pass": 252, + "total_pass": 267, "total_fail": 0, - "total": 252 + "total": 267 } diff --git a/lib/git/scoreboard.md b/lib/git/scoreboard.md index 86db6a66..4e4491bd 100644 --- a/lib/git/scoreboard.md +++ b/lib/git/scoreboard.md @@ -12,4 +12,5 @@ _Generated by `lib/git/conformance.sh`_ | merge | 28 | 0 | 28 | | porcelain | 40 | 0 | 40 | | export | 25 | 0 | 25 | -| **Total** | **252** | **0** | **252** | +| import | 15 | 0 | 15 | +| **Total** | **267** | **0** | **267** | diff --git a/lib/git/tests/import.sx b/lib/git/tests/import.sx new file mode 100644 index 00000000..db36f295 --- /dev/null +++ b/lib/git/tests/import.sx @@ -0,0 +1,174 @@ +; Extension — git-wire import (inverse adapter). The law under test: +; export-set -> import-set -> export-set is BYTE-IDENTICAL (same head sha, +; same object table), and imported blobs/trees with default modes get the +; same native cids as the originals. + +(define gim-db (persist/mem-backend)) +(define gim (git/repo gim-db)) +(define gim-hello (git/write-blob gim "hello\n")) +(define gim-sub (git/write-blob gim "sub\n")) +(define + gim-tsub + (git/write + gim + (git/tree (assoc {} "c.txt" (git/tree-entry "blob" gim-sub))))) +(define + gim-t1 + (git/write + gim + (git/tree + (assoc + (assoc {} "a.txt" (git/tree-entry "blob" gim-hello)) + "sub" + (git/tree-entry "tree" gim-tsub))))) +(define gim-c1 (git/write gim (git/commit gim-t1 (list) {:message "c1" :tz "+0000" :email "ada@sx" :time 1700000000 :author "ada"}))) +(define + gim-c2 + (git/write + gim + (git/commit + (git/write + gim + (git/tree + (assoc + (assoc + {} + "a.txt" + (git/tree-entry "blob" (git/write-blob gim "hello2\n"))) + "sub" + (git/tree-entry "tree" gim-tsub)))) + (list gim-c1) + {:message "c2" :tz "+0000" :email "ada@sx" :time 1700000100 :author "ada"}))) + +; ---- parsing units ---- +(git-test + "payload splits type and body" + (git/import-payload (str "blob 6" git/export-nul "hello\n")) + {:type "blob" :body "hello\n"}) +(git-test + "ident parses name/email/time/tz" + (= (git/import-ident "ada lovelace 1700000000 +0100") {:name "ada lovelace" :tz "+0100" :email "ada@sx" :time 1700000000}) + true) +(git-test + "raw->hex inverts hex->raw" + (git/raw->hex (git/hex->raw "ce013625030ba8dba906f756967f9e9ca394464a")) + "ce013625030ba8dba906f756967f9e9ca394464a") +(git-test + "tree body parses mode/name/sha triples" + (= + (map + (fn (e) (get e :name)) + (git/import-tree-entries + (get + (git/import-payload (get (git/export gim gim-t1) :bytes)) + :body) + 0 + (string-length + (get + (git/import-payload (get (git/export gim gim-t1) :bytes)) + :body)) + (list))) + (list "a.txt" "sub")) + true) + +; ---- the round-trip law ---- +(define gim-set1 (git/export-set gim gim-c2)) +(define gim-fresh (git/repo-named (persist/mem-backend) "imported")) +(define gim-head (git/import-set gim-fresh gim-set1)) +(define gim-set2 (git/export-set gim-fresh gim-head)) + +(git-test + "wire round-trip: head sha is identical" + (get gim-set2 :head) + (get gim-set1 :head)) +(git-test + "wire round-trip: same number of objects" + (= + (len (keys (get gim-set2 :objects))) + (len (keys (get gim-set1 :objects)))) + true) +(git-test + "wire round-trip: every sha re-exports byte-identical" + (reduce + (fn + (acc sha) + (and + acc + (equal? + (get (get (get gim-set2 :objects) sha) :bytes) + (get (get (get gim-set1 :objects) sha) :bytes)))) + true + (keys (get gim-set1 :objects))) + true) + +; ---- native identity on the wire-expressible subset ---- +(git-test + "imported blob has the original native cid" + (git/has? gim-fresh gim-hello) + true) +(git-test + "imported default-mode tree has the original native cid" + (git/has? gim-fresh gim-t1) + true) +(git-test + "imported commit graph walks natively" + (= (git/log-messages gim-fresh gim-head) (list "c2\n" "c1\n")) + true) +(git-test + "imported parents map to native cids" + (= + (git/parents gim-fresh gim-head) + (list + (git/cid + (git/read gim-fresh (first (git/parents gim-fresh gim-head)))))) + true) + +; ---- mode + tag + committer round-trips ---- +(git-test + "non-default mode survives the round-trip" + (let + ((r (git/repo (persist/mem-backend)))) + (let + ((t (git/write r (git/tree (assoc {} "x.sh" (merge (git/tree-entry "blob" (git/write-blob r "hello\n")) {:mode "100755"})))))) + (let + ((s1 (git/export-set r t))) + (let + ((r2 (git/repo-named (persist/mem-backend) "i"))) + (get (git/export-set r2 (git/import-set r2 s1)) :head))))) + "54925a269ee97325d7da275bda4250d83b338e65") +(git-test + "annotated tag round-trips through the wire" + (let + ((tag (git/write gim (git/tag gim-c1 "v1" {:message "first" :tz "+0000" :email "ada@sx" :time 1700000200 :author "ada"})))) + (let + ((s1 (git/export-set gim tag))) + (let + ((r2 (git/repo-named (persist/mem-backend) "i"))) + (equal? + (get (git/export-set r2 (git/import-set r2 s1)) :head) + (get s1 :head))))) + true) +(git-test + "distinct committer survives the round-trip" + (let + ((c (git/write gim (git/commit gim-t1 (list) {:message "handoff" :committer "bob" :committer-tz "+0200" :tz "+0000" :committer-email "bob@sx" :email "ada@sx" :committer-time 1700000300 :time 1700000000 :author "ada"})))) + (let + ((s1 (git/export-set gim c))) + (let + ((r2 (git/repo-named (persist/mem-backend) "i"))) + (equal? + (get (git/export-set r2 (git/import-set r2 s1)) :head) + (get s1 :head))))) + true) +(git-test + "multi-line message round-trips byte-exact" + (let + ((c (git/write gim (git/commit gim-t1 (list) {:message "subject\n\nbody line one\nbody line two\n" :tz "+0000" :email "ada@sx" :time 1 :author "ada"})))) + (let + ((s1 (git/export-set gim c))) + (let + ((r2 (git/repo-named (persist/mem-backend) "i"))) + (equal? + (get (git/export-set r2 (git/import-set r2 s1)) :head) + (get s1 :head))))) + true)