From 4d5a60a7546ed01a7af31b061ea5f74d12277be2 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 3 Jul 2026 12:15:51 +0000 Subject: [PATCH] =?UTF-8?q?sx-git=20Phase=205:=20diff=20=E2=80=94=20Myers?= =?UTF-8?q?=20line=20diff=20+=20structural=20tree=20diff=20+=20unified=20r?= =?UTF-8?q?ender=20(TDD)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Myers O(ND) forward/backtrack over line vectors (dict-vec), edit script {:op eq|del|add :line}, reconstruction invariants both sides, paper example D=5 verified; unified hunks with context 3, merged ranges, exact header math for empty sides; tree/commit structural diff over flattened trees; whole-commit unified render. 27/27, total 159/159. Co-Authored-By: Claude Fable 5 --- lib/git/conformance.sh | 3 +- lib/git/diff.sx | 423 ++++++++++++++++++++++++++++++++++++++++ lib/git/scoreboard.json | 7 +- lib/git/scoreboard.md | 3 +- lib/git/tests/diff.sx | 164 ++++++++++++++++ 5 files changed, 595 insertions(+), 5 deletions(-) create mode 100644 lib/git/diff.sx create mode 100644 lib/git/tests/diff.sx diff --git a/lib/git/conformance.sh b/lib/git/conformance.sh index 84772a48..72876385 100755 --- a/lib/git/conformance.sh +++ b/lib/git/conformance.sh @@ -13,7 +13,7 @@ if [ ! -x "$SX_SERVER" ]; then exit 1 fi -SUITES=(object ref dag worktree) +SUITES=(object ref dag worktree diff) OUT_JSON="lib/git/scoreboard.json" OUT_MD="lib/git/scoreboard.md" @@ -46,6 +46,7 @@ run_suite() { (load "lib/git/ref.sx") (load "lib/git/dag.sx") (load "lib/git/worktree.sx") +(load "lib/git/diff.sx") (epoch 2) (eval "(define git-test-pass 0)") (eval "(define git-test-fail 0)") diff --git a/lib/git/diff.sx b/lib/git/diff.sx new file mode 100644 index 00000000..4737f213 --- /dev/null +++ b/lib/git/diff.sx @@ -0,0 +1,423 @@ +; lib/git/diff.sx — sx-git Phase 5: structural tree diff + Myers line diff. +; Tree diff = files-diff over flattened trees (path -> blob cid). 
Blob diff =
+; Myers O(ND) shortest edit script over lines, edit script ops
+; {:op "eq"|"del"|"add" :line l}, rendered as unified hunks (context 3).
+; Requires: lib/git/object.sx, lib/git/worktree.sx.
+
+; ---- lines <-> data ----
+; Leading n items of xs; stops early once xs is exhausted.
+(define git/diff-take
+  (fn (xs n)
+    (cond
+      ((= n 0) (list))
+      ((empty? xs) (list))
+      (else
+        (cons (first xs) (git/diff-take (rest xs) (- n 1)))))))
+
+; Pieces of s between "\n" separators. When the last piece is the empty
+; string (s ended with a newline, or s is empty) that piece is left out.
+(define git/diff-lines
+  (fn (s)
+    (let
+      ((pieces (split s "\n")))
+      (let
+        ((tail-idx (- (len pieces) 1)))
+        (if
+          (and (>= tail-idx 0) (equal? (nth pieces tail-idx) ""))
+          (git/diff-take pieces tail-idx)
+          pieces)))))
+
+; dict keyed by stringified index, standing in for a random-access vector
+(define git/dvec
+  (fn (xs)
+    (let
+      ((indexed (map-indexed (fn (idx item) (list idx item)) xs)))
+      (reduce
+        (fn (vec pair) (assoc vec (str (first pair)) (nth pair 1)))
+        {}
+        indexed))))
+(define git/dget (fn (v i) (get v (str i))))
+
+; ---- Myers forward pass ----
+; v: dict k(str) -> furthest x on diagonal k. Reads of k±1 always hit the
+; previous round's parity, so in-round writes never corrupt the decision.
+; git/myers-x picks the start x for diagonal k in round d. k = -d reads only
+; the k+1 entry and k = d only the k-1 entry (plus one); in between it takes
+; the k+1 entry when that is strictly larger, else the k-1 entry plus one.
+; Clauses are tested in order, so the k-1 entry is never read when k = -d.
+(define git/myers-x
+  (fn (v d k)
+    (cond
+      ((= k (- 0 d)) (git/dget v (+ k 1)))
+      ((= k d) (+ (git/dget v (- k 1)) 1))
+      ((< (git/dget v (- k 1)) (git/dget v (+ k 1)))
+        (git/dget v (+ k 1)))
+      (else (+ (git/dget v (- k 1)) 1)))))
+
+; Advance (x, y) diagonally while both are in range and av[x] equals bv[y].
+(define git/myers-snake
+  (fn (av bv n m x y)
+    (cond
+      ((>= x n) (list x y))
+      ((>= y m) (list x y))
+      ((equal? (git/dget av x) (git/dget bv y)) (git/myers-snake av bv n m (+ x 1) (+ y 1)))
+      (else (list x y)))))
+
+; one round d over k = -d, -d+2, ..., d; returns (v done?)
+; Stops with (v false) once k passes d, or with (v2 true) as soon as a snake
+; ends at or beyond (n, m). The start x is computed once and reused for y.
+(define git/myers-round
+  (fn (av bv n m v d k)
+    (if
+      (> k d)
+      (list v false)
+      (let ((x0 (git/myers-x v d k)))
+        (let ((sn (git/myers-snake av bv n m x0 (- x0 k))))
+          (let
+            ((v2 (assoc v (str k) (first sn))))
+            (if
+              (and (>= (first sn) n) (>= (nth sn 1) m))
+              (list v2 true)
+              (git/myers-round av bv n m v2 d (+ k 2)))))))))
+
+; trace[d] = v entering round d; returns (trace D)
+; Runs rounds d, d+1, ... until one reports done; each round starts at
+; k = -d. D is the number of the round that finished.
+(define git/myers-run
+  (fn (av bv n m v d trace)
+    (let
+      ((trace2 (append trace (list v))))
+      (let
+        ((res (git/myers-round av bv n m v d (- 0 d))))
+        (if
+          (nth res 1)
+          (list trace2 d)
+          (git/myers-run av bv n m (first res) (+ d 1) trace2))))))
+
+; ---- Myers backtrack: walk (n,m) back to (0,0), cons ops into forward order ----
+; Steps diagonally back from (x, y) while x > px and y > py, consing an "eq"
+; op for av[x-1] per step; returns (x y ops) at the stopping point.
+(define git/myers-diag
+  (fn (av x y px py ops)
+    (if
+      (and (> x px) (> y py))
+      (git/myers-diag
+        av
+        (- x 1)
+        (- y 1)
+        px
+        py
+        (cons {:op "eq" :line (git/dget av (- x 1))} ops))
+      (list x y ops))))
+
+; Undoes round d from (x, y): pk is the diagonal chosen by the same rule as
+; git/myers-x, (px, py) its point in trace[d]. The snake is replayed as "eq"
+; ops; for d > 0 an "add" bv[py] (snake began at x = px) or "del" av[px] follows.
+(define git/myers-back
+  (fn (av bv trace d x y ops)
+    (if
+      (< d 0)
+      ops
+      (let ((v (nth trace d)))
+        (let
+          ((k (- x y)))
+          (let
+            ((pk (if (or (= k (- 0 d)) (and (not (= k d)) (< (git/dget v (- k 1)) (git/dget v (+ k 1))))) (+ k 1) (- k 1))))
+            (let
+              ((px (git/dget v pk)))
+              (let
+                ((py (- px pk)))
+                (let
+                  ((r (git/myers-diag av x y px py ops)))
+                  (if
+                    (> d 0)
+                    (git/myers-back
+                      av
+                      bv
+                      trace
+                      (- d 1)
+                      px
+                      py
+                      (if
+                        (= (first r) px)
+                        (cons {:op "add" :line (git/dget bv py)} (nth r 2))
+                        (cons {:op "del" :line (git/dget av px)} (nth r 2))))
+                    (nth r 2)))))))))))
+
+; ---- edit script over two strings ----
+; Returns the list of {:op "eq"|"del"|"add" :line l} that turns a-data's
+; lines into b-data's. Each side's index dict and length is built once and
+; shared by the forward pass and the backtrack. v is seeded with "1" -> 0 so
+; that the first round (d = 0, k = 0) finds a k+1 entry to read.
+(define git/diff-script
+  (fn (a-data b-data)
+    (let
+      ((al (git/diff-lines a-data)) (bl (git/diff-lines b-data)))
+      (let
+        ((av (git/dvec al)) (bv (git/dvec bl)) (n (len al)) (m (len bl)))
+        (let
+          ((rt (git/myers-run av bv n m (assoc {} "1" 0) 0 (list))))
+          (git/myers-back
+            av bv
+            (first rt) (nth rt 1)
+            n m (list)))))))
+
+; Number of ops in script whose :op is not "eq".
+(define git/diff-changes
+  (fn
+    (script)
+    (len (filter (fn (o) (not (equal? (get o :op) "eq"))) script))))
+
+; reconstruction invariants: old = eq+del lines, new = eq+add lines
+; :line of every op that is not an "add", in script order.
+(define git/diff-old-lines
+  (fn
+    (script)
+    (map
+      (fn (o) (get o :line))
+      (filter (fn (o) (not (equal? (get o :op) "add"))) script))))
+; :line of every op that is not a "del", in script order.
+(define git/diff-new-lines
+  (fn
+    (script)
+    (map
+      (fn (o) (get o :line))
+      (filter (fn (o) (not (equal? (get o :op) "del"))) script))))
+
+; ---- unified rendering ----
+; Tags each op with :a / :b, the 1-based old/new line counters as they stand
+; when the op is reached; "eq" then bumps both, "del" only :a, others only :b.
+(define git/diff-annotate
+  (fn (script)
+    (nth
+      (reduce
+        (fn
+          (acc o)
+          (let
+            ((a (first acc))
+              (b (nth acc 1))
+              (out (nth acc 2)))
+            (cond
+              ((equal? (get o :op) "eq")
+                (list
+                  (+ a 1)
+                  (+ b 1)
+                  (append out (list (merge o {:a a :b b})))))
+              ((equal? (get o :op) "del")
+                (list
+                  (+ a 1)
+                  b
+                  (append out (list (merge o {:a a :b b})))))
+              (else
+                (list
+                  a
+                  (+ b 1)
+                  (append out (list (merge o {:a a :b b}))))))))
+        (list 1 1 (list))
+        script)
+      2)))
+
+; 0-based positions in script of the ops whose :op is not "eq".
+(define git/diff-change-idxs
+  (fn
+    (script)
+    (map
+      (fn (p) (first p))
+      (filter
+        (fn (p) (not (equal? (get (nth p 1) :op) "eq")))
+        (map-indexed (fn (i o) (list i o)) script)))))
+
+; Folds (from to) ranges left to right, fusing a range into the previous one
+; when it starts at or before prev-to + 1. Only the last kept range is compared.
+(define git/diff-merge-ranges
+  (fn (ranges)
+    (reduce
+      (fn
+        (acc r)
+        (if
+          (empty? acc)
+          (list r)
+          (let
+            ((prev (nth acc (- (len acc) 1))))
+            (if
+              (<= (first r) (+ (nth prev 1) 1))
+              (append
+                (git/diff-take acc (- (len acc) 1))
+                (list
+                  (list
+                    (first prev)
+                    (max (nth prev 1) (nth r 1)))))
+              (append acc (list r))))))
+      (list)
+      ranges)))
+
+; One (from to) index range per changed op, widened by ctx ops on each side
+; and clamped to the bounds of script, then merged.
+(define git/diff-hunk-ranges
+  (fn (script ctx)
+    (git/diff-merge-ranges
+      (map
+        (fn
+          (i)
+          (list
+            (max 0 (- i ctx))
+            (min (- (len script) 1) (+ i ctx))))
+        (git/diff-change-idxs script)))))
+
+; Items of xs whose 0-based index lies in from..to, both ends inclusive.
+(define git/diff-slice
+  (fn
+    (xs from to)
+    (map
+      (fn (p) (nth p 1))
+      (filter
+        (fn (p) (and (>= (first p) from) (<= (first p) to)))
+        (map-indexed (fn (i x) (list i x)) xs)))))
+
+; Line prefix for an op: " " for "eq", "-" for "del", "+" for anything else.
+(define git/diff-op-char
+  (fn
+    (op)
+    (cond ((equal? op "eq") " ") ((equal? op "del") "-") (else "+"))))
+
+; Renders "@@ -astart,acount +bstart,bcount @@" plus one prefixed line per op.
+; A side with count 0 reports its first op's number minus 1. Reads (first ops).
+(define git/diff-hunk-render
+  (fn (ops)
+    (let
+      ((acount (len (filter (fn (o) (not (equal? (get o :op) "add"))) ops)))
+        (bcount
+          (len (filter (fn (o) (not (equal? (get o :op) "del"))) ops))))
+      (let
+        ((astart (if (= acount 0) (- (get (first ops) :a) 1) (get (first ops) :a)))
+          (bstart
+            (if
+              (= bcount 0)
+              (- (get (first ops) :b) 1)
+              (get (first ops) :b))))
+        (str
+          "@@ -"
+          astart
+          ","
+          acount
+          " +"
+          bstart
+          ","
+          bcount
+          " @@\n"
+          (reduce
+            (fn
+              (acc o)
+              (str acc (git/diff-op-char (get o :op)) (get o :line) "\n"))
+            ""
+            ops))))))
+
+; Unified text for a-data -> b-data: one rendered hunk per merged change
+; range, with 3 ops of context; "" when there is no change range.
+(define git/diff-unified
+  (fn (a-data b-data)
+    (let
+      ((ann (git/diff-annotate (git/diff-script a-data b-data))))
+      (reduce
+        (fn
+          (acc r)
+          (str
+            acc
+            (git/diff-hunk-render
+              (git/diff-slice ann (first r) (nth r 1)))))
+        ""
+        (git/diff-hunk-ranges ann 3)))))
+
+; ---- object-level diffs ----
+; Edit script between the data of blobs b1 and b2, both read from repo.
+(define git/blob-diff
+  (fn
+    (repo b1 b2)
+    (git/diff-script
+      (git/blob-data (git/read repo b1))
+      (git/blob-data (git/read repo b2)))))
+
+; git/files-diff over the flattened forms of trees t1 and t2.
+(define git/tree-diff
+  (fn
+    (repo t1 t2)
+    (git/files-diff (git/tree-flatten repo t1) (git/tree-flatten repo t2))))
+
+; git/tree-diff between the trees of commits c1 and c2.
+(define git/commit-diff
+  (fn
+    (repo c1 c2)
+    (git/tree-diff
+      repo
+      (git/commit-tree (git/read repo c1))
+      (git/commit-tree (git/read repo c2)))))
+
+; ---- whole-commit unified render: added, deleted, then modified paths ----
+; Blob data behind path in the flattened tree flat, or "" when path is absent.
+(define git/diff-path-data
+  (fn
+    (repo flat path)
+    (if
+      (has-key? flat path)
+      (git/blob-data (git/read repo (get flat path)))
+      "")))
+
+; Unified text for c1 -> c2: a "diff --sx" header plus hunks per path. Added
+; paths use "--- /dev/null", deleted ones "+++ /dev/null"; modified come last.
+(define git/commit-diff-unified
+  (fn (repo c1 c2)
+    (let
+      ((f1 (git/tree-flatten repo (git/commit-tree (git/read repo c1))))
+        (f2 (git/tree-flatten repo (git/commit-tree (git/read repo c2)))))
+      (let
+        ((d (git/files-diff f1 f2)))
+        (str
+          (reduce
+            (fn
+              (acc p)
+              (str
+                acc
+                "diff --sx a/"
+                p
+                " b/"
+                p
+                "\n--- /dev/null\n+++ b/"
+                p
+                "\n"
+                (git/diff-unified "" (git/diff-path-data repo f2 p))))
+            ""
+            (get d :added))
+          (reduce
+            (fn
+              (acc p)
+              (str
+                acc
+                "diff --sx a/"
+                p
+                " b/"
+                p
+                "\n--- a/"
+                p
+                "\n+++ /dev/null\n"
+                (git/diff-unified (git/diff-path-data repo f1 p) "")))
+            ""
+            (get d :deleted))
+          (reduce
+            (fn
+              (acc p)
+              (str
+                acc
+                "diff --sx a/"
+                p
+                " b/"
+                p
+                "\n--- a/"
+                p
+                "\n+++ b/"
+                p
+                "\n"
+                (git/diff-unified
+                  (git/diff-path-data repo f1 p)
+                  (git/diff-path-data repo f2 p))))
+            ""
+            (get d :modified)))))))
diff --git a/lib/git/scoreboard.json b/lib/git/scoreboard.json
index 8317572d..dc7a8b28 100644
--- a/lib/git/scoreboard.json
+++ b/lib/git/scoreboard.json
@@ -3,9 +3,10 @@
     "object": {"pass": 38, "fail": 0},
     "ref": {"pass": 38, "fail": 0},
     "dag": {"pass": 30, "fail": 0},
-    "worktree": {"pass": 26, "fail": 0}
+    "worktree": {"pass": 26, "fail": 0},
+    "diff": {"pass": 27, "fail": 0}
   },
-  "total_pass": 132,
+  "total_pass": 159,
   "total_fail": 0,
-  "total": 132
+  "total": 159
 }
diff --git a/lib/git/scoreboard.md b/lib/git/scoreboard.md
index cfd7d435..1a56a213 100644
--- a/lib/git/scoreboard.md
+++ b/lib/git/scoreboard.md
@@ -8,4 +8,5 @@ _Generated by `lib/git/conformance.sh`_
 | ref | 38 | 0 | 38 |
 | dag | 30 | 0 | 30 |
 | worktree | 26 | 0 | 26 |
-| **Total** | **132** | **0** | **132** |
+| diff | 27 | 0 | 27 |
+| **Total** | **159** | **0** | **159** |
diff --git a/lib/git/tests/diff.sx b/lib/git/tests/diff.sx
new file mode 100644
index 00000000..ecbafa50
--- /dev/null
+++ b/lib/git/tests/diff.sx
@@ -0,0 +1,164 @@
+; Phase 5 — diff: Myers line diff (edit script + reconstruction invariants),
+; unified hunk rendering, structural tree/commit diff.
+
+(define gdf-db (persist/mem-backend))
+(define gdf (git/repo gdf-db))
+
+; ---- diff-lines ----
+(git-test
+  "lines drop the trailing newline slot"
+  (= (git/diff-lines "a\nb\n") (list "a" "b"))
+  true)
+(git-test
+  "lines without trailing newline"
+  (= (git/diff-lines "a\nb") (list "a" "b"))
+  true)
+(git-test "empty data has no lines" (= (git/diff-lines "") (list)) true)
+
+; ---- Myers edit script ----
+(git-test
+  "identical inputs are all-eq"
+  (git/diff-changes (git/diff-script "a\nb\nc\n" "a\nb\nc\n"))
+  0)
+(git-test
+  "identical inputs keep every line"
+  (len (git/diff-script "a\nb\nc\n" "a\nb\nc\n"))
+  3)
+(git-test
+  "empty vs empty is the empty script"
+  (= (git/diff-script "" "") (list))
+  true)
+(git-test
+  "single line replacement"
+  (= (git/diff-script "a" "b") (list {:op "del" :line "a"} {:op "add" :line "b"}))
+  true)
+(git-test
+  "pure insertion script"
+  (= (git/diff-script "" "a\nb\n") (list {:op "add" :line "a"} {:op "add" :line "b"}))
+  true)
+(git-test
+  "pure deletion script"
+  (= (git/diff-script "a\nb\n" "") (list {:op "del" :line "a"} {:op "del" :line "b"}))
+  true)
+(git-test
+  "middle change keeps flanks eq"
+  (=
+    (git/diff-script "a\nb\nc\n" "a\nx\nc\n")
+    (list {:op "eq" :line "a"} {:op "del" :line "b"} {:op "add" :line "x"} {:op "eq" :line "c"}))
+  true)
+
+; Myers' paper example: ABCABBA -> CBABAC has a shortest edit script of 5
+(git-test
+  "ABCABBA/CBABAC shortest edit distance is 5"
+  (git/diff-changes (git/diff-script "A\nB\nC\nA\nB\nB\nA" "C\nB\nA\nB\nA\nC"))
+  5)
+(git-test
+  "script reconstructs the old side"
+  (=
+    (git/diff-old-lines (git/diff-script "A\nB\nC\nA\nB\nB\nA" "C\nB\nA\nB\nA\nC"))
+    (list "A" "B" "C" "A" "B" "B" "A"))
+  true)
+(git-test
+  "script reconstructs the new side"
+  (=
+    (git/diff-new-lines (git/diff-script "A\nB\nC\nA\nB\nB\nA" "C\nB\nA\nB\nA\nC"))
+    (list "C" "B" "A" "B" "A" "C"))
+  true)
+(git-test
+  "reconstruction holds for asymmetric edits"
+  (let
+    ((a "one\ntwo\nthree\nfour\n") (b "zero\ntwo\nfour\nfive\nsix\n"))
+    (and
+      (= (git/diff-old-lines (git/diff-script a b)) (git/diff-lines a))
+      (= (git/diff-new-lines (git/diff-script a b)) (git/diff-lines b))))
+  true)
+
+; ---- unified rendering ----
+(git-test
+  "unified: middle replacement, full context"
+  (git/diff-unified "a\nb\nc\n" "a\nx\nc\n")
+  "@@ -1,3 +1,3 @@\n a\n-b\n+x\n c\n")
+(git-test
+  "unified: append at end"
+  (git/diff-unified "a\n" "a\nb\n")
+  "@@ -1,1 +1,2 @@\n a\n+b\n")
+(git-test "unified: identical renders empty" (git/diff-unified "x\n" "x\n") "")
+(git-test
+  "unified: creation from empty"
+  (git/diff-unified "" "a\nb\n")
+  "@@ -0,0 +1,2 @@\n+a\n+b\n")
+(git-test
+  "unified: deletion to empty"
+  (git/diff-unified "a\nb\n" "")
+  "@@ -1,2 +0,0 @@\n-a\n-b\n")
+(git-test
+  "unified: context trimmed to 3 lines"
+  (git/diff-unified "l1\nl2\nl3\nl4\nl5\nl6\nl7\nl8\nl9\n" "l1\nl2\nl3\nl4\nX\nl6\nl7\nl8\nl9\n")
+  "@@ -2,7 +2,7 @@\n l2\n l3\n l4\n-l5\n+X\n l6\n l7\n l8\n")
+(git-test
+  "unified: distant changes split into two hunks"
+  (git/diff-unified
+    "l1\nl2\nl3\nl4\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\nl14\nl15\n"
+    "l1\nX\nl3\nl4\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\nY\nl15\n")
+  (str
+    "@@ -1,5 +1,5 @@\n l1\n-l2\n+X\n l3\n l4\n l5\n"
+    "@@ -11,5 +11,5 @@\n l11\n l12\n l13\n-l14\n+Y\n l15\n"))
+
+; ---- blob diff over the object store ----
+(git-test
+  "blob-diff reads both blobs"
+  (=
+    (git/blob-diff gdf (git/write-blob gdf "a\n") (git/write-blob gdf "b\n"))
+    (list {:op "del" :line "a"} {:op "add" :line "b"}))
+  true)
+
+; ---- structural tree/commit diff ----
+(define
+  gdf-t1
+  (git/tree-from-files
+    gdf
+    (assoc
+      (assoc (assoc {} "a.txt" "1\n") "b.txt" "2\n")
+      "sub/c.txt"
+      "3\n")))
+(define
+  gdf-t2
+  (git/tree-from-files
+    gdf
+    (assoc
+      (assoc (assoc {} "a.txt" "1\n") "b.txt" "2x\n")
+      "d.txt"
+      "new\n")))
+(define gdf-c1 (git/write gdf (git/commit gdf-t1 (list) {:message "c1"})))
+(define gdf-c2 (git/write gdf (git/commit gdf-t2 (list gdf-c1) {:message "c2"})))
+
+(git-test
+  "tree-diff classifies added/modified/deleted"
+  (= (git/tree-diff gdf gdf-t1 gdf-t2) {:deleted (list "sub/c.txt") :modified (list "b.txt") :added (list "d.txt")})
+  true)
+(git-test
+  "tree-diff of a tree with itself is empty"
+  (= (git/tree-diff gdf gdf-t1 gdf-t1) {:deleted (list) :modified (list) :added (list)})
+  true)
+(git-test
+  "commit-diff goes through the commit trees"
+  (= (git/commit-diff gdf gdf-c1 gdf-c2) {:deleted (list "sub/c.txt") :modified (list "b.txt") :added (list "d.txt")})
+  true)
+
+; ---- whole-commit unified render ----
+(git-test
+  "commit-diff-unified renders adds, deletes, then modifications"
+  (let
+    ((r (git/repo (persist/mem-backend))))
+    (let
+      ((c1 (git/write r (git/commit (git/tree-from-files r (assoc {} "f.txt" "old\n")) (list) {:message "c1"}))))
+      (let
+        ((c2 (git/write r (git/commit (git/tree-from-files r (assoc (assoc {} "f.txt" "new\n") "g.txt" "hi\n")) (list c1) {:message "c2"}))))
+        (git/commit-diff-unified r c1 c2))))
+  (str
+    "diff --sx a/g.txt b/g.txt\n--- /dev/null\n+++ b/g.txt\n@@ -0,0 +1,1 @@\n+hi\n"
+    "diff --sx a/f.txt b/f.txt\n--- a/f.txt\n+++ b/f.txt\n@@ -1,1 +1,1 @@\n-old\n+new\n"))
+(git-test
+  "commit-diff-unified of identical commits is empty"
+  (git/commit-diff-unified gdf gdf-c1 gdf-c1)
+  "")