sx-git Phase 5: diff — Myers line diff + structural tree diff + unified render (TDD)

Myers O(ND) forward/backtrack over line vectors (dict-vec), edit script
{:op eq|del|add :line}, reconstruction invariants both sides, paper example
D=5 verified; unified hunks with context 3, merged ranges, exact header
math for empty sides; tree/commit structural diff over flattened trees;
whole-commit unified render. 27/27, total 159/159.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-03 12:15:51 +00:00
parent 125d9f1398
commit 4d5a60a754
5 changed files with 595 additions and 5 deletions

View File

@@ -13,7 +13,7 @@ if [ ! -x "$SX_SERVER" ]; then
exit 1 exit 1
fi fi
SUITES=(object ref dag worktree) SUITES=(object ref dag worktree diff)
OUT_JSON="lib/git/scoreboard.json" OUT_JSON="lib/git/scoreboard.json"
OUT_MD="lib/git/scoreboard.md" OUT_MD="lib/git/scoreboard.md"
@@ -46,6 +46,7 @@ run_suite() {
(load "lib/git/ref.sx") (load "lib/git/ref.sx")
(load "lib/git/dag.sx") (load "lib/git/dag.sx")
(load "lib/git/worktree.sx") (load "lib/git/worktree.sx")
(load "lib/git/diff.sx")
(epoch 2) (epoch 2)
(eval "(define git-test-pass 0)") (eval "(define git-test-pass 0)")
(eval "(define git-test-fail 0)") (eval "(define git-test-fail 0)")

423
lib/git/diff.sx Normal file
View File

@@ -0,0 +1,423 @@
; lib/git/diff.sx — sx-git Phase 5: structural tree diff + Myers line diff.
; Tree diff = files-diff over flattened trees (path -> blob cid). Blob diff =
; Myers O(ND) shortest edit script over lines, edit script ops
; {:op "eq"|"del"|"add" :line l}, rendered as unified hunks (context 3).
; Requires: lib/git/object.sx, lib/git/worktree.sx.
; ---- lines <-> data ----
; Return the first n elements of xs as a new list.
;   xs - source list
;   n  - number of leading elements to keep
; Yields fewer than n elements when xs is shorter, and the empty list when
; n is zero or negative. The stop test is (<= n 0) rather than (= n 0):
; with equality a negative n never reaches zero, so the recursion would only
; stop once xs ran out and the whole list would be returned.
(define
  git/diff-take
  (fn
    (xs n)
    (if
      (or (<= n 0) (empty? xs))
      (list)
      (cons (first xs) (git/diff-take (rest xs) (- n 1))))))
; Split text s into its lines.
; The text is cut on "\n"; when the final piece is the empty string (the
; slot left by a trailing newline) that piece is dropped, so "a\nb\n" and
; "a\nb" both give ("a" "b").
(define
  git/diff-lines
  (fn
    (s)
    (let
      ((pieces (split s "\n")))
      (let
        ((last-idx (- (len pieces) 1)))
        (if
          ; keep everything unless there is a last piece and it is ""
          (or
            (< last-idx 0)
            (not (equal? (nth pieces last-idx) "")))
          pieces
          (git/diff-take pieces last-idx))))))
; Turn list xs into an index-keyed dict used as a random-access vector:
; element i is stored under the key (str i), counting from 0.
; Read it back with git/dget.
(define
  git/dvec
  (fn
    (xs)
    (nth
      (reduce
        ; state is (next-index dict-so-far)
        (fn
          (state x)
          (list
            (+ (first state) 1)
            (assoc (nth state 1) (str (first state)) x)))
        (list 0 {})
        xs)
      1)))
; Look up integer index i in dict-vector v; keys are the stringified index,
; so this also serves the Myers frontier dict, whose diagonals k may be negative.
(define git/dget (fn (v i) (get v (str i))))
; ---- Myers forward pass ----
; v: dict k(str) -> furthest x on diagonal k. Reads of k±1 always hit the
; previous round's parity, so in-round writes never corrupt the decision.
; Starting x for diagonal k in round d, before the snake is followed.
;   v - frontier dict: diagonal -> furthest x reached so far
; Going down from diagonal k+1 keeps that diagonal's x; going right from
; diagonal k-1 adds one to it. The lowest diagonal (k = -d) can only be
; reached from k+1, the highest (k = d) only from k-1; in between, the
; neighbour with the larger x wins, and ties go right from k-1.
(define
  git/myers-x
  (fn
    (v d k)
    (cond
      ((= k (- 0 d)) (git/dget v (+ k 1)))
      ((= k d) (+ (git/dget v (- k 1)) 1))
      ((< (git/dget v (- k 1)) (git/dget v (+ k 1)))
        (git/dget v (+ k 1)))
      (else (+ (git/dget v (- k 1)) 1)))))
; Follow the diagonal from (x, y) while the lines match.
;   av, bv - dict-vectors of the old and new lines
;   n, m   - their lengths
; Returns (x y) of the first position where either side is exhausted or
; the two lines differ.
(define
  git/myers-snake
  (fn
    (av bv n m x y)
    (if
      (or
        (>= x n)
        (>= y m)
        (not (equal? (git/dget av x) (git/dget bv y))))
      (list x y)
      (git/myers-snake av bv n m (+ x 1) (+ y 1)))))
; One round d of the forward pass, visiting diagonals k, k+2, ..., d
; (callers start at k = -d).
;   av, bv - dict-vectors of the old and new lines; n, m - their lengths
;   v      - frontier dict: diagonal -> furthest x
; Returns (v' done?): the updated frontier, and true as soon as a snake
; reaches or passes (n, m).
; The starting x is computed once and reused for both the snake's x and its
; y (= x - k), instead of calling git/myers-x twice per diagonal.
(define
  git/myers-round
  (fn
    (av bv n m v d k)
    (if
      (> k d)
      (list v false)
      (let
        ((x0 (git/myers-x v d k)))
        (let
          ((sn (git/myers-snake av bv n m x0 (- x0 k))))
          (let
            ((v2 (assoc v (str k) (first sn))))
            (if
              (and (>= (first sn) n) (>= (nth sn 1) m))
              (list v2 true)
              (git/myers-round av bv n m v2 d (+ k 2)))))))))
; Drive rounds d, d+1, ... until one of them reaches the end of both inputs.
; The frontier v is appended to trace before round d runs, so trace[d] is
; the frontier entering round d. Returns (trace D), D being the round that
; finished.
(define
  git/myers-run
  (fn
    (av bv n m v d trace)
    (let
      ((outcome (git/myers-round av bv n m v d (- 0 d)))
       (seen (append trace (list v))))
      (if
        (nth outcome 1)
        (list seen d)
        (git/myers-run av bv n m (first outcome) (+ d 1) seen)))))
; ---- Myers backtrack: walk (n,m) back to (0,0), cons ops into forward order ----
; Walk back along a diagonal from (x, y) towards (px, py), prepending an
; "eq" op for every line stepped over (the line is read from av at x - 1).
; Stops as soon as x reaches px or y reaches py.
; Returns (x y ops) at the stopping point.
(define
  git/myers-diag
  (fn
    (av x y px py ops)
    (if
      (or (<= x px) (<= y py))
      (list x y ops)
      (git/myers-diag
        av
        (- x 1)
        (- y 1)
        px
        py
        (cons {:op "eq" :line (git/dget av (- x 1))} ops)))))
; Backtrack from (x, y) through the recorded frontiers and build the edit
; script in forward order.
;   av, bv - dict-vectors of the old and new lines
;   trace  - list of frontiers; trace[d] is the frontier entering round d
;   d      - round that produced the point (x, y)
;   x, y   - current position (callers start at (n, m))
;   ops    - ops collected so far (later part of the script)
; Returns the list of {:op :line} ops.
(define
git/myers-back
(fn
(av bv trace d x y ops)
(if
; with the (> d 0) test below the recursion stops at d = 0, so this guard
; only matters when the function is entered with a negative d
(< d 0)
ops
(let
((v (nth trace d)))
(let
; diagonal of the current point
((k (- x y)))
(let
; diagonal the forward pass came from: same decision as git/myers-x,
; k+1 when the step was a move down (insertion), k-1 when it was a move
; right (deletion)
((pk (if (or (= k (- 0 d)) (and (not (= k d)) (< (git/dget v (- k 1)) (git/dget v (+ k 1))))) (+ k 1) (- k 1))))
(let
; end point of the previous round on that diagonal
((px (git/dget v pk)))
(let
((py (- px pk)))
(let
; undo the snake: emits "eq" ops until x = px or y = py
((r (git/myers-diag av x y px py ops)))
(if
(> d 0)
(git/myers-back
av
bv
trace
(- d 1)
px
py
(if
; x already back at px means the single edit step moved down: a line of
; bv was inserted; otherwise it moved right: a line of av was deleted
(= (first r) px)
(cons {:op "add" :line (git/dget bv py)} (nth r 2))
(cons {:op "del" :line (git/dget av px)} (nth r 2))))
; round 0 has no edit step, only the leading snake
(nth r 2)))))))))))
; ---- edit script over two strings ----
; Shortest edit script between two texts, line by line.
;   a-data - old text
;   b-data - new text
; Returns a list of ops {:op "eq"|"del"|"add" :line l} in forward order.
; The forward pass is seeded with frontier {"1" 0} so that round 0 starts
; diagonal 0 at x = 0. The dict-vectors and lengths are built once and
; shared by the forward pass and the backtrack.
(define
  git/diff-script
  (fn
    (a-data b-data)
    (let
      ((al (git/diff-lines a-data)) (bl (git/diff-lines b-data)))
      (let
        ((av (git/dvec al))
         (bv (git/dvec bl))
         (n (len al))
         (m (len bl)))
        (let
          ((rt (git/myers-run av bv n m (assoc {} "1" 0) 0 (list))))
          ; rt is (trace D); backtrack from the end point (n, m)
          (git/myers-back av bv (first rt) (nth rt 1) n m (list)))))))
; Count the ops in script whose :op is not "eq", i.e. the number of
; inserted plus deleted lines.
(define
  git/diff-changes
  (fn
    (script)
    (reduce
      (fn
        (total step)
        (if (equal? (get step :op) "eq") total (+ total 1)))
      0
      script)))
; reconstruction invariants: old = eq+del lines, new = eq+add lines
; Rebuild the old side from an edit script: the :line of every op that is
; not an "add", in script order.
(define
  git/diff-old-lines
  (fn
    (script)
    (let
      ((kept
         (filter
           (fn (step) (if (equal? (get step :op) "add") false true))
           script)))
      (map (fn (step) (get step :line)) kept))))
; Rebuild the new side from an edit script: the :line of every op that is
; not a "del", in script order.
(define
  git/diff-new-lines
  (fn
    (script)
    (let
      ((kept
         (filter
           (fn (step) (if (equal? (get step :op) "del") false true))
           script)))
      (map (fn (step) (get step :line)) kept))))
; ---- unified rendering ----
; Attach 1-based line numbers to every op of an edit script.
; Each op gains :a (current old-side line number) and :b (current new-side
; line number) as they stand before the op is applied. Afterwards the old
; counter advances for "eq" and "del", the new counter for everything
; except "del".
(define
  git/diff-annotate
  (fn
    (script)
    (nth
      (reduce
        ; state is (next-a next-b annotated-ops)
        (fn
          (state step)
          (let
            ((a (first state))
             (b (nth state 1))
             (kind (get step :op)))
            (list
              (if (or (equal? kind "eq") (equal? kind "del")) (+ a 1) a)
              (if (equal? kind "del") b (+ b 1))
              (append (nth state 2) (list (merge step {:a a :b b}))))))
        (list 1 1 (list))
        script)
      2)))
; 0-based positions in script of every op whose :op is not "eq".
(define
  git/diff-change-idxs
  (fn
    (script)
    (filter
      (fn (i) (>= i 0))
      ; unchanged ops are marked with -1 (never a real index) and dropped
      (map-indexed
        (fn (i step) (if (equal? (get step :op) "eq") (- 0 1) i))
        script))))
; Coalesce a list of (from to) index ranges, taken in the order given.
; A range is folded into the previously kept one when it starts no later
; than one past that range's end (overlapping or directly adjacent);
; otherwise it is appended as a new range.
(define
  git/diff-merge-ranges
  (fn
    (ranges)
    (reduce
      (fn
        (merged r)
        (if
          (empty? merged)
          (list r)
          (let
            ((tail-idx (- (len merged) 1)))
            (let
              ((last-range (nth merged tail-idx)))
              (if
                (> (first r) (+ (nth last-range 1) 1))
                (append merged (list r))
                ; replace the last range by its union with r
                (append
                  (git/diff-take merged tail-idx)
                  (list
                    (list
                      (first last-range)
                      (max (nth last-range 1) (nth r 1))))))))))
      (list)
      ranges)))
; Index ranges of the hunks of a script.
;   script - list of ops
;   ctx    - number of context ops to keep on each side of a change
; Every changed op contributes the window [i - ctx, i + ctx], clamped to
; the script bounds; the windows are then merged by git/diff-merge-ranges.
(define
  git/diff-hunk-ranges
  (fn
    (script ctx)
    (let
      ((last-idx (- (len script) 1)))
      (git/diff-merge-ranges
        (map
          (fn
            (at)
            (list (max 0 (- at ctx)) (min last-idx (+ at ctx))))
          (git/diff-change-idxs script))))))
; Elements of xs whose 0-based position lies in [from, to], both inclusive.
(define
  git/diff-slice
  (fn
    (xs from to)
    (let
      ((tagged (map-indexed (fn (i x) (list i x)) xs)))
      (map
        (fn (pair) (nth pair 1))
        (filter
          (fn
            (pair)
            (not (or (< (first pair) from) (> (first pair) to))))
          tagged)))))
; Line prefix used in unified output for an op name:
; " " for "eq", "-" for "del", "+" for anything else.
(define
  git/diff-op-char
  (fn
    (op)
    (if
      (equal? op "eq")
      " "
      (if (equal? op "del") "-" "+"))))
; Render one hunk as unified-diff text.
;   ops - annotated ops (each carrying :op, :line, :a, :b) of a single hunk
; Returns "@@ -astart,acount +bstart,bcount @@\n" followed by one prefixed
; line per op. An empty ops list renders as "" — there is no first op to
; take the start numbers from.
(define
  git/diff-hunk-render
  (fn
    (ops)
    (if
      (empty? ops)
      ""
      (let
        ; lines present on the old side (everything but adds) and on the
        ; new side (everything but dels)
        ((acount (len (filter (fn (o) (not (equal? (get o :op) "add"))) ops)))
         (bcount (len (filter (fn (o) (not (equal? (get o :op) "del"))) ops))))
        (let
          ; a side with no lines in the hunk reports the line before the
          ; first op's number, which gives "-0,0" / "+0,0" for an empty side
          ((astart
             (if
               (= acount 0)
               (- (get (first ops) :a) 1)
               (get (first ops) :a)))
           (bstart
             (if
               (= bcount 0)
               (- (get (first ops) :b) 1)
               (get (first ops) :b))))
          (str
            "@@ -"
            astart
            ","
            acount
            " +"
            bstart
            ","
            bcount
            " @@\n"
            (reduce
              (fn
                (acc o)
                (str acc (git/diff-op-char (get o :op)) (get o :line) "\n"))
              ""
              ops)))))))
; Unified diff text between two texts, with 3 ops of context around each
; change. Returns the concatenation of all rendered hunks, which is ""
; when the script contains no change.
(define
  git/diff-unified
  (fn
    (a-data b-data)
    (let
      ((steps (git/diff-annotate (git/diff-script a-data b-data))))
      (let
        ((hunks
           (map
             (fn
               (span)
               (git/diff-hunk-render
                 (git/diff-slice steps (first span) (nth span 1))))
             (git/diff-hunk-ranges steps 3))))
        (reduce (fn (text hunk) (str text hunk)) "" hunks)))))
; ---- object-level diffs ----
; Edit script between two stored blobs.
;   repo   - repository handle passed to git/read
;   b1, b2 - ids of the old and new blob
; Reads both objects, takes their data with git/blob-data and diffs the
; two texts with git/diff-script.
(define
  git/blob-diff
  (fn
    (repo b1 b2)
    (let
      ((old-text (git/blob-data (git/read repo b1)))
       (new-text (git/blob-data (git/read repo b2))))
      (git/diff-script old-text new-text))))
; Structural diff between two trees.
; Both trees are flattened with git/tree-flatten and the results compared
; with git/files-diff (both defined outside this file).
(define
  git/tree-diff
  (fn
    (repo t1 t2)
    (let
      ((before (git/tree-flatten repo t1))
       (after (git/tree-flatten repo t2)))
      (git/files-diff before after))))
; Structural diff between two commits: reads each commit, takes its tree
; with git/commit-tree and hands both trees to git/tree-diff.
(define
  git/commit-diff
  (fn
    (repo c1 c2)
    (let
      ((old-root (git/commit-tree (git/read repo c1)))
       (new-root (git/commit-tree (git/read repo c2))))
      (git/tree-diff repo old-root new-root))))
; ---- whole-commit unified render: added, deleted, then modified paths ----
; Text of the blob stored at path in a flattened tree.
;   flat - dict whose value at path is passed to git/read as the blob id
; Returns "" when flat has no entry for path.
(define
  git/diff-path-data
  (fn
    (repo flat path)
    (if
      (not (has-key? flat path))
      ""
      (git/blob-data (git/read repo (get flat path))))))
; Unified diff text for everything that changed between commits c1 and c2.
; Sections are emitted for the :added paths first, then :deleted, then
; :modified, as reported by git/files-diff. Each section is a
; "diff --sx a/p b/p" header, a "---" / "+++" pair (with /dev/null on the
; side where the path does not exist) and the hunks from git/diff-unified.
(define
  git/commit-diff-unified
  (fn
    (repo c1 c2)
    (let
      ((f1 (git/tree-flatten repo (git/commit-tree (git/read repo c1))))
       (f2 (git/tree-flatten repo (git/commit-tree (git/read repo c2)))))
      (let
        ((d (git/files-diff f1 f2))
         ; renders one section per path; in-old / in-new say on which
         ; sides the path exists
         (section
           (fn
             (paths in-old in-new)
             (reduce
               (fn
                 (acc p)
                 (str
                   acc
                   "diff --sx a/"
                   p
                   " b/"
                   p
                   "\n--- "
                   (if in-old (str "a/" p) "/dev/null")
                   "\n+++ "
                   (if in-new (str "b/" p) "/dev/null")
                   "\n"
                   (git/diff-unified
                     (if in-old (git/diff-path-data repo f1 p) "")
                     (if in-new (git/diff-path-data repo f2 p) ""))))
               ""
               paths))))
        (str
          (section (get d :added) false true)
          (section (get d :deleted) true false)
          (section (get d :modified) true true))))))

View File

@@ -3,9 +3,10 @@
"object": {"pass": 38, "fail": 0}, "object": {"pass": 38, "fail": 0},
"ref": {"pass": 38, "fail": 0}, "ref": {"pass": 38, "fail": 0},
"dag": {"pass": 30, "fail": 0}, "dag": {"pass": 30, "fail": 0},
"worktree": {"pass": 26, "fail": 0} "worktree": {"pass": 26, "fail": 0},
"diff": {"pass": 27, "fail": 0}
}, },
"total_pass": 132, "total_pass": 159,
"total_fail": 0, "total_fail": 0,
"total": 132 "total": 159
} }

View File

@@ -8,4 +8,5 @@ _Generated by `lib/git/conformance.sh`_
| ref | 38 | 0 | 38 | | ref | 38 | 0 | 38 |
| dag | 30 | 0 | 30 | | dag | 30 | 0 | 30 |
| worktree | 26 | 0 | 26 | | worktree | 26 | 0 | 26 |
| **Total** | **132** | **0** | **132** | | diff | 27 | 0 | 27 |
| **Total** | **159** | **0** | **159** |

164
lib/git/tests/diff.sx Normal file
View File

@@ -0,0 +1,164 @@
; Phase 5 — diff: Myers line diff (edit script + reconstruction invariants),
; unified hunk rendering, structural tree/commit diff.
; Shared fixtures for this suite: a backend from persist/mem-backend
; (presumably in-memory — confirm) and a repo handle built on it.
(define gdf-db (persist/mem-backend))
(define gdf (git/repo gdf-db))
; ---- diff-lines ----
; git-test is used here as (git-test label actual expected).
; A trailing "\n" must not produce an extra empty last line.
(git-test
"lines drop the trailing newline slot"
(= (git/diff-lines "a\nb\n") (list "a" "b"))
true)
; Text without a final newline splits into the same lines.
(git-test
"lines without trailing newline"
(= (git/diff-lines "a\nb") (list "a" "b"))
true)
; The empty string has zero lines, not one empty line.
(git-test "empty data has no lines" (= (git/diff-lines "") (list)) true)
; ---- Myers edit script ----
; Equal inputs: no add/del ops ...
(git-test
"identical inputs are all-eq"
(git/diff-changes (git/diff-script "a\nb\nc\n" "a\nb\nc\n"))
0)
; ... and one op per line.
(git-test
"identical inputs keep every line"
(len (git/diff-script "a\nb\nc\n" "a\nb\nc\n"))
3)
(git-test
"empty vs empty is the empty script"
(= (git/diff-script "" "") (list))
true)
; A replacement is reported as the deletion followed by the addition.
(git-test
"single line replacement"
(= (git/diff-script "a" "b") (list {:op "del" :line "a"} {:op "add" :line "b"}))
true)
; Empty old side: every new line is an add, in order.
(git-test
"pure insertion script"
(= (git/diff-script "" "a\nb\n") (list {:op "add" :line "a"} {:op "add" :line "b"}))
true)
; Empty new side: every old line is a del, in order.
(git-test
"pure deletion script"
(= (git/diff-script "a\nb\n" "") (list {:op "del" :line "a"} {:op "del" :line "b"}))
true)
; Unchanged lines around a change stay "eq".
(git-test
"middle change keeps flanks eq"
(=
(git/diff-script "a\nb\nc\n" "a\nx\nc\n")
(list {:op "eq" :line "a"} {:op "del" :line "b"} {:op "add" :line "x"} {:op "eq" :line "c"}))
true)
; Example from Myers' paper: ABCABBA -> CBABAC has a shortest edit script
; of 5 operations (one letter per line here).
(git-test
"ABCABBA/CBABAC shortest edit distance is 5"
(git/diff-changes (git/diff-script "A\nB\nC\nA\nB\nB\nA" "C\nB\nA\nB\nA\nC"))
5)
; Reconstruction invariant: eq + del lines give back the old text.
(git-test
"script reconstructs the old side"
(=
(git/diff-old-lines (git/diff-script "A\nB\nC\nA\nB\nB\nA" "C\nB\nA\nB\nA\nC"))
(list "A" "B" "C" "A" "B" "B" "A"))
true)
; Reconstruction invariant: eq + add lines give back the new text.
(git-test
"script reconstructs the new side"
(=
(git/diff-new-lines (git/diff-script "A\nB\nC\nA\nB\nB\nA" "C\nB\nA\nB\nA\nC"))
(list "C" "B" "A" "B" "A" "C"))
true)
; Both invariants on inputs of different length (4 old lines, 5 new lines).
(git-test
"reconstruction holds for asymmetric edits"
(let
((a "one\ntwo\nthree\nfour\n") (b "zero\ntwo\nfour\nfive\nsix\n"))
(and
(= (git/diff-old-lines (git/diff-script a b)) (git/diff-lines a))
(= (git/diff-new-lines (git/diff-script a b)) (git/diff-lines b))))
true)
; ---- unified rendering ----
; Short file: the single hunk covers every line.
(git-test
"unified: middle replacement, full context"
(git/diff-unified "a\nb\nc\n" "a\nx\nc\n")
"@@ -1,3 +1,3 @@\n a\n-b\n+x\n c\n")
; Counts are always written out, including ",1".
(git-test
"unified: append at end"
(git/diff-unified "a\n" "a\nb\n")
"@@ -1,1 +1,2 @@\n a\n+b\n")
; No change means no hunk at all.
(git-test "unified: identical renders empty" (git/diff-unified "x\n" "x\n") "")
; An empty old side is reported as "-0,0".
(git-test
"unified: creation from empty"
(git/diff-unified "" "a\nb\n")
"@@ -0,0 +1,2 @@\n+a\n+b\n")
; An empty new side is reported as "+0,0".
(git-test
"unified: deletion to empty"
(git/diff-unified "a\nb\n" "")
"@@ -1,2 +0,0 @@\n-a\n-b\n")
; Change at line 5 of 9: the hunk spans lines 2..8, l1 and l9 are left out.
(git-test
"unified: context trimmed to 3 lines"
(git/diff-unified "l1\nl2\nl3\nl4\nl5\nl6\nl7\nl8\nl9\n" "l1\nl2\nl3\nl4\nX\nl6\nl7\nl8\nl9\n")
"@@ -2,7 +2,7 @@\n l2\n l3\n l4\n-l5\n+X\n l6\n l7\n l8\n")
; Changes at lines 2 and 14: their context windows do not touch, so two
; separate hunks are expected.
(git-test
"unified: distant changes split into two hunks"
(git/diff-unified
"l1\nl2\nl3\nl4\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\nl14\nl15\n"
"l1\nX\nl3\nl4\nl5\nl6\nl7\nl8\nl9\nl10\nl11\nl12\nl13\nY\nl15\n")
(str
"@@ -1,5 +1,5 @@\n l1\n-l2\n+X\n l3\n l4\n l5\n"
"@@ -11,5 +11,5 @@\n l11\n l12\n l13\n-l14\n+Y\n l15\n"))
; ---- blob diff over the object store ----
; Writes two one-line blobs into the shared repo and diffs them by id.
(git-test
"blob-diff reads both blobs"
(=
(git/blob-diff gdf (git/write-blob gdf "a\n") (git/write-blob gdf "b\n"))
(list {:op "del" :line "a"} {:op "add" :line "b"}))
true)
; ---- structural tree/commit diff ----
; Old tree: a.txt, b.txt and sub/c.txt.
(define
gdf-t1
(git/tree-from-files
gdf
(assoc
(assoc (assoc {} "a.txt" "1\n") "b.txt" "2\n")
"sub/c.txt"
"3\n")))
; New tree: a.txt unchanged, b.txt with new content, sub/c.txt gone,
; d.txt new.
(define
gdf-t2
(git/tree-from-files
gdf
(assoc
(assoc (assoc {} "a.txt" "1\n") "b.txt" "2x\n")
"d.txt"
"new\n")))
; One commit per tree; c2 lists c1 as its parent.
(define gdf-c1 (git/write gdf (git/commit gdf-t1 (list) {:message "c1"})))
(define gdf-c2 (git/write gdf (git/commit gdf-t2 (list gdf-c1) {:message "c2"})))
(git-test
"tree-diff classifies added/modified/deleted"
(= (git/tree-diff gdf gdf-t1 gdf-t2) {:deleted (list "sub/c.txt") :modified (list "b.txt") :added (list "d.txt")})
true)
(git-test
"tree-diff of a tree with itself is empty"
(= (git/tree-diff gdf gdf-t1 gdf-t1) {:deleted (list) :modified (list) :added (list)})
true)
; Same expectation as the tree-diff test, reached through the commits.
(git-test
"commit-diff goes through the commit trees"
(= (git/commit-diff gdf gdf-c1 gdf-c2) {:deleted (list "sub/c.txt") :modified (list "b.txt") :added (list "d.txt")})
true)
; ---- whole-commit unified render ----
; Uses its own fresh repo: c1 has f.txt = "old", c2 changes f.txt to "new"
; and adds g.txt. The added file's section must come before the modified
; file's section; there is no deleted path in this case.
(git-test
"commit-diff-unified renders adds, deletes, then modifications"
(let
((r (git/repo (persist/mem-backend))))
(let
((c1 (git/write r (git/commit (git/tree-from-files r (assoc {} "f.txt" "old\n")) (list) {:message "c1"}))))
(let
((c2 (git/write r (git/commit (git/tree-from-files r (assoc (assoc {} "f.txt" "new\n") "g.txt" "hi\n")) (list c1) {:message "c2"}))))
(git/commit-diff-unified r c1 c2))))
(str
"diff --sx a/g.txt b/g.txt\n--- /dev/null\n+++ b/g.txt\n@@ -0,0 +1,1 @@\n+hi\n"
"diff --sx a/f.txt b/f.txt\n--- a/f.txt\n+++ b/f.txt\n@@ -1,1 +1,1 @@\n-old\n+new\n"))
; A commit compared with itself produces no sections at all.
(git-test
"commit-diff-unified of identical commits is empty"
(git/commit-diff-unified gdf gdf-c1 gdf-c1)
"")