sx-git Phase 5: diff — Myers line diff + structural tree diff + unified render (TDD)
Myers O(ND) forward/backtrack over line vectors (dict-vec), edit script
{:op eq|del|add :line}, reconstruction invariants both sides, paper example
D=5 verified; unified hunks with context 3, merged ranges, exact header
math for empty sides; tree/commit structural diff over flattened trees;
whole-commit unified render. 27/27, total 159/159.
Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
423
lib/git/diff.sx
Normal file
423
lib/git/diff.sx
Normal file
@@ -0,0 +1,423 @@
|
||||
; lib/git/diff.sx — sx-git Phase 5: structural tree diff + Myers line diff.
|
||||
; Tree diff = files-diff over flattened trees (path -> blob cid). Blob diff =
|
||||
; Myers O(ND) shortest edit script over lines, edit script ops
|
||||
; {:op "eq"|"del"|"add" :line l}, rendered as unified hunks (context 3).
|
||||
; Requires: lib/git/object.sx, lib/git/worktree.sx.
|
||||
|
||||
; ---- lines <-> data ----
|
||||
(define
|
||||
git/diff-take
|
||||
(fn
|
||||
(xs n)
|
||||
(if
|
||||
(or (= n 0) (empty? xs))
|
||||
(list)
|
||||
(cons (first xs) (git/diff-take (rest xs) (- n 1))))))
|
||||
|
||||
(define
|
||||
git/diff-lines
|
||||
(fn
|
||||
(s)
|
||||
(let
|
||||
((parts (split s "\n")))
|
||||
(if
|
||||
(and
|
||||
(> (len parts) 0)
|
||||
(equal? (nth parts (- (len parts) 1)) ""))
|
||||
(git/diff-take parts (- (len parts) 1))
|
||||
parts))))
|
||||
|
||||
; index-keyed dict as an O(1) vector
|
||||
(define
|
||||
git/dvec
|
||||
(fn
|
||||
(xs)
|
||||
(reduce
|
||||
(fn (acc p) (assoc acc (str (first p)) (nth p 1)))
|
||||
{}
|
||||
(map-indexed (fn (i x) (list i x)) xs))))
|
||||
(define git/dget (fn (v i) (get v (str i))))
|
||||
|
||||
; ---- Myers forward pass ----
|
||||
; v: dict k(str) -> furthest x on diagonal k. Reads of k±1 always hit the
|
||||
; previous round's parity, so in-round writes never corrupt the decision.
|
||||
(define
|
||||
git/myers-x
|
||||
(fn
|
||||
(v d k)
|
||||
(if
|
||||
(or
|
||||
(= k (- 0 d))
|
||||
(and
|
||||
(not (= k d))
|
||||
(< (git/dget v (- k 1)) (git/dget v (+ k 1)))))
|
||||
(git/dget v (+ k 1))
|
||||
(+ (git/dget v (- k 1)) 1))))
|
||||
|
||||
(define
|
||||
git/myers-snake
|
||||
(fn
|
||||
(av bv n m x y)
|
||||
(if
|
||||
(and (< x n) (< y m) (equal? (git/dget av x) (git/dget bv y)))
|
||||
(git/myers-snake av bv n m (+ x 1) (+ y 1))
|
||||
(list x y))))
|
||||
|
||||
; one round d over k = -d, -d+2, ..., d; returns (v done?)
|
||||
(define
|
||||
git/myers-round
|
||||
(fn
|
||||
(av bv n m v d k)
|
||||
(if
|
||||
(> k d)
|
||||
(list v false)
|
||||
(let
|
||||
((sn (git/myers-snake av bv n m (git/myers-x v d k) (- (git/myers-x v d k) k))))
|
||||
(let
|
||||
((v2 (assoc v (str k) (first sn))))
|
||||
(if
|
||||
(and (>= (first sn) n) (>= (nth sn 1) m))
|
||||
(list v2 true)
|
||||
(git/myers-round av bv n m v2 d (+ k 2))))))))
|
||||
|
||||
; trace[d] = v entering round d; returns (trace D)
|
||||
(define
|
||||
git/myers-run
|
||||
(fn
|
||||
(av bv n m v d trace)
|
||||
(let
|
||||
((trace2 (append trace (list v))))
|
||||
(let
|
||||
((res (git/myers-round av bv n m v d (- 0 d))))
|
||||
(if
|
||||
(nth res 1)
|
||||
(list trace2 d)
|
||||
(git/myers-run av bv n m (first res) (+ d 1) trace2))))))
|
||||
|
||||
; ---- Myers backtrack: walk (n,m) back to (0,0), cons ops into forward order ----
|
||||
(define
|
||||
git/myers-diag
|
||||
(fn
|
||||
(av x y px py ops)
|
||||
(if
|
||||
(and (> x px) (> y py))
|
||||
(git/myers-diag
|
||||
av
|
||||
(- x 1)
|
||||
(- y 1)
|
||||
px
|
||||
py
|
||||
(cons {:op "eq" :line (git/dget av (- x 1))} ops))
|
||||
(list x y ops))))
|
||||
|
||||
(define
|
||||
git/myers-back
|
||||
(fn
|
||||
(av bv trace d x y ops)
|
||||
(if
|
||||
(< d 0)
|
||||
ops
|
||||
(let
|
||||
((v (nth trace d)))
|
||||
(let
|
||||
((k (- x y)))
|
||||
(let
|
||||
((pk (if (or (= k (- 0 d)) (and (not (= k d)) (< (git/dget v (- k 1)) (git/dget v (+ k 1))))) (+ k 1) (- k 1))))
|
||||
(let
|
||||
((px (git/dget v pk)))
|
||||
(let
|
||||
((py (- px pk)))
|
||||
(let
|
||||
((r (git/myers-diag av x y px py ops)))
|
||||
(if
|
||||
(> d 0)
|
||||
(git/myers-back
|
||||
av
|
||||
bv
|
||||
trace
|
||||
(- d 1)
|
||||
px
|
||||
py
|
||||
(if
|
||||
(= (first r) px)
|
||||
(cons {:op "add" :line (git/dget bv py)} (nth r 2))
|
||||
(cons {:op "del" :line (git/dget av px)} (nth r 2))))
|
||||
(nth r 2)))))))))))
|
||||
|
||||
; ---- edit script over two strings ----
|
||||
(define
|
||||
git/diff-script
|
||||
(fn
|
||||
(a-data b-data)
|
||||
(let
|
||||
((al (git/diff-lines a-data)) (bl (git/diff-lines b-data)))
|
||||
(let
|
||||
((rt (git/myers-run (git/dvec al) (git/dvec bl) (len al) (len bl) (assoc {} "1" 0) 0 (list))))
|
||||
(git/myers-back
|
||||
(git/dvec al)
|
||||
(git/dvec bl)
|
||||
(first rt)
|
||||
(nth rt 1)
|
||||
(len al)
|
||||
(len bl)
|
||||
(list))))))
|
||||
|
||||
(define
|
||||
git/diff-changes
|
||||
(fn
|
||||
(script)
|
||||
(len (filter (fn (o) (not (equal? (get o :op) "eq"))) script))))
|
||||
|
||||
; reconstruction invariants: old = eq+del lines, new = eq+add lines
|
||||
(define
|
||||
git/diff-old-lines
|
||||
(fn
|
||||
(script)
|
||||
(map
|
||||
(fn (o) (get o :line))
|
||||
(filter (fn (o) (not (equal? (get o :op) "add"))) script))))
|
||||
(define
|
||||
git/diff-new-lines
|
||||
(fn
|
||||
(script)
|
||||
(map
|
||||
(fn (o) (get o :line))
|
||||
(filter (fn (o) (not (equal? (get o :op) "del"))) script))))
|
||||
|
||||
; ---- unified rendering ----
|
||||
(define
|
||||
git/diff-annotate
|
||||
(fn
|
||||
(script)
|
||||
(nth
|
||||
(reduce
|
||||
(fn
|
||||
(acc o)
|
||||
(let
|
||||
((a (first acc))
|
||||
(b (nth acc 1))
|
||||
(out (nth acc 2)))
|
||||
(cond
|
||||
((equal? (get o :op) "eq")
|
||||
(list
|
||||
(+ a 1)
|
||||
(+ b 1)
|
||||
(append out (list (merge o {:a a :b b})))))
|
||||
((equal? (get o :op) "del")
|
||||
(list
|
||||
(+ a 1)
|
||||
b
|
||||
(append out (list (merge o {:a a :b b})))))
|
||||
(else
|
||||
(list
|
||||
a
|
||||
(+ b 1)
|
||||
(append out (list (merge o {:a a :b b}))))))))
|
||||
(list 1 1 (list))
|
||||
script)
|
||||
2)))
|
||||
|
||||
(define
|
||||
git/diff-change-idxs
|
||||
(fn
|
||||
(script)
|
||||
(map
|
||||
(fn (p) (first p))
|
||||
(filter
|
||||
(fn (p) (not (equal? (get (nth p 1) :op) "eq")))
|
||||
(map-indexed (fn (i o) (list i o)) script)))))
|
||||
|
||||
(define
|
||||
git/diff-merge-ranges
|
||||
(fn
|
||||
(ranges)
|
||||
(reduce
|
||||
(fn
|
||||
(acc r)
|
||||
(if
|
||||
(empty? acc)
|
||||
(list r)
|
||||
(let
|
||||
((prev (nth acc (- (len acc) 1))))
|
||||
(if
|
||||
(<= (first r) (+ (nth prev 1) 1))
|
||||
(append
|
||||
(git/diff-take acc (- (len acc) 1))
|
||||
(list
|
||||
(list
|
||||
(first prev)
|
||||
(max (nth prev 1) (nth r 1)))))
|
||||
(append acc (list r))))))
|
||||
(list)
|
||||
ranges)))
|
||||
|
||||
(define
|
||||
git/diff-hunk-ranges
|
||||
(fn
|
||||
(script ctx)
|
||||
(git/diff-merge-ranges
|
||||
(map
|
||||
(fn
|
||||
(i)
|
||||
(list
|
||||
(max 0 (- i ctx))
|
||||
(min (- (len script) 1) (+ i ctx))))
|
||||
(git/diff-change-idxs script)))))
|
||||
|
||||
(define
|
||||
git/diff-slice
|
||||
(fn
|
||||
(xs from to)
|
||||
(map
|
||||
(fn (p) (nth p 1))
|
||||
(filter
|
||||
(fn (p) (and (>= (first p) from) (<= (first p) to)))
|
||||
(map-indexed (fn (i x) (list i x)) xs)))))
|
||||
|
||||
(define
|
||||
git/diff-op-char
|
||||
(fn
|
||||
(op)
|
||||
(cond ((equal? op "eq") " ") ((equal? op "del") "-") (else "+"))))
|
||||
|
||||
(define
|
||||
git/diff-hunk-render
|
||||
(fn
|
||||
(ops)
|
||||
(let
|
||||
((acount (len (filter (fn (o) (not (equal? (get o :op) "add"))) ops)))
|
||||
(bcount
|
||||
(len (filter (fn (o) (not (equal? (get o :op) "del"))) ops))))
|
||||
(let
|
||||
((astart (if (= acount 0) (- (get (first ops) :a) 1) (get (first ops) :a)))
|
||||
(bstart
|
||||
(if
|
||||
(= bcount 0)
|
||||
(- (get (first ops) :b) 1)
|
||||
(get (first ops) :b))))
|
||||
(str
|
||||
"@@ -"
|
||||
astart
|
||||
","
|
||||
acount
|
||||
" +"
|
||||
bstart
|
||||
","
|
||||
bcount
|
||||
" @@\n"
|
||||
(reduce
|
||||
(fn
|
||||
(acc o)
|
||||
(str acc (git/diff-op-char (get o :op)) (get o :line) "\n"))
|
||||
""
|
||||
ops))))))
|
||||
|
||||
(define
|
||||
git/diff-unified
|
||||
(fn
|
||||
(a-data b-data)
|
||||
(let
|
||||
((ann (git/diff-annotate (git/diff-script a-data b-data))))
|
||||
(reduce
|
||||
(fn
|
||||
(acc r)
|
||||
(str
|
||||
acc
|
||||
(git/diff-hunk-render
|
||||
(git/diff-slice ann (first r) (nth r 1)))))
|
||||
""
|
||||
(git/diff-hunk-ranges ann 3)))))
|
||||
|
||||
; ---- object-level diffs ----
|
||||
(define
|
||||
git/blob-diff
|
||||
(fn
|
||||
(repo b1 b2)
|
||||
(git/diff-script
|
||||
(git/blob-data (git/read repo b1))
|
||||
(git/blob-data (git/read repo b2)))))
|
||||
|
||||
(define
|
||||
git/tree-diff
|
||||
(fn
|
||||
(repo t1 t2)
|
||||
(git/files-diff (git/tree-flatten repo t1) (git/tree-flatten repo t2))))
|
||||
|
||||
(define
|
||||
git/commit-diff
|
||||
(fn
|
||||
(repo c1 c2)
|
||||
(git/tree-diff
|
||||
repo
|
||||
(git/commit-tree (git/read repo c1))
|
||||
(git/commit-tree (git/read repo c2)))))
|
||||
|
||||
; ---- whole-commit unified render: added, deleted, then modified paths ----
|
||||
(define
|
||||
git/diff-path-data
|
||||
(fn
|
||||
(repo flat path)
|
||||
(if
|
||||
(has-key? flat path)
|
||||
(git/blob-data (git/read repo (get flat path)))
|
||||
"")))
|
||||
|
||||
(define
|
||||
git/commit-diff-unified
|
||||
(fn
|
||||
(repo c1 c2)
|
||||
(let
|
||||
((f1 (git/tree-flatten repo (git/commit-tree (git/read repo c1))))
|
||||
(f2 (git/tree-flatten repo (git/commit-tree (git/read repo c2)))))
|
||||
(let
|
||||
((d (git/files-diff f1 f2)))
|
||||
(str
|
||||
(reduce
|
||||
(fn
|
||||
(acc p)
|
||||
(str
|
||||
acc
|
||||
"diff --sx a/"
|
||||
p
|
||||
" b/"
|
||||
p
|
||||
"\n--- /dev/null\n+++ b/"
|
||||
p
|
||||
"\n"
|
||||
(git/diff-unified "" (git/diff-path-data repo f2 p))))
|
||||
""
|
||||
(get d :added))
|
||||
(reduce
|
||||
(fn
|
||||
(acc p)
|
||||
(str
|
||||
acc
|
||||
"diff --sx a/"
|
||||
p
|
||||
" b/"
|
||||
p
|
||||
"\n--- a/"
|
||||
p
|
||||
"\n+++ /dev/null\n"
|
||||
(git/diff-unified (git/diff-path-data repo f1 p) "")))
|
||||
""
|
||||
(get d :deleted))
|
||||
(reduce
|
||||
(fn
|
||||
(acc p)
|
||||
(str
|
||||
acc
|
||||
"diff --sx a/"
|
||||
p
|
||||
" b/"
|
||||
p
|
||||
"\n--- a/"
|
||||
p
|
||||
"\n+++ b/"
|
||||
p
|
||||
"\n"
|
||||
(git/diff-unified
|
||||
(git/diff-path-data repo f1 p)
|
||||
(git/diff-path-data repo f2 p))))
|
||||
""
|
||||
(get d :modified)))))))
|
||||
Reference in New Issue
Block a user