diff --git a/lib/git/conformance.sh b/lib/git/conformance.sh index 5d1e39ab..791d710a 100755 --- a/lib/git/conformance.sh +++ b/lib/git/conformance.sh @@ -13,7 +13,7 @@ if [ ! -x "$SX_SERVER" ]; then exit 1 fi -SUITES=(object ref) +SUITES=(object ref dag) OUT_JSON="lib/git/scoreboard.json" OUT_MD="lib/git/scoreboard.md" @@ -32,8 +32,19 @@ run_suite() { (load "lib/persist/log.sx") (load "lib/persist/kv.sx") (load "lib/artdag/dag.sx") +(load "lib/datalog/tokenizer.sx") +(load "lib/datalog/parser.sx") +(load "lib/datalog/unify.sx") +(load "lib/datalog/db.sx") +(load "lib/datalog/builtins.sx") +(load "lib/datalog/aggregates.sx") +(load "lib/datalog/strata.sx") +(load "lib/datalog/eval.sx") +(load "lib/datalog/api.sx") +(load "lib/datalog/magic.sx") (load "lib/git/object.sx") (load "lib/git/ref.sx") +(load "lib/git/dag.sx") (epoch 2) (eval "(define git-test-pass 0)") (eval "(define git-test-fail 0)") diff --git a/lib/git/dag.sx b/lib/git/dag.sx new file mode 100644 index 00000000..c3a3f75d --- /dev/null +++ b/lib/git/dag.sx @@ -0,0 +1,180 @@ +; lib/git/dag.sx — sx-git Phase 3: the commit DAG as queries. +; The workhorse walks (log, ancestry, reachability, merge-base) run SX-side +; over parent edges read straight from commit objects. The Datalog bridge +; exports those edges as (git-parent child parent) facts under a deliberately +; MINIMAL two-rule ancestor closure (every dl-query re-saturates the ruleset — +; keep it lean, walk SX-side; see lib/relations/engine.sx for the precedent). +; Requires: lib/git/object.sx; datalog stack only for the git/dag-db bridge. + +(define + git/parents + (fn + (repo cid) + (let + ((c (git/read repo cid))) + (if (git/commit? c) (git/commit-parents c) (list))))) + +; ---- DFS postorder over parent edges, visited-set threaded through ---- +; returns (seen acc); acc lists each commit AFTER all its parents. +(define + git/dag-post + (fn + (repo cid seen acc) + (if + (has-key? seen cid) + (list seen acc) + (let + ((res (git/dag-post-list repo (git/parents repo cid) (assoc seen cid true) acc))) + (list (first res) (append (nth res 1) (list cid))))))) + +(define + git/dag-post-list + (fn + (repo cids seen acc) + (if + (empty? cids) + (list seen acc) + (let + ((res (git/dag-post repo (first cids) seen acc))) + (git/dag-post-list + repo + (rest cids) + (first res) + (nth res 1)))))) + +; all commits reachable from cid, INCLUDING cid (parents-first order) +(define + git/reachable + (fn + (repo cid) + (nth (git/dag-post repo cid {} (list)) 1))) + +(define + git/reachable-all + (fn + (repo cids) + (nth (git/dag-post-list repo cids {} (list)) 1))) + +; ---- log: topological, children before parents (reverse DFS postorder) ---- +(define + git/log + (fn (repo head-cid) (reverse (git/reachable repo head-cid)))) + +(define + git/log-messages + (fn + (repo head-cid) + (map + (fn (c) (git/commit-message (git/read repo c))) + (git/log repo head-cid)))) + +; ---- ancestry ---- +; proper ancestors of cid (excludes cid itself unless reachable via a cycle, +; which a well-formed commit DAG never has) +(define + git/ancestors + (fn + (repo cid) + (nth + (git/dag-post-list repo (git/parents repo cid) {} (list)) + 1))) + +(define + git/dag-member? + (fn (x xs) (reduce (fn (acc y) (or acc (equal? x y))) false xs))) + +; reflexive, like `git merge-base --is-ancestor A B`: is a an ancestor of b? +(define + git/is-ancestor? + (fn + (repo a b) + (if (equal? a b) true (git/dag-member? a (git/ancestors repo b))))) + +; ---- merge-base (LCA) ---- +(define + git/dag-set + (fn (xs) (reduce (fn (acc x) (assoc acc x true)) {} xs))) + +(define + git/common-ancestors + (fn + (repo a b) + (let + ((sb (git/dag-set (git/reachable repo b)))) + (filter (fn (c) (has-key? sb c)) (git/reachable repo a))))) + +; all best common ancestors: common ancestors dominated by no other common +; ancestor (c is dominated when it is a proper ancestor of another common d) +(define + git/merge-bases + (fn + (repo a b) + (let + ((common (git/common-ancestors repo a b))) + (artdag/sort-strings + (filter + (fn + (c) + (not + (reduce + (fn + (acc d) + (or + acc + (and + (not (equal? c d)) + (git/dag-member? c (git/ancestors repo d))))) + false + common))) + common))))) + +(define + git/merge-base + (fn + (repo a b) + (let + ((bs (git/merge-bases repo a b))) + (if (empty? bs) nil (first bs))))) + +; ---- Datalog bridge: parent edges as facts, minimal ancestor closure ---- +(define + git/dag-rules + (quote + ((git-anc X Y <- (git-parent X Y)) + (git-anc X Y <- (git-parent X Z) (git-anc Z Y))))) + +(define + git/dag-facts + (fn + (repo heads) + (reduce + (fn + (acc c) + (append + acc + (map + (fn (p) (list (quote git-parent) c p)) + (git/parents repo c)))) + (list) + (git/reachable-all repo heads)))) + +(define + git/dag-db + (fn + (repo heads) + (dl-program-data (git/dag-facts repo heads) git/dag-rules))) + +(define + git/ancestors-dl + (fn + (db cid) + (artdag/sort-strings + (map + (fn (s) (get s :Y)) + (dl-query db (list (quote git-anc) cid (quote Y))))))) + +(define + git/is-ancestor-dl? + (fn + (db a b) + (> (len (dl-query db (list (quote git-anc) b a))) 0))) diff --git a/lib/git/scoreboard.json b/lib/git/scoreboard.json index b9795544..27441a5b 100644 --- a/lib/git/scoreboard.json +++ b/lib/git/scoreboard.json @@ -1,9 +1,10 @@ { "suites": { "object": {"pass": 38, "fail": 0}, - "ref": {"pass": 38, "fail": 0} + "ref": {"pass": 38, "fail": 0}, + "dag": {"pass": 30, "fail": 0} }, - "total_pass": 76, + "total_pass": 106, "total_fail": 0, - "total": 76 + "total": 106 } diff --git a/lib/git/scoreboard.md b/lib/git/scoreboard.md index 8fc27ff7..b83dc3d5 100644 --- a/lib/git/scoreboard.md +++ b/lib/git/scoreboard.md @@ -6,4 +6,5 @@ _Generated by `lib/git/conformance.sh`_ |-------|-----:|-----:|------:| | object | 38 | 0 | 38 | | ref | 38 | 0 | 38 | -| **Total** | **76** | **0** | **76** | +| dag | 30 | 0 | 30 | +| **Total** | **106** | **0** | **106** | diff --git a/lib/git/tests/dag.sx b/lib/git/tests/dag.sx new file mode 100644 index 00000000..e68d3c97 --- /dev/null +++ b/lib/git/tests/dag.sx @@ -0,0 +1,172 @@ +; Phase 3 — dag: log (topo), ancestry, is-ancestor?, reachability, merge-base, +; and the minimal Datalog ancestor-closure bridge. +; Fixtures: a linear chain, a diamond merge, a criss-cross (two LCAs), +; and two unrelated roots. + +(define gdt-db (persist/mem-backend)) +(define gdt (git/repo gdt-db)) + +(define + gdt-commit! + (fn + (msg parents) + (git/write gdt (git/commit (git/write-blob gdt msg) parents {:message msg})))) + +; linear chain c1 <- c2 <- c3 +(define gdt-c1 (gdt-commit! "c1" (list))) +(define gdt-c2 (gdt-commit! "c2" (list gdt-c1))) +(define gdt-c3 (gdt-commit! "c3" (list gdt-c2))) + +; diamond: b1 <- p, b1 <- q, m = merge(p q) +(define gdt-b1 (gdt-commit! "b1" (list))) +(define gdt-p (gdt-commit! "p" (list gdt-b1))) +(define gdt-q (gdt-commit! "q" (list gdt-b1))) +(define gdt-m (gdt-commit! "m" (list gdt-p gdt-q))) + +; criss-cross: base <- a, base <- b; x = merge(a b), y = merge(a b) via +; distinct messages so x != y +(define gdt-base (gdt-commit! "base" (list))) +(define gdt-a (gdt-commit! "a" (list gdt-base))) +(define gdt-b (gdt-commit! "b" (list gdt-base))) +(define gdt-x (gdt-commit! "x" (list gdt-a gdt-b))) +(define gdt-y (gdt-commit! "y" (list gdt-a gdt-b))) + +; unrelated root +(define gdt-lone (gdt-commit! "lone" (list))) + +; ---- parents ---- +(git-test + "parents of a root commit" + (= (git/parents gdt gdt-c1) (list)) + true) +(git-test + "parents of a merge commit" + (= (git/parents gdt gdt-m) (list gdt-p gdt-q)) + true) +(git-test + "parents of a non-commit cid" + (= (git/parents gdt "sx1:junk") (list)) + true) + +; ---- reachability ---- +(git-test + "reachable includes self" + (git/dag-member? gdt-c3 (git/reachable gdt gdt-c3)) + true) +(git-test + "reachable spans the chain" + (= (git/reachable gdt gdt-c3) (list gdt-c1 gdt-c2 gdt-c3)) + true) +(git-test + "reachable covers both diamond legs" + (len (git/reachable gdt gdt-m)) + 4) +(git-test + "reachable-all merges from several heads without duplicates" + (len (git/reachable-all gdt (list gdt-x gdt-y))) + 5) + +; ---- log: children before parents ---- +(git-test + "log on a chain is newest-first" + (= (git/log gdt gdt-c3) (list gdt-c3 gdt-c2 gdt-c1)) + true) +(git-test + "log messages read newest-first" + (= (git/log-messages gdt gdt-c3) (list "c3" "c2" "c1")) + true) +(git-test + "log of a diamond is a valid topo order" + (= (git/log gdt gdt-m) (list gdt-m gdt-q gdt-p gdt-b1)) + true) +(git-test + "log of a root is just the root" + (= (git/log gdt gdt-c1) (list gdt-c1)) + true) + +; ---- ancestors / is-ancestor? ---- +(git-test + "ancestors excludes self" + (git/dag-member? gdt-c3 (git/ancestors gdt gdt-c3)) + false) +(git-test + "ancestors of the chain tip" + (= (git/ancestors gdt gdt-c3) (list gdt-c1 gdt-c2)) + true) +(git-test + "ancestors of a merge include both legs and the base" + (len (git/ancestors gdt gdt-m)) + 3) +(git-test + "is-ancestor? along the chain" + (git/is-ancestor? gdt gdt-c1 gdt-c3) + true) +(git-test + "is-ancestor? is directed" + (git/is-ancestor? gdt gdt-c3 gdt-c1) + false) +(git-test + "is-ancestor? is reflexive" + (git/is-ancestor? gdt gdt-c2 gdt-c2) + true) +(git-test + "is-ancestor? false across unrelated history" + (git/is-ancestor? gdt gdt-c1 gdt-lone) + false) + +; ---- merge-base ---- +(git-test + "merge-base of the diamond legs is the base" + (git/merge-base gdt gdt-p gdt-q) + gdt-b1) +(git-test + "merge-base when one side is an ancestor is that side" + (git/merge-base gdt gdt-c1 gdt-c3) + gdt-c1) +(git-test "merge-base with self" (git/merge-base gdt gdt-c3 gdt-c3) gdt-c3) +(git-test + "merge-base of unrelated commits is nil" + (git/merge-base gdt gdt-c3 gdt-lone) + nil) +(git-test + "merge-base is symmetric" + (equal? (git/merge-base gdt gdt-p gdt-q) (git/merge-base gdt gdt-q gdt-p)) + true) +(git-test + "criss-cross yields BOTH best common ancestors" + (= + (git/merge-bases gdt gdt-x gdt-y) + (artdag/sort-strings (list gdt-a gdt-b))) + true) +(git-test + "dominated common ancestor is not a merge-base" + (git/dag-member? gdt-base (git/merge-bases gdt gdt-x gdt-y)) + false) + +; ---- Datalog bridge ---- +(git-test + "dag-facts exports one fact per parent edge" + (len (git/dag-facts gdt (list gdt-m))) + 4) +(git-test + "datalog ancestors match the SX-side walk" + (let + ((db (git/dag-db gdt (list gdt-m)))) + (= + (git/ancestors-dl db gdt-m) + (artdag/sort-strings (git/ancestors gdt gdt-m)))) + true) +(git-test + "datalog is-ancestor? positive" + (git/is-ancestor-dl? (git/dag-db gdt (list gdt-m)) gdt-b1 gdt-m) + true) +(git-test + "datalog is-ancestor? negative" + (git/is-ancestor-dl? (git/dag-db gdt (list gdt-m)) gdt-m gdt-b1) + false) +(git-test + "datalog closure spans the chain" + (= + (git/ancestors-dl (git/dag-db gdt (list gdt-c3)) gdt-c3) + (artdag/sort-strings (list gdt-c1 gdt-c2))) + true)