From b9afe671ae539e480e2c6b80fb00dfb07e973274 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 11:53:29 +0000 Subject: [PATCH] artdag: Phase 2 Analyze on Datalog + 16 tests analyze.sx projects DAG edges to (edge in out) facts and runs recursive reachable rules for deps-of/dependents-of/reachable-from/ancestors-of, plus dirty-closure (dirty(Y):-edge(X,Y),dirty(X)) for incremental recompute. Keystone: changing a mid node dirties only it + downstream. analyze 16/16, total 36/36. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/artdag/analyze.sx | 88 ++++++++++++++++++++++++++ lib/artdag/conformance.sh | 12 +++- lib/artdag/scoreboard.json | 7 ++- lib/artdag/scoreboard.md | 3 +- lib/artdag/tests/analyze.sx | 119 ++++++++++++++++++++++++++++++++++++ plans/artdag-on-sx.md | 19 ++++-- 6 files changed, 239 insertions(+), 9 deletions(-) create mode 100644 lib/artdag/analyze.sx create mode 100644 lib/artdag/tests/analyze.sx diff --git a/lib/artdag/analyze.sx b/lib/artdag/analyze.sx new file mode 100644 index 00000000..03ac4a12 --- /dev/null +++ b/lib/artdag/analyze.sx @@ -0,0 +1,88 @@ +; lib/artdag/analyze.sx — Phase 2: Analyze on Datalog. +; Project the DAG's edges into a Datalog db and answer dependency questions +; (deps, dependents, transitive reachability) plus dirty-closure propagation +; as recursive Datalog — the acl/relations reachability shape. Depends on +; lib/artdag/dag.sx and the lib/datalog/ public API. + +; edge(input-id, node-id): data flows input -> node (input is a dependency). +(define + artdag/edge-facts + (fn + (dag) + (reduce + (fn + (acc id) + (concat + acc + (map + (fn (in) (list (quote edge) in id)) + (artdag/node-inputs (artdag/dag-get dag id))))) + (list) + (keys (artdag/dag-nodes dag))))) + +; reachable(X,Y): Y is a transitive dependent of X (forward, downstream). 
+(define + artdag/reach-rules + (quote + ((reachable X Y <- (edge X Y)) + (reachable X Z <- (edge X Y) (reachable Y Z))))) + +(define + artdag/analyze + (fn (dag) (dl-program-data (artdag/edge-facts dag) artdag/reach-rules))) + +; pull a single variable's bindings out of a subst list, sorted for determinism. +(define + artdag/-bindings + (fn + (substs var) + (artdag/sort-strings (map (fn (s) (get s var)) substs)))) + +; direct dependencies (inputs) of a node. +(define + artdag/deps-of + (fn + (db id) + (artdag/-bindings (dl-query db (list (quote edge) (quote X) id)) :X))) + +; direct dependents of a node. +(define + artdag/dependents-of + (fn + (db id) + (artdag/-bindings (dl-query db (list (quote edge) id (quote Y))) :Y))) + +; transitive dependents (everything downstream of a node). +(define + artdag/reachable-from + (fn + (db id) + (artdag/-bindings + (dl-query db (list (quote reachable) id (quote Y))) + :Y))) + +; transitive dependencies (everything upstream of a node). +(define + artdag/ancestors-of + (fn + (db id) + (artdag/-bindings + (dl-query db (list (quote reachable) (quote X) id)) + :X))) + +; dirty propagation: dirty(Y) :- edge(X,Y), dirty(X). Seeds are changed nodes. +(define artdag/dirty-rules (quote ((dirty Y <- (edge X Y) (dirty X))))) + +(define + artdag/dirty-seeds + (fn (changed) (map (fn (c) (list (quote dirty) c)) changed))) + +; transitive dirty closure of a set of changed node-ids: the changed nodes plus +; every transitive dependent that must recompute. Sorted, deduplicated. +(define + artdag/dirty-closure + (fn + (dag changed) + (let + ((db (dl-program-data (concat (artdag/edge-facts dag) (artdag/dirty-seeds changed)) artdag/dirty-rules))) + (artdag/-bindings (dl-query db (list (quote dirty) (quote X))) :X)))) diff --git a/lib/artdag/conformance.sh b/lib/artdag/conformance.sh index 5424ae49..63c902ec 100755 --- a/lib/artdag/conformance.sh +++ b/lib/artdag/conformance.sh @@ -13,7 +13,7 @@ if [ ! 
-x "$SX_SERVER" ]; then exit 1 fi -SUITES=(dag) +SUITES=(dag analyze) OUT_JSON="lib/artdag/scoreboard.json" OUT_MD="lib/artdag/scoreboard.md" @@ -27,7 +27,17 @@ run_suite() { (epoch 1) (load "spec/stdlib.sx") (load "lib/r7rs.sx") +(load "lib/datalog/tokenizer.sx") +(load "lib/datalog/parser.sx") +(load "lib/datalog/unify.sx") +(load "lib/datalog/db.sx") +(load "lib/datalog/builtins.sx") +(load "lib/datalog/aggregates.sx") +(load "lib/datalog/strata.sx") +(load "lib/datalog/eval.sx") +(load "lib/datalog/api.sx") (load "lib/artdag/dag.sx") +(load "lib/artdag/analyze.sx") (epoch 2) (eval "(define artdag-test-pass 0)") (eval "(define artdag-test-fail 0)") diff --git a/lib/artdag/scoreboard.json b/lib/artdag/scoreboard.json index 1ca0b565..46d9d6e8 100644 --- a/lib/artdag/scoreboard.json +++ b/lib/artdag/scoreboard.json @@ -1,8 +1,9 @@ { "suites": { - "dag": {"pass": 20, "fail": 0} + "dag": {"pass": 20, "fail": 0}, + "analyze": {"pass": 16, "fail": 0} }, - "total_pass": 20, + "total_pass": 36, "total_fail": 0, - "total": 20 + "total": 36 } diff --git a/lib/artdag/scoreboard.md b/lib/artdag/scoreboard.md index 34e8136d..43f3868d 100644 --- a/lib/artdag/scoreboard.md +++ b/lib/artdag/scoreboard.md @@ -5,4 +5,5 @@ _Generated by `lib/artdag/conformance.sh`_ | Suite | Pass | Fail | Total | |-------|-----:|-----:|------:| | dag | 20 | 0 | 20 | -| **Total** | **20** | **0** | **20** | +| analyze | 16 | 0 | 16 | +| **Total** | **36** | **0** | **36** | diff --git a/lib/artdag/tests/analyze.sx b/lib/artdag/tests/analyze.sx new file mode 100644 index 00000000..fc84f045 --- /dev/null +++ b/lib/artdag/tests/analyze.sx @@ -0,0 +1,119 @@ +; Phase 2 — Analyze on Datalog: deps/dependents/reachability + dirty closure. 
+ +; diamond: a -> b, a -> c, (b,c) -> d +(define + an-D + (artdag/build + (list + (list "a" "load" (list) {}) + (list "b" "f" (list "a") {}) + (list "c" "g" (list "a") {}) + (list "d" "add" (list "b" "c") {} true)))) +(define an-db (artdag/analyze an-D)) +(define an-a (artdag/dag-id an-D "a")) +(define an-b (artdag/dag-id an-D "b")) +(define an-c (artdag/dag-id an-D "c")) +(define an-d (artdag/dag-id an-D "d")) + +; ---- direct deps / dependents ---- + +(artdag-test + "deps-of: direct inputs" + (artdag/deps-of an-db an-d) + (artdag/sort-strings (list an-b an-c))) + +(artdag-test "deps-of: leaf has none" (artdag/deps-of an-db an-a) (list)) + +(artdag-test + "dependents-of: direct consumers" + (artdag/dependents-of an-db an-a) + (artdag/sort-strings (list an-b an-c))) + +(artdag-test + "dependents-of: output has none" + (artdag/dependents-of an-db an-d) + (list)) + +; ---- transitive reachability ---- + +(artdag-test + "reachable-from: all downstream" + (artdag/reachable-from an-db an-a) + (artdag/sort-strings (list an-b an-c an-d))) + +(artdag-test + "reachable-from: mid node reaches output" + (artdag/reachable-from an-db an-b) + (list an-d)) + +(artdag-test + "ancestors-of: all upstream" + (artdag/ancestors-of an-db an-d) + (artdag/sort-strings (list an-a an-b an-c))) + +(artdag-test + "ancestors-of: leaf has none" + (artdag/ancestors-of an-db an-a) + (list)) + +; ---- deep chain ---- + +(define + ch-D + (artdag/build + (list + (list "a" "load" (list) {}) + (list "b" "f" (list "a") {}) + (list "c" "f" (list "b") {}) + (list "d" "f" (list "c") {})))) +(define ch-db (artdag/analyze ch-D)) + +(artdag-test + "deep chain: reachable-from leaf" + (artdag/reachable-from ch-db (artdag/dag-id ch-D "a")) + (artdag/sort-strings + (list + (artdag/dag-id ch-D "b") + (artdag/dag-id ch-D "c") + (artdag/dag-id ch-D "d")))) + +(artdag-test + "deep chain: ancestors of tip" + (artdag/ancestors-of ch-db (artdag/dag-id ch-D "d")) + (artdag/sort-strings + (list + (artdag/dag-id ch-D 
"a") + (artdag/dag-id ch-D "b") + (artdag/dag-id ch-D "c")))) + +; ---- dirty closure ---- + +(artdag-test + "dirty closure: change leaf dirties all" + (artdag/dirty-closure an-D (list an-a)) + (artdag/sort-strings (list an-a an-b an-c an-d))) + +(artdag-test + "dirty closure: change mid touches only downstream" + (artdag/dirty-closure an-D (list an-b)) + (artdag/sort-strings (list an-b an-d))) + +(artdag-test + "dirty closure: unaffected stay clean (count)" + (len (artdag/dirty-closure an-D (list an-b))) + 2) + +(artdag-test + "dirty closure: change output dirties only itself" + (artdag/dirty-closure an-D (list an-d)) + (list an-d)) + +(artdag-test + "dirty closure: multiple seeds union" + (artdag/dirty-closure an-D (list an-b an-c)) + (artdag/sort-strings (list an-b an-c an-d))) + +(artdag-test + "dirty closure: empty seed set" + (artdag/dirty-closure an-D (list)) + (list)) diff --git a/plans/artdag-on-sx.md b/plans/artdag-on-sx.md index bf5aed91..0ce869c1 100644 --- a/plans/artdag-on-sx.md +++ b/plans/artdag-on-sx.md @@ -30,7 +30,7 @@ edges. 
## Status (rolling) -`bash lib/artdag/conformance.sh` → **20/20** (1 suite: dag) +`bash lib/artdag/conformance.sh` → **36/36** (2 suites: dag, analyze) ## Ground rules @@ -88,11 +88,11 @@ lib/artdag/optimize.sx lib/artdag/federation.sx ## Phase 2 — Analyze (Datalog) -- [ ] `lib/artdag/analyze.sx` — project edges to Datalog; `deps-of`, `dependents-of`, +- [x] `lib/artdag/analyze.sx` — project edges to Datalog; `deps-of`, `dependents-of`, transitive `reachable` (the recursive-reachability shape) -- [ ] **dirty propagation:** given a set of changed nodes, compute the transitive +- [x] **dirty propagation:** given a set of changed nodes, compute the transitive set of dependents that must recompute (`dirty-closure`) -- [ ] `lib/artdag/tests/analyze.sx` — deep chains, diamonds, dirty closure +- [x] `lib/artdag/tests/analyze.sx` — deep chains, diamonds, dirty closure correctness, unaffected nodes stay clean ## Phase 3 — Plan @@ -136,6 +136,17 @@ lib/artdag/optimize.sx lib/artdag/federation.sx ## Progress log +- **Phase 2 — Analyze on Datalog** (analyze suite 16/16, total 36/36). + `lib/artdag/analyze.sx`: `artdag/edge-facts` projects each `(input-id, node-id)` + pair to an `(edge ...)` fact; `artdag/analyze` builds a `dl-program-data` db with + recursive rules `reachable(X,Y) :- edge(X,Y)` and `reachable(X,Z) :- edge(X,Y), reachable(Y,Z)` (the acl/relations + reachability shape). Query helpers `deps-of`/`dependents-of` (direct), + `reachable-from` (transitive downstream), `ancestors-of` (transitive upstream), all + returning sorted id lists. `dirty-closure` builds a db with `dirty(Y) :- edge(X,Y), + dirty(X)` seeded by changed-node facts and returns the transitive forward closure — + keystone test confirms changing a mid node dirties only it + downstream, leaving + siblings/upstream clean. Content-ids work as opaque Datalog string constants. + - **Phase 1 — DAG model + content addressing** (dag suite 20/20). 
`lib/artdag/dag.sx`: node `{:op :inputs :params :commutative}`; `artdag/content-id` = `"node:"` + a deterministic canonical serialization of `(op, inputs, params)` with dict keys