artdag: Phase 2 Analyze on Datalog + 16 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 57s

analyze.sx projects DAG edges to (edge in out) facts and runs recursive
reachable rules for deps-of/dependents-of/reachable-from/ancestors-of, plus
dirty-closure (dirty(Y):-edge(X,Y),dirty(X)) for incremental recompute. Keystone:
changing a mid node dirties only it + downstream. analyze 16/16, total 36/36.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 11:53:29 +00:00
parent e4a8dff9ba
commit b9afe671ae
6 changed files with 239 additions and 9 deletions

88
lib/artdag/analyze.sx Normal file
View File

@@ -0,0 +1,88 @@
; lib/artdag/analyze.sx — Phase 2: Analyze on Datalog.
; Project the DAG's edges into a Datalog db and answer dependency questions
; (deps, dependents, transitive reachability) plus dirty-closure propagation
; as recursive Datalog — the acl/relations reachability shape. Depends on
; lib/artdag/dag.sx and the lib/datalog/ public API.
; edge(input-id, node-id): data flows input -> node (input is a dependency).
(define
artdag/edge-facts
(fn
(dag)
(reduce
(fn
(acc id)
(concat
acc
(map
(fn (in) (list (quote edge) in id))
(artdag/node-inputs (artdag/dag-get dag id)))))
(list)
(keys (artdag/dag-nodes dag)))))
; reachable(X,Y): Y is a transitive dependent of X (forward, downstream).
(define
artdag/reach-rules
(quote
((reachable X Y <- (edge X Y))
(reachable X Z <- (edge X Y) (reachable Y Z)))))
(define
artdag/analyze
(fn (dag) (dl-program-data (artdag/edge-facts dag) artdag/reach-rules)))
; pull a single variable's bindings out of a subst list, sorted for determinism.
(define
artdag/-bindings
(fn
(substs var)
(artdag/sort-strings (map (fn (s) (get s var)) substs))))
; direct dependencies (inputs) of a node.
(define
artdag/deps-of
(fn
(db id)
(artdag/-bindings (dl-query db (list (quote edge) (quote X) id)) :X)))
; direct dependents of a node.
(define
artdag/dependents-of
(fn
(db id)
(artdag/-bindings (dl-query db (list (quote edge) id (quote Y))) :Y)))
; transitive dependents (everything downstream of a node).
(define
artdag/reachable-from
(fn
(db id)
(artdag/-bindings
(dl-query db (list (quote reachable) id (quote Y)))
:Y)))
; transitive dependencies (everything upstream of a node).
(define
artdag/ancestors-of
(fn
(db id)
(artdag/-bindings
(dl-query db (list (quote reachable) (quote X) id))
:X)))
; dirty propagation: dirty(Y) :- edge(X,Y), dirty(X). Seeds are changed nodes.
(define artdag/dirty-rules (quote ((dirty Y <- (edge X Y) (dirty X)))))
(define
artdag/dirty-seeds
(fn (changed) (map (fn (c) (list (quote dirty) c)) changed)))
; transitive dirty closure of a set of changed node-ids: the changed nodes plus
; every transitive dependent that must recompute. Sorted, deduplicated.
(define
artdag/dirty-closure
(fn
(dag changed)
(let
((db (dl-program-data (concat (artdag/edge-facts dag) (artdag/dirty-seeds changed)) artdag/dirty-rules)))
(artdag/-bindings (dl-query db (list (quote dirty) (quote X))) :X))))

View File

@@ -13,7 +13,7 @@ if [ ! -x "$SX_SERVER" ]; then
exit 1
fi
SUITES=(dag)
SUITES=(dag analyze)
OUT_JSON="lib/artdag/scoreboard.json"
OUT_MD="lib/artdag/scoreboard.md"
@@ -27,7 +27,17 @@ run_suite() {
(epoch 1)
(load "spec/stdlib.sx")
(load "lib/r7rs.sx")
(load "lib/datalog/tokenizer.sx")
(load "lib/datalog/parser.sx")
(load "lib/datalog/unify.sx")
(load "lib/datalog/db.sx")
(load "lib/datalog/builtins.sx")
(load "lib/datalog/aggregates.sx")
(load "lib/datalog/strata.sx")
(load "lib/datalog/eval.sx")
(load "lib/datalog/api.sx")
(load "lib/artdag/dag.sx")
(load "lib/artdag/analyze.sx")
(epoch 2)
(eval "(define artdag-test-pass 0)")
(eval "(define artdag-test-fail 0)")

View File

@@ -1,8 +1,9 @@
{
"suites": {
"dag": {"pass": 20, "fail": 0}
"dag": {"pass": 20, "fail": 0},
"analyze": {"pass": 16, "fail": 0}
},
"total_pass": 20,
"total_pass": 36,
"total_fail": 0,
"total": 20
"total": 36
}

View File

@@ -5,4 +5,5 @@ _Generated by `lib/artdag/conformance.sh`_
| Suite | Pass | Fail | Total |
|-------|-----:|-----:|------:|
| dag | 20 | 0 | 20 |
| **Total** | **20** | **0** | **20** |
| analyze | 16 | 0 | 16 |
| **Total** | **36** | **0** | **36** |

119
lib/artdag/tests/analyze.sx Normal file
View File

@@ -0,0 +1,119 @@
; Phase 2 — Analyze on Datalog: deps/dependents/reachability + dirty closure.
; diamond: a -> b, a -> c, (b,c) -> d
(define
an-D
(artdag/build
(list
(list "a" "load" (list) {})
(list "b" "f" (list "a") {})
(list "c" "g" (list "a") {})
(list "d" "add" (list "b" "c") {} true))))
(define an-db (artdag/analyze an-D))
(define an-a (artdag/dag-id an-D "a"))
(define an-b (artdag/dag-id an-D "b"))
(define an-c (artdag/dag-id an-D "c"))
(define an-d (artdag/dag-id an-D "d"))
; ---- direct deps / dependents ----
(artdag-test
"deps-of: direct inputs"
(artdag/deps-of an-db an-d)
(artdag/sort-strings (list an-b an-c)))
(artdag-test "deps-of: leaf has none" (artdag/deps-of an-db an-a) (list))
(artdag-test
"dependents-of: direct consumers"
(artdag/dependents-of an-db an-a)
(artdag/sort-strings (list an-b an-c)))
(artdag-test
"dependents-of: output has none"
(artdag/dependents-of an-db an-d)
(list))
; ---- transitive reachability ----
(artdag-test
"reachable-from: all downstream"
(artdag/reachable-from an-db an-a)
(artdag/sort-strings (list an-b an-c an-d)))
(artdag-test
"reachable-from: mid node reaches output"
(artdag/reachable-from an-db an-b)
(list an-d))
(artdag-test
"ancestors-of: all upstream"
(artdag/ancestors-of an-db an-d)
(artdag/sort-strings (list an-a an-b an-c)))
(artdag-test
"ancestors-of: leaf has none"
(artdag/ancestors-of an-db an-a)
(list))
; ---- deep chain ----
(define
ch-D
(artdag/build
(list
(list "a" "load" (list) {})
(list "b" "f" (list "a") {})
(list "c" "f" (list "b") {})
(list "d" "f" (list "c") {}))))
(define ch-db (artdag/analyze ch-D))
(artdag-test
"deep chain: reachable-from leaf"
(artdag/reachable-from ch-db (artdag/dag-id ch-D "a"))
(artdag/sort-strings
(list
(artdag/dag-id ch-D "b")
(artdag/dag-id ch-D "c")
(artdag/dag-id ch-D "d"))))
(artdag-test
"deep chain: ancestors of tip"
(artdag/ancestors-of ch-db (artdag/dag-id ch-D "d"))
(artdag/sort-strings
(list
(artdag/dag-id ch-D "a")
(artdag/dag-id ch-D "b")
(artdag/dag-id ch-D "c"))))
; ---- dirty closure ----
(artdag-test
"dirty closure: change leaf dirties all"
(artdag/dirty-closure an-D (list an-a))
(artdag/sort-strings (list an-a an-b an-c an-d)))
(artdag-test
"dirty closure: change mid touches only downstream"
(artdag/dirty-closure an-D (list an-b))
(artdag/sort-strings (list an-b an-d)))
(artdag-test
"dirty closure: unaffected stay clean (count)"
(len (artdag/dirty-closure an-D (list an-b)))
2)
(artdag-test
"dirty closure: change output dirties only itself"
(artdag/dirty-closure an-D (list an-d))
(list an-d))
(artdag-test
"dirty closure: multiple seeds union"
(artdag/dirty-closure an-D (list an-b an-c))
(artdag/sort-strings (list an-b an-c an-d)))
(artdag-test
"dirty closure: empty seed set"
(artdag/dirty-closure an-D (list))
(list))

View File

@@ -30,7 +30,7 @@ edges.
## Status (rolling)
`bash lib/artdag/conformance.sh`**20/20** (1 suite: dag)
`bash lib/artdag/conformance.sh`**36/36** (2 suites: dag, analyze)
## Ground rules
@@ -88,11 +88,11 @@ lib/artdag/optimize.sx lib/artdag/federation.sx
## Phase 2 — Analyze (Datalog)
- [ ] `lib/artdag/analyze.sx` — project edges to Datalog; `deps-of`, `dependents-of`,
- [x] `lib/artdag/analyze.sx` — project edges to Datalog; `deps-of`, `dependents-of`,
transitive `reachable` (the recursive-reachability shape)
- [ ] **dirty propagation:** given a set of changed nodes, compute the transitive
- [x] **dirty propagation:** given a set of changed nodes, compute the transitive
set of dependents that must recompute (`dirty-closure`)
- [ ] `lib/artdag/tests/analyze.sx` — deep chains, diamonds, dirty closure
- [x] `lib/artdag/tests/analyze.sx` — deep chains, diamonds, dirty closure
correctness, unaffected nodes stay clean
## Phase 3 — Plan
@@ -136,6 +136,17 @@ lib/artdag/optimize.sx lib/artdag/federation.sx
## Progress log
- **Phase 2 — Analyze on Datalog** (analyze suite 16/16, total 36/36).
`lib/artdag/analyze.sx`: `artdag/edge-facts` projects each `(input-id, node-id)`
pair to an `(edge ...)` fact; `artdag/analyze` builds a `dl-program-data` db with
recursive `reachable(X,Y) :- edge(X,Y); edge(X,Y),reachable(Y,Z)` (the acl/relations
reachability shape). Query helpers `deps-of`/`dependents-of` (direct),
`reachable-from` (transitive downstream), `ancestors-of` (transitive upstream), all
returning sorted id lists. `dirty-closure` builds a db with `dirty(Y) :- edge(X,Y),
dirty(X)` seeded by changed-node facts and returns the transitive forward closure —
keystone test confirms changing a mid node dirties only it + downstream, leaving
siblings/upstream clean. Content-ids work as opaque Datalog string constants.
- **Phase 1 — DAG model + content addressing** (dag suite 20/20). `lib/artdag/dag.sx`:
node `{:op :inputs :params :commutative}`; `artdag/content-id` = `"node:"` + a
deterministic canonical serialization of `(op, inputs, params)` with dict keys