diff --git a/lib/artdag/conformance.sh b/lib/artdag/conformance.sh index cb59c336..0d439b5a 100755 --- a/lib/artdag/conformance.sh +++ b/lib/artdag/conformance.sh @@ -13,7 +13,7 @@ if [ ! -x "$SX_SERVER" ]; then exit 1 fi -SUITES=(dag analyze plan) +SUITES=(dag analyze plan execute) OUT_JSON="lib/artdag/scoreboard.json" OUT_MD="lib/artdag/scoreboard.md" @@ -36,9 +36,15 @@ run_suite() { (load "lib/datalog/strata.sx") (load "lib/datalog/eval.sx") (load "lib/datalog/api.sx") +(load "lib/persist/event.sx") +(load "lib/persist/backend.sx") +(load "lib/persist/log.sx") +(load "lib/persist/kv.sx") +(load "lib/persist/api.sx") (load "lib/artdag/dag.sx") (load "lib/artdag/analyze.sx") (load "lib/artdag/plan.sx") +(load "lib/artdag/execute.sx") (epoch 2) (eval "(define artdag-test-pass 0)") (eval "(define artdag-test-fail 0)") diff --git a/lib/artdag/execute.sx b/lib/artdag/execute.sx new file mode 100644 index 00000000..d0bc1d18 --- /dev/null +++ b/lib/artdag/execute.sx @@ -0,0 +1,82 @@ +; lib/artdag/execute.sx — Phase 4: interpret a plan with a content-addressed +; memo cache. A node's result is keyed by its content-id, so a node whose id is +; already in the cache is skipped (cache hit). Because changing a leaf changes +; the content-ids of its whole dirty closure, re-running recomputes exactly those +; nodes and cache-hits the rest — incremental recompute falls out of content +; addressing. Depends on dag.sx and plan.sx; the cache is a lib/persist/ backend. + +; runner: (fn (op params input-results) -> result). The injected effect interface. +; In production this performs the op (perform -> JAX/IPFS adapter); in tests it +; dispatches a pure SX op over its already-computed input results. + +; build a runner from a dict of op-name -> (fn (params inputs) -> result). +(define + artdag/op-table-runner + (fn (table) (fn (op params inputs) ((get table op) params inputs)))) + +; resolve an input id's result: this run's results first, then the warm cache. +(define + artdag/-input-result + (fn + (results cache in) + (if (has-key? results in) (get results in) (persist/kv-get cache in)))) + +(define + artdag/-exec-node + (fn + (dag runner cache acc id) + (let + ((node (artdag/dag-get dag id))) + (if + (persist/kv-has? cache id) + (assoc + acc + :results (assoc (get acc :results) id (persist/kv-get cache id)) + :hits (concat (get acc :hits) (list id))) + (let + ((inputs (map (fn (in) (artdag/-input-result (get acc :results) cache in)) (artdag/node-inputs node)))) + (let + ((result (runner (artdag/node-op node) (artdag/node-params node) inputs))) + (begin + (persist/kv-put cache id result) + (assoc + acc + :results (assoc (get acc :results) id result) + :recomputed (concat (get acc :recomputed) (list id)))))))))) + +; execute a plan against a memo cache, returning {:results :recomputed :hits}. +(define + artdag/execute + (fn + (dag plan runner cache) + (reduce + (fn (acc id) (artdag/-exec-node dag runner cache acc id)) + {:recomputed (list) :results {} :hits (list)} + (artdag/plan-flatten plan)))) + +; full run over every node, unlimited width. +(define + artdag/run + (fn + (dag runner cache) + (artdag/execute dag (artdag/plan dag 0) runner cache))) + +; incremental run: schedule only the dirty closure of the changed nodes. +(define + artdag/run-dirty + (fn + (dag changed runner cache) + (artdag/execute + dag + (artdag/plan-dirty dag changed 0) + runner + cache))) + +; ---- result inspection ---- + +(define artdag/result-of (fn (exec id) (get (get exec :results) id))) +(define + artdag/recomputed + (fn (exec) (artdag/sort-strings (get exec :recomputed)))) +(define artdag/recompute-count (fn (exec) (len (get exec :recomputed)))) +(define artdag/hit-count (fn (exec) (len (get exec :hits)))) diff --git a/lib/artdag/scoreboard.json b/lib/artdag/scoreboard.json index cb0b8677..d6412a3e 100644 --- a/lib/artdag/scoreboard.json +++ b/lib/artdag/scoreboard.json @@ -2,9 +2,10 @@ "suites": { "dag": {"pass": 20, "fail": 0}, "analyze": {"pass": 16, "fail": 0}, - "plan": {"pass": 18, "fail": 0} + "plan": {"pass": 18, "fail": 0}, + "execute": {"pass": 15, "fail": 0} }, - "total_pass": 54, + "total_pass": 69, "total_fail": 0, - "total": 54 + "total": 69 } diff --git a/lib/artdag/scoreboard.md b/lib/artdag/scoreboard.md index 30ab2fc4..4a901cc4 100644 --- a/lib/artdag/scoreboard.md +++ b/lib/artdag/scoreboard.md @@ -7,4 +7,5 @@ _Generated by `lib/artdag/conformance.sh`_ | dag | 20 | 0 | 20 | | analyze | 16 | 0 | 16 | | plan | 18 | 0 | 18 | -| **Total** | **54** | **0** | **54** | +| execute | 15 | 0 | 15 | +| **Total** | **69** | **0** | **69** | diff --git a/lib/artdag/tests/execute.sx b/lib/artdag/tests/execute.sx new file mode 100644 index 00000000..bc36b8d1 --- /dev/null +++ b/lib/artdag/tests/execute.sx @@ -0,0 +1,188 @@ +; Phase 4 — Execute: effect interpreter + content-addressed memo + incremental. + +(define ex-RT (artdag/op-table-runner {:in (fn (params inputs) (get params :v)) :add (fn (params inputs) (+ (nth inputs 0) (nth inputs 1))) :inc (fn (params inputs) (+ 1 (first inputs)))})) + +; two-leaf diamond: p,q leaves; b=inc(p); c=inc(q); d=add(b,c) +(define + ex-D1 + (artdag/build + (list + (list "p" "in" (list) {:v 10}) + (list "q" "in" (list) {:v 20}) + (list "b" "inc" (list "p") {}) + (list "c" "inc" (list "q") {}) + (list "d" "add" (list "b" "c") {} true)))) + +; same shape, leaf q changed (20 -> 21) +(define + ex-D2 + (artdag/build + (list + (list "p" "in" (list) {:v 10}) + (list "q" "in" (list) {:v 21}) + (list "b" "inc" (list "p") {}) + (list "c" "inc" (list "q") {}) + (list "d" "add" (list "b" "c") {} true)))) + +; a different dag that shares the p->b subgraph with ex-D1, plus z=inc(b) +(define + ex-D3 + (artdag/build + (list + (list "p" "in" (list) {:v 10}) + (list "b" "inc" (list "p") {}) + (list "z" "inc" (list "b") {})))) + +; ---- full execution ---- + +(artdag-test + "full run: result is correct" + (let + ((cache (persist/open))) + (artdag/result-of + (artdag/run ex-D1 ex-RT cache) + (artdag/dag-id ex-D1 "d"))) + 32) + +(artdag-test + "full run: cold cache recomputes every node" + (let + ((cache (persist/open))) + (artdag/recompute-count (artdag/run ex-D1 ex-RT cache))) + 5) + +(artdag-test + "full run: cold cache has no hits" + (let + ((cache (persist/open))) + (artdag/hit-count (artdag/run ex-D1 ex-RT cache))) + 0) + +; ---- memoization ---- + +(artdag-test + "re-run unchanged: zero recomputes" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/recompute-count (artdag/run ex-D1 ex-RT cache)))) + 0) + +(artdag-test + "re-run unchanged: all cache hits" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/hit-count (artdag/run ex-D1 ex-RT cache)))) + 5) + +(artdag-test + "re-run unchanged: result preserved" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/result-of + (artdag/run ex-D1 ex-RT cache) + (artdag/dag-id ex-D1 "d")))) + 32) + +; ---- incremental recompute (the keystone) ---- + +(artdag-test + "leaf change recomputes only the dirty closure (count)" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/recompute-count (artdag/run ex-D2 ex-RT cache)))) + 3) + +(artdag-test + "leaf change: unchanged nodes are cache hits" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/hit-count (artdag/run ex-D2 ex-RT cache)))) + 2) + +(artdag-test + "leaf change: recomputed set is exactly q,c,d" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/recomputed (artdag/run ex-D2 ex-RT cache)))) + (artdag/sort-strings + (list + (artdag/dag-id ex-D2 "q") + (artdag/dag-id ex-D2 "c") + (artdag/dag-id ex-D2 "d")))) + +(artdag-test + "leaf change: untouched sibling p is reused" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/member? + (artdag/dag-id ex-D2 "p") + (get (artdag/run ex-D2 ex-RT cache) :hits)))) + true) + +(artdag-test + "leaf change: new result is correct" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/result-of + (artdag/run ex-D2 ex-RT cache) + (artdag/dag-id ex-D2 "d")))) + 33) + +; ---- explicit dirty-only execution ---- + +(artdag-test + "run-dirty: schedules only the changed closure" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/recompute-count + (artdag/run-dirty ex-D2 (list (artdag/dag-id ex-D2 "q")) ex-RT cache)))) + 3) + +; ---- cross-dag cache sharing (content addressing) ---- + +(artdag-test + "shared subgraph hits cache across different dags" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/recompute-count (artdag/run ex-D3 ex-RT cache)))) + 1) + +(artdag-test + "shared subgraph: p and b reused across dags" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/hit-count (artdag/run ex-D3 ex-RT cache)))) + 2) + +(artdag-test + "shared subgraph: z still computes correctly" + (let + ((cache (persist/open))) + (begin + (artdag/run ex-D1 ex-RT cache) + (artdag/result-of + (artdag/run ex-D3 ex-RT cache) + (artdag/dag-id ex-D3 "z")))) + 12) diff --git a/plans/artdag-on-sx.md b/plans/artdag-on-sx.md index 4d8ab8e7..7421af6a 100644 --- a/plans/artdag-on-sx.md +++ b/plans/artdag-on-sx.md @@ -30,7 +30,7 @@ edges. ## Status (rolling) -`bash lib/artdag/conformance.sh` → **54/54** (3 suites: dag, analyze, plan) +`bash lib/artdag/conformance.sh` → **69/69** (4 suites: dag, analyze, plan, execute) ## Ground rules @@ -105,13 +105,13 @@ lib/artdag/optimize.sx lib/artdag/federation.sx ## Phase 4 — Execute (incremental + memoized) -- [ ] `lib/artdag/execute.sx` — interpret a plan: each node op runs via `perform` +- [x] `lib/artdag/execute.sx` — interpret a plan: each node op runs via `perform` (mocked op in tests); results keyed by content-id -- [ ] **content-addressed memo cache** backed by `lib/persist/`: a node whose +- [x] **content-addressed memo cache** backed by `lib/persist/`: a node whose content-id already has a stored result is skipped (cache hit) -- [ ] **incremental execute:** re-running after a leaf change recomputes only the +- [x] **incremental execute:** re-running after a leaf change recomputes only the dirty closure; everything else is a cache hit -- [ ] `lib/artdag/tests/execute.sx` — full run, cache-hit on re-run, incremental +- [x] `lib/artdag/tests/execute.sx` — full run, cache-hit on re-run, incremental recompute touches only dirty nodes (assert recompute count) ## Phase 5 — Effect-pipeline optimization @@ -136,6 +136,18 @@ lib/artdag/optimize.sx lib/artdag/federation.sx ## Progress log +- **Phase 4 — Execute (incremental + memoized)** (execute suite 15/15, total 69/69). + `lib/artdag/execute.sx`: `artdag/execute` folds a plan, computing each node via an + injected `runner (op params input-results)` (production = `perform` to JAX/IPFS + adapter; tests = a pure op-table) and memoizing the result in a `lib/persist/` kv + backend keyed by **content-id**. A node whose content-id is already cached is a hit + (skipped). The keystone falls out of content addressing: changing a leaf changes the + ids of its whole dirty closure, so re-running the full plan against a warm cache + recomputes exactly those nodes and hits the rest — verified by recompute/hit counts + (5 cold → 0 on rerun → 3 after one leaf change, sibling reused). Cross-DAG sharing + verified: a different DAG containing a shared subgraph cache-hits it. `run`/`run-dirty` + helpers; `result-of`/`recompute-count`/`hit-count`/`recomputed` inspection. + - **Phase 3 — Plan** (plan suite 18/18, total 54/54). `lib/artdag/plan.sx`: `artdag/plan` schedules a dag into Kahn-wave topological batches — each batch's nodes have all in-scope deps satisfied by earlier batches, so they run in parallel.