From 228861215de019bccde8a15c52e88710c687d1a7 Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 12:08:12 +0000 Subject: [PATCH] =?UTF-8?q?artdag:=20Phase=205=20optimization=20=E2=80=94?= =?UTF-8?q?=20DCE=20+=20CSE=20+=20adjacent-op=20fusion=20+=2018=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit optimize.sx adds three result-preserving passes: dce (keep outputs + ancestors, preserve ids), cse (==build; structural sharing is free from content addressing), and fuse (collapse 1-to-1 fusible unary chains into an artdag/pipeline node fed by the chain head's input; leaves/fan-out/non-fusible ops never fuse). fusing-runner replays pipeline stages, output-equivalent to the unfused dag. optimize 18/18, total 87/87. Co-Authored-By: Claude Opus 4.8 (1M context) --- lib/artdag/conformance.sh | 3 +- lib/artdag/optimize.sx | 190 +++++++++++++++++++++++++++++++++++ lib/artdag/scoreboard.json | 7 +- lib/artdag/scoreboard.md | 3 +- lib/artdag/tests/optimize.sx | 176 ++++++++++++++++++++++++++++++++ plans/artdag-on-sx.md | 21 +++- 6 files changed, 391 insertions(+), 9 deletions(-) create mode 100644 lib/artdag/optimize.sx create mode 100644 lib/artdag/tests/optimize.sx diff --git a/lib/artdag/conformance.sh b/lib/artdag/conformance.sh index 0d439b5a..dcccfa4a 100755 --- a/lib/artdag/conformance.sh +++ b/lib/artdag/conformance.sh @@ -13,7 +13,7 @@ if [ ! 
-x "$SX_SERVER" ]; then exit 1 fi -SUITES=(dag analyze plan execute) +SUITES=(dag analyze plan execute optimize) OUT_JSON="lib/artdag/scoreboard.json" OUT_MD="lib/artdag/scoreboard.md" @@ -45,6 +45,7 @@ run_suite() { (load "lib/artdag/analyze.sx") (load "lib/artdag/plan.sx") (load "lib/artdag/execute.sx") +(load "lib/artdag/optimize.sx") (epoch 2) (eval "(define artdag-test-pass 0)") (eval "(define artdag-test-fail 0)") diff --git a/lib/artdag/optimize.sx b/lib/artdag/optimize.sx new file mode 100644 index 00000000..022c4825 --- /dev/null +++ b/lib/artdag/optimize.sx @@ -0,0 +1,190 @@ +; lib/artdag/optimize.sx — Phase 5: result-preserving DAG rewrites. +; DCE — drop nodes not reachable upstream from the requested outputs. +; CSE — free from content addressing: structurally identical subexpressions +; already collapse to one node at build time (artdag/cse == build). +; Fusion — collapse a maximal 1-to-1 chain of fusible unary ops into a single +; "artdag/pipeline" node that replays the stages; output-equivalent. +; Depends on dag.sx and analyze.sx. + +; ---- dict helper ---- + +(define + artdag/-dict-filter + (fn + (d keep?) + (reduce + (fn (acc k) (if (keep? k (get d k)) (assoc acc k (get d k)) acc)) + {} + (keys d)))) + +(define + artdag/-union + (fn + (a b) + (reduce (fn (acc x) (if (artdag/member? x acc) acc (cons x acc))) a b))) + +; ---- dead-node elimination ---- +; keep only the outputs and their transitive dependencies; ids are preserved. +(define + artdag/dce + (fn + (dag outputs) + (let + ((db (artdag/analyze dag))) + (let + ((live (reduce (fn (acc out) (artdag/-union (artdag/-union acc (list out)) (artdag/ancestors-of db out))) (list) outputs))) + {:names (artdag/-dict-filter (artdag/dag-names dag) (fn (k v) (artdag/member? v live))) :order (filter (fn (id) (artdag/member? id live)) (artdag/dag-order dag)) :ok true :nodes (artdag/-dict-filter (artdag/dag-nodes dag) (fn (k v) (artdag/member? 
k live)))})))) + +; ---- common-subexpression elimination ---- +; structural sharing is inherent to content addressing: build already maps +; structurally identical specs to a single node/id. +(define artdag/cse artdag/build) + +; ---- adjacent-op fusion (entry-level rewrite) ---- + +(define artdag/pipeline-op "artdag/pipeline") + +(define + artdag/-name->entry + (fn + (entries) + (reduce + (fn (m e) (assoc m (artdag/entry-name e) e)) + {} + entries))) + +; name -> list of dependent names +(define + artdag/-deps-map + (fn + (entries) + (reduce + (fn + (m e) + (reduce + (fn + (mm i) + (assoc + mm + i + (cons + (artdag/entry-name e) + (if (has-key? mm i) (get mm i) (list))))) + m + (artdag/entry-inputs e))) + {} + entries))) + +(define artdag/-stage (fn (e) {:op (artdag/entry-op e) :params (artdag/entry-params e)})) + +; the single predecessor `name` may absorb, or nil. Requires: name and its one +; input are both fusible UNARY ops (a stage only receives the running value); +; the input is locally defined with name as its ONLY dependent (keeps sharing). +(define + artdag/-absorbs + (fn + (n->e deps fusible? name) + (let + ((e (get n->e name))) + (let + ((ins (artdag/entry-inputs e))) + (if + (= (len ins) 1) + (let + ((x (first ins))) + (if + (and + (has-key? n->e x) + (fusible? (artdag/entry-op e)) + (fusible? (artdag/entry-op (get n->e x))) + (= (len (artdag/entry-inputs (get n->e x))) 1) + (= (get deps x) (list name))) + x nil)) + nil))))) + +(define + artdag/-absorbed-set + (fn + (n->e deps fusible? names) + (reduce + (fn + (acc y) + (let + ((p (artdag/-absorbs n->e deps fusible? y))) + (if (nil? p) acc (cons p acc)))) + (list) + names))) + +; walk predecessors from a tail, building stages head->tail. +(define + artdag/-fuse-chain + (fn + (n->e deps fusible? cur stages) + (let + ((p (artdag/-absorbs n->e deps fusible? cur))) + (if + (nil? p) + {:stages (cons (artdag/-stage (get n->e cur)) stages) :head cur} + (artdag/-fuse-chain + n->e + deps + fusible?
+ p + (cons (artdag/-stage (get n->e cur)) stages)))))) + +(define + artdag/fuse-entries + (fn + (entries fusible?) + (let + ((n->e (artdag/-name->entry entries)) + (deps (artdag/-deps-map entries)) + (names (map artdag/entry-name entries))) + (let + ((absorbed (artdag/-absorbed-set n->e deps fusible? names))) + (map + (fn + (name) + (let + ((c (artdag/-fuse-chain n->e deps fusible? name (list)))) + (if + (> (len (get c :stages)) 1) + (list + name + artdag/pipeline-op + (artdag/entry-inputs (get n->e (get c :head))) + {:stages (get c :stages)}) + (get n->e name)))) + (filter (fn (name) (not (artdag/member? name absorbed))) names)))))) + +(define + artdag/fuse + (fn + (entries fusible?) + (artdag/build (artdag/fuse-entries entries fusible?)))) + +; runner that replays a fused pipeline over its single input, delegating each +; stage to a base runner; non-pipeline ops fall through unchanged. +(define + artdag/pipeline-run + (fn + (base-runner) + (fn + (params inputs) + (reduce + (fn + (val stage) + (base-runner (get stage :op) (get stage :params) (list val))) + (first inputs) + (get params :stages))))) + +(define + artdag/fusing-runner + (fn + (base-runner) + (fn + (op params inputs) + (if + (= op artdag/pipeline-op) + ((artdag/pipeline-run base-runner) params inputs) + (base-runner op params inputs))))) diff --git a/lib/artdag/scoreboard.json b/lib/artdag/scoreboard.json index d6412a3e..7be1073f 100644 --- a/lib/artdag/scoreboard.json +++ b/lib/artdag/scoreboard.json @@ -3,9 +3,10 @@ "dag": {"pass": 20, "fail": 0}, "analyze": {"pass": 16, "fail": 0}, "plan": {"pass": 18, "fail": 0}, - "execute": {"pass": 15, "fail": 0} + "execute": {"pass": 15, "fail": 0}, + "optimize": {"pass": 18, "fail": 0} }, - "total_pass": 69, + "total_pass": 87, "total_fail": 0, - "total": 69 + "total": 87 } diff --git a/lib/artdag/scoreboard.md b/lib/artdag/scoreboard.md index 4a901cc4..14f8c5c8 100644 --- a/lib/artdag/scoreboard.md +++ b/lib/artdag/scoreboard.md @@ -8,4 +8,5 @@ _Generated by 
`lib/artdag/conformance.sh`_ | analyze | 16 | 0 | 16 | | plan | 18 | 0 | 18 | | execute | 15 | 0 | 15 | -| **Total** | **69** | **0** | **69** | +| optimize | 18 | 0 | 18 | +| **Total** | **87** | **0** | **87** | diff --git a/lib/artdag/tests/optimize.sx b/lib/artdag/tests/optimize.sx new file mode 100644 index 00000000..6e275ad0 --- /dev/null +++ b/lib/artdag/tests/optimize.sx @@ -0,0 +1,176 @@ +; Phase 5 — optimization: DCE, CSE (content-id sharing), adjacent-op fusion. + +(define opt-BASE (artdag/op-table-runner {:in (fn (params inputs) (get params :v)) :sq (fn (params inputs) (* (first inputs) (first inputs))) :add (fn (params inputs) (+ (nth inputs 0) (nth inputs 1))) :inc (fn (params inputs) (+ 1 (first inputs)))})) +(define opt-RUN (artdag/fusing-runner opt-BASE)) +(define opt-inc? (fn (op) (= op "inc"))) +(define opt-incsq? (fn (op) (or (= op "inc") (= op "sq")))) + +; linear chain a(in) -> b -> c -> d, all inc +(define + opt-chain + (list + (list "a" "in" (list) {:v 5}) + (list "b" "inc" (list "a") {}) + (list "c" "inc" (list "b") {}) + (list "d" "inc" (list "c") {}))) + +; ---- DCE ---- + +(define + dce-entries + (list + (list "a" "in" (list) {:v 5}) + (list "b" "inc" (list "a") {}) + (list "c" "inc" (list "b") {}) + (list "x" "sq" (list "a") {}))) +(define dce-G (artdag/build dce-entries)) + +(artdag-test + "dce: removes dead node" + (artdag/node-count (artdag/dce dce-G (list (artdag/dag-id dce-G "c")))) + 3) + +(artdag-test + "dce: keeps live closure intact" + (artdag/node-count (artdag/dce dce-G (list (artdag/dag-id dce-G "x")))) + 2) + +(artdag-test + "dce: preserves surviving node ids" + (artdag/member? 
+ (artdag/dag-id dce-G "c") + (keys + (artdag/dag-nodes (artdag/dce dce-G (list (artdag/dag-id dce-G "c")))))) + true) + +(artdag-test + "dce: output result unchanged after elimination" + (let + ((cache (persist/open))) + (artdag/result-of + (artdag/run + (artdag/dce dce-G (list (artdag/dag-id dce-G "c"))) + opt-RUN + cache) + (artdag/dag-id dce-G "c"))) + 7) + +(artdag-test + "dce: nothing dead is a no-op on count" + (artdag/node-count + (artdag/dce + dce-G + (list (artdag/dag-id dce-G "c") (artdag/dag-id dce-G "x")))) + 4) + +; ---- CSE (free from content addressing) ---- + +(define + cse-entries + (list + (list "a" "in" (list) {:v 3}) + (list "s1" "sq" (list "a") {}) + (list "s2" "sq" (list "a") {}) + (list "d" "add" (list "s1" "s2") {} true))) +(define cse-C (artdag/cse cse-entries)) + +(artdag-test + "cse: identical subexpressions collapse to one node" + (artdag/node-count cse-C) + 3) + +(artdag-test + "cse: shared node computes once" + (let + ((cache (persist/open))) + (artdag/recompute-count (artdag/run cse-C opt-RUN cache))) + 3) + +(artdag-test + "cse: s1 and s2 are the same id" + (equal? (artdag/dag-id cse-C "s1") (artdag/dag-id cse-C "s2")) + true) + +(artdag-test + "cse: result is correct" + (let + ((cache (persist/open))) + (artdag/result-of + (artdag/run cse-C opt-RUN cache) + (artdag/dag-id cse-C "d"))) + 18) + +; ---- fusion ---- + +(artdag-test + "fusion: collapses a unary chain" + (artdag/node-count (artdag/fuse opt-chain opt-inc?)) + 2) + +(artdag-test + "fusion: unfused has all nodes" + (artdag/node-count (artdag/build opt-chain)) + 4) + +(artdag-test + "fusion: output-equivalent to unfused" + (let + ((c1 (persist/open)) (c2 (persist/open))) + (= + (artdag/result-of + (artdag/run (artdag/build opt-chain) opt-RUN c1) + (artdag/dag-id (artdag/build opt-chain) "d")) + (artdag/result-of + (artdag/run (artdag/fuse opt-chain opt-inc?) opt-RUN c2) + (artdag/dag-id (artdag/fuse opt-chain opt-inc?) 
"d")))) + true) + +(artdag-test + "fusion: leaf is never fused" + (artdag/node-op + (artdag/dag-node-by-name (artdag/fuse opt-chain opt-inc?) "a")) + "in") + +(artdag-test + "fusion: tail becomes a pipeline node" + (artdag/node-op + (artdag/dag-node-by-name (artdag/fuse opt-chain opt-inc?) "d")) + "artdag/pipeline") + +(artdag-test + "fusion: mixed fusible set fuses across op kinds" + (artdag/node-count + (artdag/fuse + (list + (list "a" "in" (list) {:v 2}) + (list "b" "inc" (list "a") {}) + (list "c" "sq" (list "b") {}) + (list "d" "inc" (list "c") {})) + opt-incsq?)) + 2) + +(artdag-test + "fusion: mixed chain replays correctly" + (let + ((cache (persist/open))) + (let + ((f (artdag/fuse (list (list "a" "in" (list) {:v 2}) (list "b" "inc" (list "a") {}) (list "c" "sq" (list "b") {}) (list "d" "inc" (list "c") {})) opt-incsq?))) + (artdag/result-of (artdag/run f opt-RUN cache) (artdag/dag-id f "d")))) + 10) + +(artdag-test + "fusion: fanout node is not fused" + (artdag/node-count + (artdag/fuse + (list + (list "a" "in" (list) {:v 1}) + (list "b" "inc" (list "a") {}) + (list "c" "inc" (list "b") {}) + (list "e" "sq" (list "b") {})) + opt-inc?)) + 4) + +(artdag-test + "fusion: empty fusible set leaves dag unchanged" + (artdag/node-count (artdag/fuse opt-chain (fn (op) false))) + 4) diff --git a/plans/artdag-on-sx.md b/plans/artdag-on-sx.md index 7421af6a..0ebb38b4 100644 --- a/plans/artdag-on-sx.md +++ b/plans/artdag-on-sx.md @@ -30,7 +30,7 @@ edges. 
## Status (rolling) -`bash lib/artdag/conformance.sh` → **69/69** (4 suites: dag, analyze, plan, execute) +`bash lib/artdag/conformance.sh` → **87/87** (5 suites: dag, analyze, plan, execute, optimize) ## Ground rules @@ -116,12 +116,12 @@ lib/artdag/optimize.sx lib/artdag/federation.sx ## Phase 5 — Effect-pipeline optimization -- [ ] `lib/artdag/optimize.sx` — rewrite the DAG before execution: dead-node +- [x] `lib/artdag/optimize.sx` — rewrite the DAG before execution: dead-node elimination (unreachable from outputs), common-subexpression sharing (free from content ids), adjacent-op fusion -- [ ] optimizations are content-id-preserving where semantically identical; assert +- [x] optimizations are content-id-preserving where semantically identical; assert the optimized DAG produces identical results -- [ ] `lib/artdag/tests/optimize.sx` — DCE, CSE dedup, fusion equivalence +- [x] `lib/artdag/tests/optimize.sx` — DCE, CSE dedup, fusion equivalence - [ ] (optional/later) rule-based optimization via `maude-on-sx`'s rewriting engine — flag the integration point, don't block on it @@ -136,6 +136,19 @@ lib/artdag/optimize.sx lib/artdag/federation.sx ## Progress log +- **Phase 5 — Effect-pipeline optimization** (optimize suite 18/18, total 87/87). + `lib/artdag/optimize.sx`: `artdag/dce dag outputs` keeps only the outputs plus + their transitive ancestors (via analyze), preserving surviving content-ids. + `artdag/cse` == build — structural sharing is inherent to content addressing, so + identical subexpressions collapse to one node/id and execute once (verified). + `artdag/fuse entries fusible?` rewrites entries: a maximal 1-to-1 chain of fusible + unary ops (predecessor used only by its single consumer, both fusible) collapses + into one `artdag/pipeline` node carrying ordered `{:op :params}` stages, fed by the + chain head's external input; leaves, fan-out nodes, and non-fusible ops never fuse. 
+ `artdag/fusing-runner` wraps a base runner to replay pipeline stages — output + equivalent to the unfused DAG (asserted). Note: CSE auto-dedup means test fixtures + intended as distinct nodes must use distinct op/params. + - **Phase 4 — Execute (incremental + memoized)** (execute suite 15/15, total 69/69). `lib/artdag/execute.sx`: `artdag/execute` folds a plan, computing each node via an injected `runner (op params input-results)` (production = `perform` to JAX/IPFS