artdag: Phase 5 optimization — DCE + CSE + adjacent-op fusion + 18 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 57s

optimize.sx adds three result-preserving passes: dce (keep outputs + ancestors,
preserve ids), cse (==build; structural sharing is free from content addressing),
and fuse (collapse 1-to-1 fusible unary chains into an artdag/pipeline node fed by
the chain head's input; leaves/fan-out/non-fusible ops never fuse). fusing-runner
replays pipeline stages, output-equivalent to the unfused dag. optimize 18/18,
total 87/87.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-07 12:08:12 +00:00
parent a2f4fb5e89
commit 228861215d
6 changed files with 391 additions and 9 deletions

View File

@@ -13,7 +13,7 @@ if [ ! -x "$SX_SERVER" ]; then
exit 1
fi
SUITES=(dag analyze plan execute)
SUITES=(dag analyze plan execute optimize)
OUT_JSON="lib/artdag/scoreboard.json"
OUT_MD="lib/artdag/scoreboard.md"
@@ -45,6 +45,7 @@ run_suite() {
(load "lib/artdag/analyze.sx")
(load "lib/artdag/plan.sx")
(load "lib/artdag/execute.sx")
(load "lib/artdag/optimize.sx")
(epoch 2)
(eval "(define artdag-test-pass 0)")
(eval "(define artdag-test-fail 0)")

190
lib/artdag/optimize.sx Normal file
View File

@@ -0,0 +1,190 @@
; lib/artdag/optimize.sx — Phase 5: result-preserving DAG rewrites.
; DCE — drop nodes not reachable upstream from the requested outputs.
; CSE — free from content addressing: structurally identical subexpressions
; already collapse to one node at build time (artdag/cse == build).
; Fusion — collapse a maximal 1-to-1 chain of fusible unary ops into a single
; "artdag/pipeline" node that replays the stages; output-equivalent.
; Depends on dag.sx and analyze.sx.
; ---- dict helper ----
(define
artdag/-dict-filter
(fn
(d keep?)
(reduce
(fn (acc k) (if (keep? k (get d k)) (assoc acc k (get d k)) acc))
{}
(keys d))))
(define
artdag/-union
(fn
(a b)
(reduce (fn (acc x) (if (artdag/member? x acc) acc (cons x acc))) a b)))
; ---- dead-node elimination ----
; keep only the outputs and their transitive dependencies; ids are preserved.
(define
artdag/dce
(fn
(dag outputs)
(let
((db (artdag/analyze dag)))
(let
((live (reduce (fn (acc out) (artdag/-union (artdag/-union acc (list out)) (artdag/ancestors-of db out))) (list) outputs)))
{:names (artdag/-dict-filter (artdag/dag-names dag) (fn (k v) (artdag/member? v live))) :order (filter (fn (id) (artdag/member? id live)) (artdag/dag-order dag)) :ok true :nodes (artdag/-dict-filter (artdag/dag-nodes dag) (fn (k v) (artdag/member? k live)))}))))
; ---- common-subexpression elimination ----
; structural sharing is inherent to content addressing: build already maps
; structurally identical specs to a single node/id.
(define artdag/cse artdag/build)
; ---- adjacent-op fusion (entry-level rewrite) ----
(define artdag/pipeline-op "artdag/pipeline")
(define
artdag/-name->entry
(fn
(entries)
(reduce
(fn (m e) (assoc m (artdag/entry-name e) e))
{}
entries)))
; name -> list of dependent names
(define
artdag/-deps-map
(fn
(entries)
(reduce
(fn
(m e)
(reduce
(fn
(mm i)
(assoc
mm
i
(cons
(artdag/entry-name e)
(if (has-key? mm i) (get mm i) (list)))))
m
(artdag/entry-inputs e)))
{}
entries)))
(define artdag/-stage (fn (e) {:op (artdag/entry-op e) :params (artdag/entry-params e)}))
; the single predecessor that `name` may absorb, or nil. Requires: name is a
; fusible unary op; its one input is a locally-defined fusible node whose ONLY
; dependent is name (so fusing cannot break sharing).
(define
artdag/-absorbs
(fn
(n->e deps fusible? name)
(let
((e (get n->e name)))
(let
((ins (artdag/entry-inputs e)))
(if
(= (len ins) 1)
(let
((x (first ins)))
(if
(and
(has-key? n->e x)
(fusible? (artdag/entry-op e))
(fusible? (artdag/entry-op (get n->e x)))
(= (get deps x) (list name)))
x
nil))
nil)))))
(define
artdag/-absorbed-set
(fn
(n->e deps fusible? names)
(reduce
(fn
(acc y)
(let
((p (artdag/-absorbs n->e deps fusible? y)))
(if (nil? p) acc (cons p acc))))
(list)
names)))
; walk predecessors from a tail, building stages head->tail.
(define
artdag/-fuse-chain
(fn
(n->e deps fusible? cur stages)
(let
((p (artdag/-absorbs n->e deps fusible? cur)))
(if
(nil? p)
{:stages (cons (artdag/-stage (get n->e cur)) stages) :head cur}
(artdag/-fuse-chain
n->e
deps
fusible?
p
(cons (artdag/-stage (get n->e cur)) stages))))))
(define
artdag/fuse-entries
(fn
(entries fusible?)
(let
((n->e (artdag/-name->entry entries))
(deps (artdag/-deps-map entries))
(names (map artdag/entry-name entries)))
(let
((absorbed (artdag/-absorbed-set n->e deps fusible? names)))
(map
(fn
(name)
(let
((c (artdag/-fuse-chain n->e deps fusible? name (list))))
(if
(> (len (get c :stages)) 1)
(list
name
artdag/pipeline-op
(artdag/entry-inputs (get n->e (get c :head)))
{:stages (get c :stages)})
(get n->e name))))
(filter (fn (name) (not (artdag/member? name absorbed))) names))))))
(define
artdag/fuse
(fn
(entries fusible?)
(artdag/build (artdag/fuse-entries entries fusible?))))
; runner that replays a fused pipeline over its single input, delegating each
; stage to a base runner; non-pipeline ops fall through unchanged.
(define
artdag/pipeline-run
(fn
(base-runner)
(fn
(params inputs)
(reduce
(fn
(val stage)
(base-runner (get stage :op) (get stage :params) (list val)))
(first inputs)
(get params :stages)))))
(define
artdag/fusing-runner
(fn
(base-runner)
(fn
(op params inputs)
(if
(= op artdag/pipeline-op)
((artdag/pipeline-run base-runner) params inputs)
(base-runner op params inputs)))))

View File

@@ -3,9 +3,10 @@
"dag": {"pass": 20, "fail": 0},
"analyze": {"pass": 16, "fail": 0},
"plan": {"pass": 18, "fail": 0},
"execute": {"pass": 15, "fail": 0}
"execute": {"pass": 15, "fail": 0},
"optimize": {"pass": 18, "fail": 0}
},
"total_pass": 69,
"total_pass": 87,
"total_fail": 0,
"total": 69
"total": 87
}

View File

@@ -8,4 +8,5 @@ _Generated by `lib/artdag/conformance.sh`_
| analyze | 16 | 0 | 16 |
| plan | 18 | 0 | 18 |
| execute | 15 | 0 | 15 |
| **Total** | **69** | **0** | **69** |
| optimize | 18 | 0 | 18 |
| **Total** | **87** | **0** | **87** |

View File

@@ -0,0 +1,176 @@
; Phase 5 — optimization: DCE, CSE (content-id sharing), adjacent-op fusion.
(define opt-BASE (artdag/op-table-runner {:in (fn (params inputs) (get params :v)) :sq (fn (params inputs) (* (first inputs) (first inputs))) :add (fn (params inputs) (+ (nth inputs 0) (nth inputs 1))) :inc (fn (params inputs) (+ 1 (first inputs)))}))
(define opt-RUN (artdag/fusing-runner opt-BASE))
(define opt-inc? (fn (op) (= op "inc")))
(define opt-incsq? (fn (op) (or (= op "inc") (= op "sq"))))
; linear chain a(in) -> b -> c -> d, all inc
(define
opt-chain
(list
(list "a" "in" (list) {:v 5})
(list "b" "inc" (list "a") {})
(list "c" "inc" (list "b") {})
(list "d" "inc" (list "c") {})))
; ---- DCE ----
(define
dce-entries
(list
(list "a" "in" (list) {:v 5})
(list "b" "inc" (list "a") {})
(list "c" "inc" (list "b") {})
(list "x" "sq" (list "a") {})))
(define dce-G (artdag/build dce-entries))
(artdag-test
"dce: removes dead node"
(artdag/node-count (artdag/dce dce-G (list (artdag/dag-id dce-G "c"))))
3)
(artdag-test
"dce: keeps live closure intact"
(artdag/node-count (artdag/dce dce-G (list (artdag/dag-id dce-G "x"))))
2)
(artdag-test
"dce: preserves surviving node ids"
(artdag/member?
(artdag/dag-id dce-G "c")
(keys
(artdag/dag-nodes (artdag/dce dce-G (list (artdag/dag-id dce-G "c"))))))
true)
(artdag-test
"dce: output result unchanged after elimination"
(let
((cache (persist/open)))
(artdag/result-of
(artdag/run
(artdag/dce dce-G (list (artdag/dag-id dce-G "c")))
opt-RUN
cache)
(artdag/dag-id dce-G "c")))
7)
(artdag-test
"dce: nothing dead is a no-op on count"
(artdag/node-count
(artdag/dce
dce-G
(list (artdag/dag-id dce-G "c") (artdag/dag-id dce-G "x"))))
4)
; ---- CSE (free from content addressing) ----
(define
cse-entries
(list
(list "a" "in" (list) {:v 3})
(list "s1" "sq" (list "a") {})
(list "s2" "sq" (list "a") {})
(list "d" "add" (list "s1" "s2") {} true)))
(define cse-C (artdag/cse cse-entries))
(artdag-test
"cse: identical subexpressions collapse to one node"
(artdag/node-count cse-C)
3)
(artdag-test
"cse: shared node computes once"
(let
((cache (persist/open)))
(artdag/recompute-count (artdag/run cse-C opt-RUN cache)))
3)
(artdag-test
"cse: s1 and s2 are the same id"
(equal? (artdag/dag-id cse-C "s1") (artdag/dag-id cse-C "s2"))
true)
(artdag-test
"cse: result is correct"
(let
((cache (persist/open)))
(artdag/result-of
(artdag/run cse-C opt-RUN cache)
(artdag/dag-id cse-C "d")))
18)
; ---- fusion ----
(artdag-test
"fusion: collapses a unary chain"
(artdag/node-count (artdag/fuse opt-chain opt-inc?))
2)
(artdag-test
"fusion: unfused has all nodes"
(artdag/node-count (artdag/build opt-chain))
4)
(artdag-test
"fusion: output-equivalent to unfused"
(let
((c1 (persist/open)) (c2 (persist/open)))
(=
(artdag/result-of
(artdag/run (artdag/build opt-chain) opt-RUN c1)
(artdag/dag-id (artdag/build opt-chain) "d"))
(artdag/result-of
(artdag/run (artdag/fuse opt-chain opt-inc?) opt-RUN c2)
(artdag/dag-id (artdag/fuse opt-chain opt-inc?) "d"))))
true)
(artdag-test
"fusion: leaf is never fused"
(artdag/node-op
(artdag/dag-node-by-name (artdag/fuse opt-chain opt-inc?) "a"))
"in")
(artdag-test
"fusion: tail becomes a pipeline node"
(artdag/node-op
(artdag/dag-node-by-name (artdag/fuse opt-chain opt-inc?) "d"))
"artdag/pipeline")
(artdag-test
"fusion: mixed fusible set fuses across op kinds"
(artdag/node-count
(artdag/fuse
(list
(list "a" "in" (list) {:v 2})
(list "b" "inc" (list "a") {})
(list "c" "sq" (list "b") {})
(list "d" "inc" (list "c") {}))
opt-incsq?))
2)
(artdag-test
"fusion: mixed chain replays correctly"
(let
((cache (persist/open)))
(let
((f (artdag/fuse (list (list "a" "in" (list) {:v 2}) (list "b" "inc" (list "a") {}) (list "c" "sq" (list "b") {}) (list "d" "inc" (list "c") {})) opt-incsq?)))
(artdag/result-of (artdag/run f opt-RUN cache) (artdag/dag-id f "d"))))
10)
(artdag-test
"fusion: fanout node is not fused"
(artdag/node-count
(artdag/fuse
(list
(list "a" "in" (list) {:v 1})
(list "b" "inc" (list "a") {})
(list "c" "inc" (list "b") {})
(list "e" "sq" (list "b") {}))
opt-inc?))
4)
(artdag-test
"fusion: empty fusible set leaves dag unchanged"
(artdag/node-count (artdag/fuse opt-chain (fn (op) false)))
4)

View File

@@ -30,7 +30,7 @@ edges.
## Status (rolling)
`bash lib/artdag/conformance.sh`**69/69** (4 suites: dag, analyze, plan, execute)
`bash lib/artdag/conformance.sh`**87/87** (5 suites: dag, analyze, plan, execute, optimize)
## Ground rules
@@ -116,12 +116,12 @@ lib/artdag/optimize.sx lib/artdag/federation.sx
## Phase 5 — Effect-pipeline optimization
- [ ] `lib/artdag/optimize.sx` — rewrite the DAG before execution: dead-node
- [x] `lib/artdag/optimize.sx` — rewrite the DAG before execution: dead-node
elimination (unreachable from outputs), common-subexpression sharing (free from
content ids), adjacent-op fusion
- [ ] optimizations are content-id-preserving where semantically identical; assert
- [x] optimizations are content-id-preserving where semantically identical; assert
the optimized DAG produces identical results
- [ ] `lib/artdag/tests/optimize.sx` — DCE, CSE dedup, fusion equivalence
- [x] `lib/artdag/tests/optimize.sx` — DCE, CSE dedup, fusion equivalence
- [ ] (optional/later) rule-based optimization via `maude-on-sx`'s rewriting engine —
flag the integration point, don't block on it
@@ -136,6 +136,19 @@ lib/artdag/optimize.sx lib/artdag/federation.sx
## Progress log
- **Phase 5 — Effect-pipeline optimization** (optimize suite 18/18, total 87/87).
`lib/artdag/optimize.sx`: `artdag/dce dag outputs` keeps only the outputs plus
their transitive ancestors (via analyze), preserving surviving content-ids.
`artdag/cse` == build — structural sharing is inherent to content addressing, so
identical subexpressions collapse to one node/id and execute once (verified).
`artdag/fuse entries fusible?` rewrites entries: a maximal 1-to-1 chain of fusible
unary ops (predecessor used only by its single consumer, both fusible) collapses
into one `artdag/pipeline` node carrying ordered `{:op :params}` stages, fed by the
chain head's external input; leaves, fan-out nodes, and non-fusible ops never fuse.
`artdag/fusing-runner` wraps a base runner to replay pipeline stages — output
equivalent to the unfused DAG (asserted). Note: CSE auto-dedup means test fixtures
intended as distinct nodes must use distinct op/params.
- **Phase 4 — Execute (incremental + memoized)** (execute suite 15/15, total 69/69).
`lib/artdag/execute.sx`: `artdag/execute` folds a plan, computing each node via an
injected `runner (op params input-results)` (production = `perform` to JAX/IPFS