artdag: fault-tolerant execution — confined failure, cache never poisoned + 14 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m4s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m4s
fault.sx run-safe: a node op may return (artdag/fail reason); failure is confined to that node + downstream dependents while independent branches compute, and failed results are never cached, so retry after a fix recomputes only the failed closure and hits the good nodes. fault 14/14, total 158/158. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
144
lib/artdag/tests/fault.sx
Normal file
144
lib/artdag/tests/fault.sx
Normal file
@@ -0,0 +1,144 @@
|
||||
; fault-tolerant execution: failure confined to its closure, cache never poisoned.
|
||||
|
||||
; Runner whose :boom op always returns (artdag/fail "kaboom").
; Each op receives the node's params map and the list of its input values.
(define ft-BAD
  (artdag/op-table-runner
    {:boom (fn (params inputs) (artdag/fail "kaboom"))
     :in (fn (params inputs) (get params :v))
     :add (fn (params inputs) (+ (nth inputs 0) (nth inputs 1)))
     :inc (fn (params inputs) (+ 1 (first inputs)))}))
|
||||
|
||||
; Runner with the same :in / :add / :inc ops as ft-BAD, but :boom returns
; the plain value 99 instead of a failure marker.
(define ft-GOOD
  (artdag/op-table-runner
    {:boom (fn (params inputs) 99)
     :in (fn (params inputs) (get params :v))
     :add (fn (params inputs) (+ (nth inputs 0) (nth inputs 1)))
     :inc (fn (params inputs) (+ 1 (first inputs)))}))
|
||||
|
||||
; p,q leaves; b=inc(p) (independent); c=boom(q); d=add(b,c)
|
||||
; Shared fixture: two "in" leaves (p, q), b = inc(p), c = boom(q), d = add(b, c).
(define ft-D
  (let
    ((nodes
       (list
         (list "p" "in" (list) {:v 10})
         (list "q" "in" (list) {:v 20})
         (list "b" "inc" (list "p") {})
         (list "c" "boom" (list "q") {})
         ; NOTE(review): the trailing true is passed to artdag/build as-is;
         ; presumably it flags d as an output node -- confirm against artdag/build.
         (list "d" "add" (list "b" "c") {} true))))
    (artdag/build nodes)))
|
||||
|
||||
; ---- markers ----
|
||||
|
||||
; A value built by artdag/fail must be recognised by artdag/failed?.
(artdag-test
  "fail constructor is detected"
  (let ((marker (artdag/fail "x")))
    (artdag/failed? marker))
  true)
|
||||
|
||||
; An ordinary number must not be reported as a failure.
(artdag-test "plain values are not failures" (artdag/failed? 42) false)
|
||||
|
||||
; ---- failure confinement ----
|
||||
|
||||
; Running ft-D with ft-BAD: expects two failures (c itself and d, which consumes c).
(artdag-test
  "failure count covers node and its dependents"
  (let ((cache (persist/open)))
    (let ((outcome (artdag/run-safe ft-D ft-BAD cache)))
      (artdag/failure-count outcome)))
  2)
|
||||
|
||||
; The failed-node list must equal the sorted ids of c and d.
(artdag-test
  "failed set is exactly c and d"
  (let ((cache (persist/open)))
    (let ((outcome (artdag/run-safe ft-D ft-BAD cache)))
      (artdag/failed-nodes outcome)))
  (let
    ((c-id (artdag/dag-id ft-D "c"))
     (d-id (artdag/dag-id ft-D "d")))
    (artdag/sort-strings (list c-id d-id))))
|
||||
|
||||
; Even with c failing, three nodes are expected to be computed (p, q and b).
(artdag-test
  "independent branch still computes"
  (let ((cache (persist/open)))
    (let ((outcome (artdag/run-safe ft-D ft-BAD cache)))
      (artdag/recompute-count outcome)))
  3)
|
||||
|
||||
; b = inc(p) = 1 + 10, and its value must be readable from the failed run.
(artdag-test
  "independent node result is available"
  (let ((cache (persist/open)))
    (let ((outcome (artdag/run-safe ft-D ft-BAD cache)))
      (let ((b-id (artdag/dag-id ft-D "b")))
        (artdag/result-of outcome b-id))))
  11)
|
||||
|
||||
; A run containing any failed node must not report all-ok.
(artdag-test
  "all-ok? is false when something failed"
  (let ((cache (persist/open)))
    (let ((outcome (artdag/run-safe ft-D ft-BAD cache)))
      (artdag/all-ok? outcome)))
  false)
|
||||
|
||||
; With ft-GOOD no op returns a failure, so the run must report all-ok.
(artdag-test
  "all-ok? is true on a clean run"
  (let ((cache (persist/open)))
    (let ((outcome (artdag/run-safe ft-D ft-GOOD cache)))
      (artdag/all-ok? outcome)))
  true)
|
||||
|
||||
; ---- cache integrity ----
|
||||
|
||||
; After a partially failing run, the cache must hold an entry under b's id.
(artdag-test
  "good node is cached"
  (let ((cache (persist/open)))
    (begin
      ; run for its effect on the cache; the returned report is not inspected
      (artdag/run-safe ft-D ft-BAD cache)
      (let ((b-id (artdag/dag-id ft-D "b")))
        (persist/kv-has? cache b-id))))
  true)
|
||||
|
||||
; After a partially failing run, the cache must hold no entry under c's id.
(artdag-test
  "failed node is never cached"
  (let ((cache (persist/open)))
    (begin
      ; run for its effect on the cache; the returned report is not inspected
      (artdag/run-safe ft-D ft-BAD cache)
      (let ((c-id (artdag/dag-id ft-D "c")))
        (persist/kv-has? cache c-id))))
  false)
|
||||
|
||||
; ---- retry after fix ----
|
||||
|
||||
; First run fails at c; the retry with ft-GOOD on the same cache is expected
; to recompute exactly two nodes (c and d).
(artdag-test
  "retry recomputes only the failed closure"
  (let ((cache (persist/open)))
    (begin
      (artdag/run-safe ft-D ft-BAD cache)
      (let ((retry (artdag/run-safe ft-D ft-GOOD cache)))
        (artdag/recompute-count retry))))
  2)
|
||||
|
||||
; On the retry with the same cache, three nodes (p, q, b) are expected to be cache hits.
(artdag-test
  "retry reuses the good nodes from cache"
  (let ((cache (persist/open)))
    (begin
      (artdag/run-safe ft-D ft-BAD cache)
      (let ((retry (artdag/run-safe ft-D ft-GOOD cache)))
        (artdag/hit-count retry))))
  3)
|
||||
|
||||
; After the retry, d = add(b, c) = 11 + 99.
(artdag-test
  "retry produces the correct result"
  (let ((cache (persist/open)))
    (begin
      (artdag/run-safe ft-D ft-BAD cache)
      (let ((retry (artdag/run-safe ft-D ft-GOOD cache)))
        (let ((d-id (artdag/dag-id ft-D "d")))
          (artdag/result-of retry d-id)))))
  110)
|
||||
|
||||
; ---- transitive cascade ----
|
||||
|
||||
; Linear chain a -> b -> c -> d where b is the "boom" node: b fails and the
; failure is expected to propagate to c and d, giving three failures.
(artdag-test
  "failure cascades through a deep chain"
  (let ((cache (persist/open)))
    (let
      ((chain
         (artdag/build
           (list
             (list "a" "in" (list) {:v 1})
             (list "b" "boom" (list "a") {})
             (list "c" "inc" (list "b") {})
             (list "d" "inc" (list "c") {})))))
      (artdag/failure-count (artdag/run-safe chain ft-BAD cache))))
  3)
|
||||
Reference in New Issue
Block a user