Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m4s
fault.sx run-safe: a node op may return (artdag/fail reason); failure is confined to that node + downstream dependents while independent branches compute, and failed results are never cached, so retry after a fix recomputes only the failed closure and hits the good nodes. fault 14/14, total 158/158. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
57 lines
2.2 KiB
Plaintext
57 lines
2.2 KiB
Plaintext
; lib/artdag/fault.sx — fault-tolerant execution. A node op may fail by returning
; (artdag/fail reason); the failure is confined to that node and its transitive
; dependents (which cannot run without it), while independent branches still
; compute. Failed results are NEVER cached, so a later run with the fault fixed
; recomputes only the failed closure. Depends on execute.sx and plan.sx.
|
|
|
|
; Build the failure marker a node op returns in place of a normal result.
;   reason — stored unchanged under :reason
; Returns a dict with :artdag-fail set to true and :reason set to reason.
(define
  artdag/fail
  (fn (reason)
    {:artdag-fail true :reason reason}))
|
|
; Predicate: is v a failure marker?
; True only when v is a dict that carries the :artdag-fail key. Only the key's
; presence is tested; the value stored under it is not inspected.
(define
  artdag/failed?
  (fn (v)
    (if (dict? v)
      (has-key? v :artdag-fail)
      false)))
|
|
|
|
; Process one node and return the updated accumulator.
;   dag    — graph handed to artdag/dag-get to look the node up by id
;   runner — called as (runner op params inputs); its return value is checked
;            with artdag/failed?
;   cache  — store accessed through persist/kv-has?, persist/kv-get, persist/kv-put
;   acc    — dict carrying :results, :hits, :recomputed and :failed
;   id     — id of the node to process
; Outcomes, tried in this order:
;   1. an input id is already listed in :failed -> id is appended to :failed and
;      the runner is not called
;   2. the cache holds id -> the cached value is stored in :results and id is
;      appended to :hits
;   3. otherwise the runner is called; a failed result appends id to :failed and
;      is not written to the cache, any other result is written to the cache and
;      recorded in :results and :recomputed
(define
  artdag/-exec-safe-node
  (fn (dag runner cache acc id)
    ; Both failure paths record the node the same way, so share one closure.
    (let ((mark-failed
            (fn (state)
              (assoc state :failed (concat (get state :failed) (list id))))))
      (let ((node (artdag/dag-get dag id)))
        (let ((deps (artdag/node-inputs node)))
          (if (some (fn (dep) (artdag/member? dep (get acc :failed))) deps)
            ; An upstream failure blocks this node.
            (mark-failed acc)
            (if (persist/kv-has? cache id)
              ; Cache hit: reuse the stored value.
              (assoc
                acc
                :results (assoc (get acc :results) id (persist/kv-get cache id))
                :hits (concat (get acc :hits) (list id)))
              ; Cache miss: resolve each input, then run the op.
              (let ((args
                      (map
                        (fn (dep) (artdag/-input-result (get acc :results) cache dep))
                        deps)))
                (let ((outcome
                        (runner (artdag/node-op node) (artdag/node-params node) args)))
                  (if (artdag/failed? outcome)
                    ; A failed outcome is recorded but never cached.
                    (mark-failed acc)
                    (begin
                      (persist/kv-put cache id outcome)
                      (assoc
                        acc
                        :results (assoc (get acc :results) id outcome)
                        :recomputed (concat (get acc :recomputed) (list id))))))))))))))
|
|
|
|
; Run the whole dag, tolerating node failures.
;   dag    — graph to execute
;   runner — passed through to artdag/-exec-safe-node for each node
;   cache  — passed through to artdag/-exec-safe-node for each node
; Folds artdag/-exec-safe-node over the ids produced by
; (artdag/plan-flatten (artdag/plan dag 0)), starting from an accumulator whose
; :recomputed, :hits and :failed lists and :results dict are all empty.
; Returns the final accumulator.
; NOTE(review): the meaning of the 0 passed to artdag/plan is not visible in
; this file — confirm against plan.sx.
(define
  artdag/run-safe
  (fn (dag runner cache)
    (let ((order (artdag/plan-flatten (artdag/plan dag 0))))
      (let ((step (fn (acc id) (artdag/-exec-safe-node dag runner cache acc id))))
        (reduce
          step
          {:recomputed (list) :results {} :hits (list) :failed (list)}
          order)))))
|
|
|
|
; The :failed list of an execution result, passed through artdag/sort-strings.
;   exec — dict with a :failed entry
(define
  artdag/failed-nodes
  (fn (exec)
    (let ((failed-ids (get exec :failed)))
      (artdag/sort-strings failed-ids))))
|
|
; Number of entries in the :failed list of an execution result.
;   exec — dict with a :failed entry
(define
  artdag/failure-count
  (fn (exec)
    (let ((failed-ids (get exec :failed)))
      (len failed-ids))))
|
|
; True when the :failed list of an execution result is empty.
;   exec — dict with a :failed entry
(define
  artdag/all-ok?
  (fn (exec)
    (let ((failure-total (len (get exec :failed))))
      (= 0 failure-total))))
|