; lib/artdag/fault.sx — fault-tolerant execution.
;
; A node op may fail by returning (artdag/fail reason). The failure is confined
; to that node and its transitive dependents (which cannot run without it),
; while independent branches still compute. Failed results are NEVER cached,
; so a later run with the fault fixed recomputes only the failed closure.
;
; Depends on execute.sx and plan.sx.

; Build the failure sentinel a node op returns to report `reason`.
(define artdag/fail
  (fn (reason)
    {:artdag-fail true :reason reason}))

; True when `v` is a dict that carries the :artdag-fail key.
; Only the presence of the key is checked, not its value.
(define artdag/failed?
  (fn (v)
    (and (dict? v) (has-key? v :artdag-fail))))

; Private: `acc` with `id` appended to its :failed list.
(define artdag/-mark-failed
  (fn (acc id)
    (assoc acc :failed (concat (get acc :failed) (list id)))))

; Private: `acc` with `value` stored under `id` in :results and `id` appended
; to the list held under `bucket` (:hits or :recomputed).
(define artdag/-record
  (fn (acc bucket id value)
    (assoc acc
      :results (assoc (get acc :results) id value)
      bucket (concat (get acc bucket) (list id)))))

; Fold step for one node `id`, returning the updated accumulator.
; Checks are made in this order:
;   1. any input already listed in :failed -> the node is marked failed
;      without consulting the cache or calling the runner;
;   2. the cache holds `id`                -> the cached value is a hit;
;   3. otherwise the runner is called with the node's op, params and the
;      resolved input values. A failure sentinel marks the node failed and
;      is not written to the cache; any other result is cached and recorded
;      under :recomputed.
(define artdag/-exec-safe-node
  (fn (dag runner cache acc id)
    (let ((spec (artdag/dag-get dag id)))
      (let ((deps (artdag/node-inputs spec)))
        (if (some (fn (dep) (artdag/member? dep (get acc :failed))) deps)
          (artdag/-mark-failed acc id)
          (if (persist/kv-has? cache id)
            (artdag/-record acc :hits id (persist/kv-get cache id))
            (let ((args (map (fn (dep) (artdag/-input-result (get acc :results) cache dep)) deps)))
              (let ((out (runner (artdag/node-op spec) (artdag/node-params spec) args)))
                (if (artdag/failed? out)
                  (artdag/-mark-failed acc id)
                  (begin
                    ; Write to the cache before building the new accumulator.
                    (persist/kv-put cache id out)
                    (artdag/-record acc :recomputed id out)))))))))))

; Run every node of `dag` in flattened plan order, threading an accumulator
; with the keys :recomputed, :results, :hits and :failed through the fold.
; Returns that accumulator.
(define artdag/run-safe
  (fn (dag runner cache)
    (let ((order (artdag/plan-flatten (artdag/plan dag 0))))
      (reduce
        (fn (acc id) (artdag/-exec-safe-node dag runner cache acc id))
        {:recomputed (list) :results {} :hits (list) :failed (list)}
        order))))

; The ids under :failed of an execution result, passed through
; artdag/sort-strings.
(define artdag/failed-nodes
  (fn (exec)
    (artdag/sort-strings (get exec :failed))))

; Number of ids under :failed of an execution result.
(define artdag/failure-count
  (fn (exec)
    (len (get exec :failed))))

; True when the execution result lists no failed ids.
(define artdag/all-ok?
  (fn (exec)
    (= (artdag/failure-count exec) 0)))