agentic-sx Phase 4: durable — agent sessions as durable flow workflows (TDD)

Deterministic replay IS the durability mechanism: every transition re-runs a
self-contained flow program (defflow source + flow/start + replay of all
recorded resume values), so the only durable state is {:flow :input :resumes}
in persist kv — restart-safe by construction (fresh space handles over the
same backend resume mid-flight runs). fork-an-agent-run = copy the record;
the two replays diverge independently. Effects are data (suspend tags +
typed request envelopes surface as plain SX); transitions ride the Phase-3
trace buffer so session history travels with the next commit. Guest numeric
results compared with = per house convention. 43/43 (196/196 total).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-03 13:13:50 +00:00
parent b92095ccaf
commit c66ee35010
5 changed files with 523 additions and 5 deletions

View File

@@ -0,0 +1,281 @@
; Phase 4 — durable: agent sessions as durable flow workflows. Fixture story:
; worker-1 runs the two-suspend "triage" flow to completion; worker-1b proves
; restart-safety (fresh space handles over the same backend, resume across
; the restart); worker-2's mid-flight run is FORKED to worker-2b and the two
; replays diverge; worker-3 exercises typed (request kind payload) effects
; and the trace-buffer composition with Phase 3.
; NOTE: numbers computed inside the guest are compared with = (numeric
; equality), not equal? — guest numerics box differently at the boundary.
; Shared fixtures. Every section below reuses agd-db (the persist backend;
; presumably in-memory, going by the constructor name) and agd-sp (a space
; handle over that backend, named "agentic-durable-test").
(define agd-db (persist/mem-backend))
(define agd-sp (agentic/space agd-db "agentic-durable-test"))
; worker-1: the agent that later runs the "triage" session start-to-done.
; The returned value (agd-a) is not read again in this file; the define is
; kept for the spawn side effect.
(define
agd-a
(agentic/spawn!
agd-sp
"worker-1"
(agentic/briefing "long task" "run a durable session" {})))
; worker-1b: spawned via spawn-from! with "worker-1" as the last argument
; (presumably the parent/origin agent — confirm against agentic/spawn-from!).
; It is used below both as a "session-less agent" and for the restart tests.
(define
agd-b
(agentic/spawn-from!
agd-sp
"worker-1b"
(agentic/briefing "second worker" "restart survivor" {})
"worker-1"))
; Register the "triage" flow as source text. The flow is a two-step sequence:
;   step 1: x + (value resumed for the ask-priority suspend)
;   step 2: y * (value resumed for the ask-factor suspend)
; so a full run computes (input + priority) * factor. The expected results
; asserted below (45, 714, 1500) all follow from this formula.
(agentic/defsession!
agd-sp
"triage"
"(defflow triage (sequence (lambda (x) (+ x (suspend (quote ask-priority)))) (lambda (y) (* y (suspend (quote ask-factor))))))")
; ---- literals across the guest boundary ----
; agentic-test is called throughout as (agentic-test name actual expected).
; scm-lit: host value -> guest source text. Per the expectations below, a
; number prints bare, a string is emitted with escaped double quotes, and a
; list becomes a (list ...) form whose elements are rendered the same way.
(agentic-test "scm-lit numbers" (agentic/scm-lit 42) "42")
(agentic-test "scm-lit strings" (agentic/scm-lit "hi") "\"hi\"")
(agentic-test
"scm-lit lists nest"
(agentic/scm-lit (list 1 "a"))
"(list 1 \"a\")")
; scm-out: the reverse direction. A guest string arrives wrapped as
; {:scm-string ...} and is expected to come back as a plain host string.
(agentic-test
"scm-out unboxes scheme strings"
(agentic/scm-out {:scm-string "x"})
"x")
; ---- lifecycle: start / suspend / resume / done ----
; This half covers start + first suspension and the two failed-start paths.
; Order matters: each call mutates the session state stored for the agent.
; The flow registered under "triage" reads back as its source text.
(agentic-test
"session flow source is durable"
(starts-with? (agentic/session-def agd-sp "triage") "(defflow")
true)
; Before any session-start!, status for a spawned agent is the string "none".
(agentic-test
"no session before start"
(agentic/session-status agd-sp "worker-1")
"none")
; Start worker-1 on "triage" with input 10. The result map carries :status
; and :tag; the tag is the quoted symbol from the flow, surfaced as a string.
(define agd-s1 (agentic/session-start! agd-sp "worker-1" "triage" 10))
(agentic-test
"start suspends at the first effect"
(get agd-s1 :status)
"suspended")
(agentic-test "the suspend tag is data" (get agd-s1 :tag) "ask-priority")
; The same information is readable later through the query functions.
(agentic-test
"session-status tracks the suspension"
(agentic/session-status agd-sp "worker-1")
"suspended")
(agentic-test
"session-pending exposes the tag"
(agentic/session-pending agd-sp "worker-1")
"ask-priority")
; Failure paths return a map with an :error string rather than a session.
; "ghost" was never spawned in this file.
(agentic-test
"start on unknown agent fails"
(get (agentic/session-start! agd-sp "ghost" "triage" 1) :error)
"no-such-agent")
; "frobnicate" was never registered with defsession!.
(agentic-test
"start with unknown flow fails"
(get
(agentic/session-start! agd-sp "worker-1b" "frobnicate" 1)
:error)
"no-such-session-flow")
; The failed start above must not have created a record for worker-1b; later
; tests rely on worker-1b still being session-less at this point.
(agentic-test
"a failed start leaves no session"
(agentic/session-status agd-sp "worker-1b")
"none")
; Resume half of the lifecycle. worker-1 was started on "triage" with input
; 10 and is suspended at ask-priority when this section begins.
; First resume answers ask-priority with 5; the run stops at the second
; suspend, whose tag is "ask-factor".
(define agd-s2 (agentic/session-resume! agd-sp "worker-1" 5))
(agentic-test
"resume replays to the next effect"
(get agd-s2 :tag)
"ask-factor")
; worker-1b is spawned but has no session yet, so resume reports "no-session".
(agentic-test
"resume on session-less agent fails"
(get (agentic/session-resume! agd-sp "worker-1b" 1) :error)
"no-session")
; Second resume answers ask-factor with 3 and finishes the flow.
(define agd-s3 (agentic/session-resume! agd-sp "worker-1" 3))
(agentic-test
"final resume completes the session"
(get agd-s3 :status)
"done")
; (10 + 5) * 3 = 45. Compared with = and asserted against true, following
; the file's convention for numbers computed inside the guest.
(agentic-test
"deterministic replay computes the result"
(= (get agd-s3 :result) 45)
true)
(agentic-test
"session-status done"
(agentic/session-status agd-sp "worker-1")
"done")
(agentic-test
"session-result reads back"
(= (agentic/session-result agd-sp "worker-1") 45)
true)
; A finished session rejects further resumes with "not-suspended".
(agentic-test
"resume after done fails"
(get (agentic/session-resume! agd-sp "worker-1" 9) :error)
"not-suspended")
; The stored record's :resumes holds the accepted resume values in order
; (5 then 3). The rejected resume with 9 above does not appear in it.
(agentic-test
"the record keeps the full replay history"
(=
(get (agentic/session-record agd-sp "worker-1") :resumes)
(list 5 3))
true)
; ---- restart: a fresh space handle over the same backend ----
; A "restart" is modelled by building a new space handle from the same
; agd-db and space name, then reading/resuming sessions through it.
; agd-sp2: opened after worker-1 finished; it must see the done state and
; the result 45 written through agd-sp.
(define agd-sp2 (agentic/space agd-db "agentic-durable-test"))
(agentic-test
"restart sees the finished session"
(agentic/session-status agd-sp2 "worker-1")
"done")
(agentic-test
"restart sees the result"
(= (agentic/session-result agd-sp2 "worker-1") 45)
true)
; Mid-flight case: start worker-1b (input 100) through the ORIGINAL handle,
; then open agd-sp3 afterwards and do every subsequent step through it.
; agd-s4 itself is not read again; the define is kept for the side effect.
(define
agd-s4
(agentic/session-start! agd-sp "worker-1b" "triage" 100))
(define agd-sp3 (agentic/space agd-db "agentic-durable-test"))
(agentic-test
"restart mid-flight stays suspended"
(agentic/session-status agd-sp3 "worker-1b")
"suspended")
; Answer ask-priority with 2 via the new handle; the run moves to ask-factor.
(agentic-test
"resume across the restart replays deterministically"
(get (agentic/session-resume! agd-sp3 "worker-1b" 2) :tag)
"ask-factor")
; Answer ask-factor with 7: (100 + 2) * 7 = 714.
(agentic-test
"the resumed run completes across the restart"
(=
(get (agentic/session-resume! agd-sp3 "worker-1b" 7) :result)
714)
true)
; ---- fork-an-agent-run: copy the record, replays diverge ----
; worker-2 is the mainline agent; worker-2b is the fork target. Both must be
; spawned before the fork (see the "fork target must be spawned" test).
; agd-w2, agd-w2b, agd-f0 and agd-f1 are not read again in this file; the
; defines are kept for their side effects.
(define
agd-w2
(agentic/spawn!
agd-sp
"worker-2"
(agentic/briefing "explore" "mainline run" {})))
(define
agd-w2b
(agentic/spawn-from!
agd-sp
"worker-2b"
(agentic/briefing "explore alt" "forked run" {})
"worker-2"))
; Bring worker-2 to the second suspend: input 10, ask-priority answered with
; 5, so the pending value is 15 and the pending tag is ask-factor.
(define agd-f0 (agentic/session-start! agd-sp "worker-2" "triage" 10))
(define agd-f1 (agentic/session-resume! agd-sp "worker-2" 5))
; Fork the mid-flight session from worker-2 onto worker-2b.
(define agd-fork (agentic/session-fork! agd-sp "worker-2" "worker-2b"))
; The fork result reports the same pending tag the mainline is waiting on.
(agentic-test
"fork replays to the same suspended state"
(get agd-fork :tag)
"ask-factor")
(agentic-test
"forked session is live"
(agentic/session-status agd-sp "worker-2b")
"suspended")
; worker-2b's record carries the resume history taken so far: just (5).
(agentic-test
"forked history is copied"
(=
(get (agentic/session-record agd-sp "worker-2b") :resumes)
(list 5))
true)
; Mainline answers ask-factor with 3: (10 + 5) * 3 = 45.
(agentic-test
"mainline resumes its own way"
(=
(get (agentic/session-resume! agd-sp "worker-2" 3) :result)
45)
true)
; The fork answers ask-factor with 100: (10 + 5) * 100 = 1500.
(agentic-test
"fork diverges independently"
(=
(get (agentic/session-resume! agd-sp "worker-2b" 100) :result)
1500)
true)
; Re-read the mainline result after the fork finished: still 45.
(agentic-test
"the fork's divergence never touches the mainline"
(= (agentic/session-result agd-sp "worker-2") 45)
true)
; Error paths. "worker-1x" is never spawned in this file and has no session.
; NOTE(review): the target here (worker-2b) already holds a session, so the
; expected "no-session" implies the source is checked before the target —
; confirm against agentic/session-fork!.
(agentic-test
"fork needs an existing session"
(get (agentic/session-fork! agd-sp "worker-1x" "worker-2b") :error)
"no-session")
; "ghost" was never spawned.
(agentic-test
"fork target must be spawned"
(get (agentic/session-fork! agd-sp "worker-2" "ghost") :error)
"no-such-agent")
; worker-2b already has a session (created by the fork above), so forking
; onto it a second time is refused.
(agentic-test
"fork refuses to clobber a session"
(get (agentic/session-fork! agd-sp "worker-2" "worker-2b") :error)
"session-exists")
; ---- typed effects: (request kind payload) envelopes as data ----
; "review-loop" flow: step 1 calls await-human with the list (approve? x);
; step 2 branches on the resumed value d, yielding the symbol shipped when
; (eq? d 1) holds and parked otherwise.
(agentic/defsession!
agd-sp
"review-loop"
"(defflow review-loop (sequence (lambda (x) (await-human (list (quote approve?) x))) (branch (lambda (d) (eq? d 1)) (flow-const (quote shipped)) (flow-const (quote parked)))))")
; worker-3 runs the review-loop session. agd-w3 is not read again in this
; file; the define is kept for the spawn side effect.
(define
agd-w3
(agentic/spawn!
agd-sp
"worker-3"
(agentic/briefing "ship it" "review then ship" {})))
; Start with input 7; the run stops at the await-human effect.
(define
agd-r1
(agentic/session-start! agd-sp "worker-3" "review-loop" 7))
; For await-human the :tag is a structured request rather than a plain
; string. It is inspected only through the effect-* accessors below.
(agentic-test
"request effects are typed envelopes"
(agentic/effect-request? (get agd-r1 :tag))
true)
; Kind is "human"; the payload is the list built inside the flow, with the
; quoted symbol approve? surfacing as the string "approve?" and x as 7.
(agentic-test "effect kind" (agentic/effect-kind (get agd-r1 :tag)) "human")
(agentic-test
"effect payload"
(=
(agentic/effect-payload (get agd-r1 :tag))
(list "approve?" 7))
true)
; A plain suspend tag (a string) is not classified as a request envelope.
(agentic-test
"plain tags are not request envelopes"
(agentic/effect-request? "ask-priority")
false)
; Resuming with 1 takes the (eq? d 1) branch; the result symbol shipped
; surfaces as the string "shipped".
(agentic-test
"the human decision resumes the session"
(get (agentic/session-resume! agd-sp "worker-3" 1) :result)
"shipped")
; ---- composition with Phase 3: transitions ride the trace buffer ----
; worker-3 has made exactly two session transitions in this file (one start,
; one resume), so its pending trace should hold two entries whose :kind is
; "session". Filtering on :kind keeps the count independent of any other
; entry kinds that may be in the buffer.
(agentic-test
"session transitions land in the trace buffer"
(len
(filter
(fn (e) (= (get e :kind) "session"))
(agentic/trace-pending agd-sp "worker-3")))
2)
; Commit as worker-3 with a one-file map and a message. The third argument
; "decision" is passed through as-is (its meaning is not visible here —
; confirm against agentic/commit-with-trace!). The result map's :cid is used
; below to look up the trace bound to this commit.
(define
agd-c
(agentic/commit-with-trace!
agd-sp
"worker-3"
"decision"
(assoc {} "ship.md" "approved\n")
{:message "shipped"}))
; The trace bound to the commit holds 2 entries in total. Unlike the pending
; check above, this count is not filtered by :kind.
(agentic-test
"the session history travels with the commit"
(len (agentic/trace-entries (agentic/trace-for agd-sp (get agd-c :cid))))
2)
; Entry 0 is the start transition; its :text is "start " plus the flow name.
(agentic-test
"the bound trace records the session start"
(get
(nth
(agentic/trace-entries (agentic/trace-for agd-sp (get agd-c :cid)))
0)
:text)
"start review-loop")