#!/usr/bin/env bash
# next/tests/delivery_retry_timer.sh — m2 Step 8b-timer.
#
# Live timer wiring on the delivery_worker gen_server. The pure
# bookkeeping is covered by delivery_retry.sh — this suite proves the
# erlang:send_after / cancel_timer wiring fires retries from the
# scheduler's logical clock without anyone calling drain by hand.
#
# Substrate dependency: erlang:send_after/3 + cancel_timer/1 +
# monotonic_time/0,1 — landed via cherry-pick from loops/erlang
# (commits 3709460d / 98b0104c / 779e53b2 on this branch).
#
# Test discipline: every test cancels its leftover timer before
# returning. If we don't, the scheduler keeps the run loop alive
# advancing time through the full backoff chain (30s → 5m → 30m →
# 6h → 24h), and each tick costs ~10s of wall time inside the
# Erlang-on-SX VM. Canceling the trailing timer is the difference
# between a 25s test and a 60s+ test.
#
# Usage: delivery_retry_timer.sh [-v]
#   -v            print each passing test and, on failure, the tail of
#                 the server output
# Environment:
#   SX_SERVER     path to sx_server.exe (defaults to the in-tree build)

# -e is deliberately not set: individual test failures are tallied in
# PASS/FAIL and reported at the end rather than aborting the run.
set -uo pipefail

# Run from the repository root so the relative SX_SERVER default below
# resolves. If git cannot report a root, stay in the current directory
# instead of aborting; the SX_SERVER checks below still guard the run.
if REPO_ROOT=$(git rev-parse --show-toplevel); then
  cd "$REPO_ROOT" || { echo "ERROR: cannot cd to $REPO_ROOT" >&2; exit 1; }
fi

# Resolve the server binary: caller override / in-tree build first,
# then the absolute fallback location.
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [ ! -x "$SX_SERVER" ]; then
  echo "ERROR: sx_server.exe not found." >&2
  exit 1
fi

VERBOSE="${1:-}"
PASS=0
FAIL=0
ERRORS=""

# Scratch file holding the epoch script fed to the server. The trap body
# is single-quoted so the path is expanded (and quoted) when the trap
# fires, not when it is installed.
TMPFILE=$(mktemp) || { echo "ERROR: mktemp failed." >&2; exit 1; }
trap 'rm -f -- "$TMPFILE"' EXIT

# A canned activity with cid <<1,2,3>>.
SETUP='Act = [{id, <<1,2,3>>}, {type, note}, {actor, alice}], FailFn = fun(_) -> {error, transient} end,'

# Convenience: cancel any leftover timer for cid <<1,2,3>> on Peer.
# Prevents the scheduler from grinding through 30s/5m/30m/6h/24h of
# retries between epochs.
CANCEL='CancelLeftover = fun(Peer) -> SS = delivery_worker:state_srv(Peer), case delivery_worker:timer_ref_for(<<1,2,3>>, SS) of undefined -> ok; LRef -> erlang:cancel_timer(LRef), ok end end,' cat > "$TMPFILE" <>]} = delivery_worker:flush(bob), S = delivery_worker:state_srv(bob), Ref = delivery_worker:timer_ref_for(<<1,2,3>>, S), Result = is_reference(Ref), CancelLeftover(bob), Result\") :name)") ;; T2 — initial flush bumps the attempt counter to 1; next_retry_at ;; gets set; cancel the timer before returning. (epoch 11) (eval "(get (erlang-eval-ast \"${SETUP}${CANCEL} delivery_worker:start_link(bob, FailFn), delivery_worker:enqueue(bob, Act), delivery_worker:flush(bob), S = delivery_worker:state_srv(bob), Result = delivery_worker:attempts_for(<<1,2,3>>, S) =:= 1, CancelLeftover(bob), Result\") :name)") ;; T3 — advancing the logical clock past the 30s backoff fires the ;; timer; handle_info({retry, Cid}) bumps attempts to 2 and arms ;; the next slot (backoff(2)=300s). Then cancel the new timer. (epoch 12) (eval "(get (erlang-eval-ast \"${SETUP}${CANCEL} delivery_worker:start_link(bob, FailFn), delivery_worker:enqueue(bob, Act), delivery_worker:flush(bob), receive after 31000 -> ok end, S = delivery_worker:state_srv(bob), Result = delivery_worker:attempts_for(<<1,2,3>>, S) =:= 2, CancelLeftover(bob), Result\") :name)") ;; T4 — after the retry fires the worker has armed a fresh timer ;; for the next backoff slot. Confirm it's a live ref, then ;; cancel it. (epoch 13) (eval "(get (erlang-eval-ast \"${SETUP}${CANCEL} delivery_worker:start_link(bob, FailFn), delivery_worker:enqueue(bob, Act), delivery_worker:flush(bob), receive after 31000 -> ok end, S = delivery_worker:state_srv(bob), Result = is_reference(delivery_worker:timer_ref_for(<<1,2,3>>, S)), CancelLeftover(bob), Result\") :name)") ;; T5 — successful retry path. Dispatch fails twice then succeeds ;; (ets-backed counter). 
;;      After two backoff slots elapse
;;      (30s, then 300s), the third attempt succeeds and
;;      record_success_pure clears the per-cid bookkeeping. No new
;;      timer is scheduled, so the scheduler terminates naturally.
(epoch 14)
(eval "(get (erlang-eval-ast \"${SETUP}
  ets:new(rt_ctr, [named_table, public]),
  ets:insert(rt_ctr, {n, 0}),
  Mixed = fun(_) ->
    [{n, N}] = ets:lookup(rt_ctr, n),
    ets:insert(rt_ctr, {n, N+1}),
    case N < 2 of
      true -> {error, transient};
      false -> ok
    end
  end,
  delivery_worker:start_link(carol, Mixed),
  delivery_worker:enqueue(carol, Act),
  delivery_worker:flush(carol),
  receive after 31000 -> ok end,
  receive after 301000 -> ok end,
  S = delivery_worker:state_srv(carol),
  delivery_worker:pending(S) =:= []
    andalso delivery_worker:attempts_for(<<1,2,3>>, S) =:= 0
    andalso delivery_worker:timer_ref_for(<<1,2,3>>, S) =:= undefined\") :name)")
EPOCHS

# Feed the whole epoch script to the server in one run and keep its
# stdout for per-epoch inspection. stderr is discarded, as before.
# The exit status is kept so that hitting the 900s cap is reported
# instead of surfacing only as missing epochs.
SERVER_STATUS=0
OUTPUT=$(timeout 900 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) || SERVER_STATUS=$?
if [ "$SERVER_STATUS" -eq 124 ]; then
  echo "WARNING: sx_server timed out after 900s; later epochs may be missing." >&2
fi

#######################################
# Compare the server's reply for one epoch against an expected substring
# and tally the result.
# Globals:   OUTPUT (read), VERBOSE (read), PASS, FAIL, ERRORS (written)
# Arguments: $1 - epoch number to look up in OUTPUT
#            $2 - human-readable test description
#            $3 - fixed string that must occur in the reply
# Outputs:   in -v mode, one "ok" line per passing test on stdout
# Returns:   0 always; failures are recorded in FAIL/ERRORS
#######################################
check() {
  local epoch="$1" desc="$2" expected="$3"
  local actual shown
  # First reply line for this epoch. An "(ok-len N …" header is followed
  # by the payload on the next line, so print that line instead.
  actual=$(printf '%s\n' "$OUTPUT" | awk -v e="$epoch" '
    $0 ~ "^\\(ok-len " e " " { getline; print; exit }
    $0 ~ "^\\(ok " e " "     { print; exit }
    $0 ~ "^\\(error " e " "  { print; exit }
  ')
  if printf '%s\n' "$actual" | grep -qF -- "$expected"; then
    PASS=$((PASS + 1))
    if [ "$VERBOSE" = "-v" ]; then
      echo " ok $desc"
    fi
  else
    FAIL=$((FAIL + 1))
    # The placeholder is used only in the report, never in the match
    # above, so it cannot satisfy an expectation by accident.
    shown="${actual:-<no output for epoch $epoch>}"
    ERRORS+="  FAIL [$desc] (epoch $epoch)
    expected: $expected
    actual:   $shown
"
  fi
  return 0
}

check 10 "T1 flush schedules a timer" "true"
check 11 "T2 initial flush bumps attempts to 1" "true"
check 12 "T3 timer fires; attempts=2" "true"
check 13 "T4 retry rearms next timer" "true"
check 14 "T5 success clears retry state" "true"

TOTAL=$((PASS + FAIL))
if [ "$FAIL" -eq 0 ]; then
  echo "ok $PASS/$TOTAL next/tests/delivery_retry_timer.sh passed"
else
  echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
  echo "$ERRORS"
  if [ "$VERBOSE" = "-v" ]; then
    echo "--- sx_server output ---"
    printf '%s\n' "$OUTPUT" | tail -40
    echo "---"
  fi
fi

# Exit status of the script: success only when nothing failed.
[ "$FAIL" -eq 0 ]