From 9621599606f16bc630d271f9afc6eeb4e6d072af Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 7 Jun 2026 05:39:46 +0000 Subject: [PATCH] =?UTF-8?q?fed-sx-m2:=20Step=209a=20=E2=80=94=20pure-funct?= =?UTF-8?q?ional=20backfill=20slicing=20+=2020=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New next/kernel/backfill.erl owns the §13.3 backfill mode slicing. Given an outbox log + a mode, returns the activity list to send to a new follower as backfill. Public API: slice/2(Mode, LogState) default Wrap=false slice/3(Mode, LogState, Wrap) Wrap=true wraps entries wrap_backfill/1 add {backfilled, true} parse_mode/1 lift Follow :backfill field Modes: none new follower: forward-only content full entire outbox {last_n, N} last N activities (FIFO) {last_t, T, NowFn} entries with :published in (NowFn()-T .. NowFn()] {since_cid, Cid} entries after the one with :id = Cid (consumes the matched entry; returns every entry after it) wrap_backfill/1 marks each entry {backfilled, true}. Per §13.3 wrapped bodies preserve :id so the receiver's replay defence still catches duplicates from the live stream. parse_mode/1 accepts: nil / none / full / {last_n, _} / {last_t, _, _} / {since_cid, _} — pass through or normalize Proplist with :mode + :limit -> {last_n, N} Proplist with :mode + :duration -> {last_t, T, fun() -> 0 end} Proplist with :mode = full -> full Anything else -> none (open-world default) Substrate gotchas re-confirmed and worked around: - lists:nthtail/2 not registered — rolled drop_n/2 - Pattern-alias 'Pat = Var' not supported by this port's parser — parse_mode/1 clauses use explicit deconstruction 20/20 in next/tests/backfill.sh covering all five modes plus edge cases (N=0, N>length, T=0 -> empty window, since_cid hit/miss/unknown), wrap_backfill semantics, parse_mode for atoms / tuple shapes / proplists / unknown / nil. Step 9b (outbox listing ?since=Cid&limit=N pagination) and Step 9c (Follow-Accept-backfill wiring) layer on top. Conformance preserved at 761/761. --- next/kernel/backfill.erl | 136 +++++++++++++++++++++++++++++ next/tests/backfill.sh | 170 ++++++++++++++++++++++++++++++++++++ plans/fed-sx-milestone-2.md | 51 +++++++++-- 3 files changed, 352 insertions(+), 5 deletions(-) create mode 100644 next/kernel/backfill.erl create mode 100755 next/tests/backfill.sh diff --git a/next/kernel/backfill.erl b/next/kernel/backfill.erl new file mode 100644 index 00000000..a4760535 --- /dev/null +++ b/next/kernel/backfill.erl @@ -0,0 +1,136 @@ +-module(backfill). +-export([slice/2, slice/3, + wrap_backfill/1, parse_mode/1, + all_entries/1, last_n_entries/2, last_t_entries/3, + since_cid_entries/2, none_entries/0]). + +%% Backfill mode slicing per design §13.3 / Step 9. When A follows B +%% with a backfill spec, B's kernel slices the outbox log into the +%% appropriate window and delivers each entry as +%% `{backfilled, true}`-marked envelopes alongside forward-going +%% activity. +%% +%% Mode shapes (per the Follow activity's `:backfill` field): +%% none — newer follower sees only forward content +%% {last_n, N} — backfill last N activities (FIFO order) +%% {last_t, T, NowFn} — backfill activities with :published in +%% (Now - T .. Now]. NowFn is a 0-arity fun +%% so tests can fake-time it. +%% full — backfill the entire outbox +%% +%% slice/2 returns the activity list. slice/3 also wraps each entry +%% with `{backfilled, true}` so projections can decide whether to +%% re-fold or skip (the §13.3 Backfilled bodies preserve the +%% original `:id` so replay defence still works on the receiver). +%% +%% parse_mode/1 lifts the Follow activity's `:backfill` proplist +%% (or atom) into the internal mode tuple. Unknown shapes fall back +%% to `none` — the default open-world policy. + +slice(Mode, LogState) -> + slice(Mode, LogState, false). + +slice(Mode, LogState, Wrap) -> + Entries = log:entries(LogState), + Slice = case Mode of + none -> none_entries(); + full -> all_entries(Entries); + {last_n, N} -> last_n_entries(N, Entries); + {last_t, T, NowFn} -> last_t_entries(T, NowFn, Entries); + {since_cid, Cid} -> since_cid_entries(Cid, Entries); + _ -> none_entries() + end, + case Wrap of + true -> wrap_backfill(Slice); + _ -> Slice + end. + +%% ── Mode-specific entry selection ───────────────────────────── + +all_entries(Entries) -> Entries. + +none_entries() -> []. + +%% last_n_entries/2 — tail N entries in FIFO order. + +last_n_entries(N, _) when N =< 0 -> []; +last_n_entries(N, Entries) -> + Len = length(Entries), + case Len =< N of + true -> Entries; + false -> drop_n(Len - N, Entries) + end. + +drop_n(0, L) -> L; +drop_n(_, []) -> []; +drop_n(N, [_ | Rest]) -> drop_n(N - 1, Rest). + +%% last_t_entries/3 — entries whose :published is within the last +%% T units of (NowFn() - T .. NowFn()]. T and :published are +%% integers (seconds-since-epoch in production; opaque ints in tests). + +last_t_entries(T, NowFn, Entries) when is_integer(T), T >= 0 -> + Now = NowFn(), + Cutoff = Now - T, + [E || E <- Entries, in_window(E, Cutoff, Now)]; +last_t_entries(_, _, _) -> []. + +in_window(Activity, Cutoff, Now) -> + case envelope:get_field(published, Activity) of + {ok, P} when is_integer(P), P > Cutoff, P =< Now -> true; + _ -> false + end. + +%% since_cid_entries/2 — every entry after the one with :id = Cid. +%% If Cid isn't in the log, returns [] (caller's pointer is stale). +%% Used by `GET /actors//outbox?since=Cid` pagination. + +since_cid_entries(_Cid, []) -> []; +since_cid_entries(Cid, [E | Rest]) -> + case envelope:get_field(id, E) of + {ok, Cid} -> Rest; + _ -> since_cid_entries(Cid, Rest) + end. + +%% wrap_backfill/1 — append `{backfilled, true}` to each entry. +%% The receiving projection scheduler reads this field and chooses +%% whether to fold (re-emit) or skip (already known via replay +%% defence on `:id`). + +wrap_backfill([]) -> []; +wrap_backfill([E | Rest]) -> + [E ++ [{backfilled, true}] | wrap_backfill(Rest)]. + +%% parse_mode/1 — Lift a Follow activity's `:backfill` value into the +%% internal mode tuple. Accepts: +%% nil / not_found -> none +%% none -> none +%% full -> full +%% {last_n, N} -> {last_n, N} (already-parsed shape) +%% {last_t, T, NowFn} -> pass-through +%% Proplist with :mode + :limit / :duration -> parsed +%% Unknown shape -> none (open-world default). + +parse_mode(nil) -> none; +parse_mode(none) -> none; +parse_mode(full) -> full; +parse_mode({last_n, N}) -> {last_n, N}; +parse_mode({last_t, T, NowFn}) -> {last_t, T, NowFn}; +parse_mode({since_cid, Cid}) -> {since_cid, Cid}; +parse_mode(List) when is_list(List) -> + case envelope:get_field(mode, List) of + {ok, last_n} -> + case envelope:get_field(limit, List) of + {ok, N} when is_integer(N) -> {last_n, N}; + _ -> none + end; + {ok, last_t} -> + case envelope:get_field(duration, List) of + {ok, T} when is_integer(T) -> {last_t, T, fun () -> 0 end}; + _ -> none + end; + {ok, full} -> full; + {ok, none} -> none; + _ -> none + end; +parse_mode(_) -> none. diff --git a/next/tests/backfill.sh b/next/tests/backfill.sh new file mode 100755 index 00000000..c9681d18 --- /dev/null +++ b/next/tests/backfill.sh @@ -0,0 +1,170 @@ +#!/usr/bin/env bash +# next/tests/backfill.sh — m2 Step 9a test. +# +# Backfill mode slicing per design §13.3. Given an outbox log + +# a mode (none / last_n / last_t / full / since_cid), backfill:slice +# returns the activity list to send to a new follower as backfill. + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +if [ ! -x "$SX_SERVER" ]; then + SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +fi +if [ ! -x "$SX_SERVER" ]; then + echo "ERROR: sx_server.exe not found." >&2 + exit 1 +fi + +VERBOSE="${1:-}" +PASS=0; FAIL=0; ERRORS="" +TMPFILE=$(mktemp); trap "rm -f $TMPFILE" EXIT + +# Five activities published at :published = 1, 2, 3, 4, 5 +SETUP='Act1 = [{id, <<1>>}, {type, note}, {actor, alice}, {published, 1}], Act2 = [{id, <<2>>}, {type, note}, {actor, alice}, {published, 2}], Act3 = [{id, <<3>>}, {type, note}, {actor, alice}, {published, 3}], Act4 = [{id, <<4>>}, {type, note}, {actor, alice}, {published, 4}], Act5 = [{id, <<5>>}, {type, note}, {actor, alice}, {published, 5}], {ok, L0} = log:open(alice, <<98,97,115,101>>), {ok, L1, _} = log:append(L0, Act1), {ok, L2, _} = log:append(L1, Act2), {ok, L3, _} = log:append(L2, Act3), {ok, L4, _} = log:append(L3, Act4), {ok, L5, _} = log:append(L4, Act5),' + +cat > "$TMPFILE" < [] +(epoch 10) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice(none, L5) =:= []\") :name)") + +;; full mode -> all 5 +(epoch 11) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice(full, L5) =:= [Act1, Act2, Act3, Act4, Act5]\") :name)") + +;; last_n with N=2 -> tail 2 (Act4, Act5) +(epoch 12) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_n, 2}, L5) =:= [Act4, Act5]\") :name)") + +;; last_n with N > total -> all entries +(epoch 13) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_n, 100}, L5) =:= [Act1, Act2, Act3, Act4, Act5]\") :name)") + +;; last_n with N = 0 -> [] +(epoch 14) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_n, 0}, L5) =:= []\") :name)") + +;; last_t with T=2, Now=5 -> activities with :published > 3 and <= 5 -> [Act4, Act5] +(epoch 15) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_t, 2, fun() -> 5 end}, L5) =:= [Act4, Act5]\") :name)") + +;; last_t with T=10, Now=5 -> covers everything from :published > -5 -> all 5 +(epoch 16) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_t, 10, fun() -> 5 end}, L5) =:= [Act1, Act2, Act3, Act4, Act5]\") :name)") + +;; last_t with T=0, Now=5 -> only entries at exactly Now (>0, <=5) — really [] because window is (5..5] +(epoch 17) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_t, 0, fun() -> 5 end}, L5) =:= []\") :name)") + +;; since_cid with the 2nd cid -> entries AFTER it (Act3..Act5) +(epoch 18) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({since_cid, <<2>>}, L5) =:= [Act3, Act4, Act5]\") :name)") + +;; since_cid with last cid -> [] +(epoch 19) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({since_cid, <<5>>}, L5) =:= []\") :name)") + +;; since_cid with unknown cid -> [] +(epoch 20) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({since_cid, <<99>>}, L5) =:= []\") :name)") + +;; wrap_backfill adds {backfilled, true} to each entry +(epoch 21) +(eval "(get (erlang-eval-ast \"${SETUP} Wrapped = backfill:slice({last_n, 1}, L5, true), [Act5W] = Wrapped, envelope:get_field(backfilled, Act5W) =:= {ok, true}\") :name)") + +;; Wrapped entries preserve :id +(epoch 22) +(eval "(get (erlang-eval-ast \"${SETUP} Wrapped = backfill:slice({last_n, 1}, L5, true), [Act5W] = Wrapped, envelope:get_field(id, Act5W) =:= {ok, <<5>>}\") :name)") + +;; parse_mode: nil / none / atoms +(epoch 23) +(eval "(get (erlang-eval-ast \"{backfill:parse_mode(nil), backfill:parse_mode(none), backfill:parse_mode(full)} =:= {none, none, full}\") :name)") + +;; parse_mode: tuple shapes pass through +(epoch 24) +(eval "(get (erlang-eval-ast \"backfill:parse_mode({last_n, 3}) =:= {last_n, 3}\") :name)") + +;; parse_mode: proplist with mode + limit +(epoch 25) +(eval "(get (erlang-eval-ast \"backfill:parse_mode([{mode, last_n}, {limit, 50}]) =:= {last_n, 50}\") :name)") + +;; parse_mode: proplist with mode = full +(epoch 26) +(eval "(get (erlang-eval-ast \"backfill:parse_mode([{mode, full}]) =:= full\") :name)") + +;; parse_mode: unknown -> none +(epoch 27) +(eval "(get (erlang-eval-ast \"backfill:parse_mode([{mode, mystery}]) =:= none\") :name)") + +;; Unknown mode -> [] +(epoch 28) +(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice(garbage, L5) =:= []\") :name)") +EPOCHS + +OUTPUT=$(timeout 280 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) + +check() { + local epoch="$1" desc="$2" expected="$3" + local actual + actual=$(echo "$OUTPUT" | awk -v e="$epoch" ' + $0 ~ "^\\(ok-len " e " " { getline; print; exit } + $0 ~ "^\\(ok " e " " { print; exit } + $0 ~ "^\\(error " e " " { print; exit } + ') + [ -z "$actual" ] && actual="" + if echo "$actual" | grep -qF -- "$expected"; then + PASS=$((PASS+1)) + [ "$VERBOSE" = "-v" ] && echo " ok $desc" + else + FAIL=$((FAIL+1)) + ERRORS+=" FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual +" + fi +} + +check 4 "backfill module loaded" "backfill" +check 10 "none mode -> []" "true" +check 11 "full mode -> all 5" "true" +check 12 "last_n N=2 -> tail 2" "true" +check 13 "last_n N=100 -> all 5" "true" +check 14 "last_n N=0 -> []" "true" +check 15 "last_t T=2 Now=5 -> 4,5" "true" +check 16 "last_t T=10 Now=5 -> all 5" "true" +check 17 "last_t T=0 Now=5 -> []" "true" +check 18 "since_cid mid -> tail 3" "true" +check 19 "since_cid last -> []" "true" +check 20 "since_cid unknown -> []" "true" +check 21 "wrap adds backfilled=true" "true" +check 22 "wrap preserves :id" "true" +check 23 "parse_mode atoms" "true" +check 24 "parse_mode tuple passthrough" "true" +check 25 "parse_mode proplist last_n" "true" +check 26 "parse_mode proplist full" "true" +check 27 "parse_mode unknown -> none" "true" +check 28 "unknown slice mode -> []" "true" + +TOTAL=$((PASS+FAIL)) +if [ $FAIL -eq 0 ]; then + echo "ok $PASS/$TOTAL next/tests/backfill.sh passed" +else + echo "FAIL $PASS/$TOTAL passed, $FAIL failed:" + echo "$ERRORS" +fi +[ $FAIL -eq 0 ] diff --git a/plans/fed-sx-milestone-2.md b/plans/fed-sx-milestone-2.md index 27bcb4ee..681596fa 100644 --- a/plans/fed-sx-milestone-2.md +++ b/plans/fed-sx-milestone-2.md @@ -630,11 +630,37 @@ Per §13.3: A wants B's history when A first follows B. Four modes: **Deliverables:** -- Follow activity may carry `:backfill {:mode :last-N :limit 100}`. -- On Accept, B's outbox is GET-paged with appropriate filters. -- `GET /actors//outbox?since=Cid&limit=N` returns a paged response. -- Backfill bodies wrap the original activities in `:backfilled true` - so projections can decide whether to re-fold or skip. +- [x] **9a** — Pure-functional backfill slicing in + `next/kernel/backfill.erl`: + - `slice/2,3(Mode, LogState[, Wrap])` returns the entry list + for a given mode. Wrap=true marks each entry + `{backfilled, true}` so receiving projections can decide + whether to re-fold or skip (per §13.3, wrapped bodies + preserve `:id` so replay defence still catches duplicates). + - Modes: `none`, `full`, `{last_n, N}`, `{last_t, T, NowFn}`, + `{since_cid, Cid}`. NowFn is a 0-arity fun so tests can + fake-time it. + - `parse_mode/1` lifts the Follow activity's `:backfill` + value (atom or proplist) into the internal mode tuple; + unknown shapes degrade to `none` (open-world default). + Substrate gotchas re-confirmed: + `lists:nthtail/2` not in this port (rolled `drop_n/2`); + pattern-alias `Pat = Var` not supported (rewrote + `parse_mode/1` clauses with explicit deconstruction). + 20/20 in `backfill.sh` covering all 5 modes (with edge + cases: N=0, N>length, T=0, since_cid hit/miss/unknown), + wrap_backfill, parse_mode atoms / tuples / proplists / + unknown. +- [ ] **9b** — `GET /actors//outbox?since=Cid&limit=N` + pagination route. Extends the Step 4d outbox listing with + the `?since=` query param (calls `backfill:since_cid_entries/2`). + Acceptance test extends `http_multi_actor.sh`. +- [ ] **9c** — Follow → Accept → backfill-delivery wiring. + The receiving kernel reads the Follow's `:backfill` field + via `parse_mode/1`, slices its outbox, and dispatches each + entry to the new follower's delivery_worker queue (Step 8d). + Gates on Blockers #2 (httpc) for the actual peer fetch path + but the in-process drain works today. **Tests:** @@ -1010,6 +1036,21 @@ proceed. Newest first. +- **2026-06-07** — Step 9a: pure-functional backfill slicing. + `next/kernel/backfill.erl` with `slice/2,3(Mode, LogState + [, Wrap])` returning the appropriate activity list. Modes + `none / full / {last_n, N} / {last_t, T, NowFn} / + {since_cid, Cid}` cover the §13.3 grammar; `wrap_backfill/1` + marks each entry `{backfilled, true}` (id preserved so the + receiver's replay defence still works). `parse_mode/1` lifts + the Follow activity's `:backfill` value (atom or proplist) + into the internal mode tuple; unknown shapes -> none. 20/20 + in `backfill.sh`. Substrate gotchas re-confirmed: + `lists:nthtail/2` not registered (rolled `drop_n/2`); pattern- + alias `Pat = Var` not supported in this port (rewrote + `parse_mode/1` clauses with explicit deconstruction). + Conformance preserved at 761/761. + - **2026-06-07** — Step 11b: projection folds for the new verbs. Two new modules in `next/kernel/`: `announce_state.erl` (per-Cid announcer-set fold, set