fed-sx-m2: Step 9a — pure-functional backfill slicing + 20 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 49s

New next/kernel/backfill.erl owns the §13.3 backfill mode
slicing. Given an outbox log + a mode, returns the activity
list to send to a new follower as backfill.

Public API:
  slice/2(Mode, LogState)               default Wrap=false
  slice/3(Mode, LogState, Wrap)         Wrap=true wraps entries
  wrap_backfill/1                       add {backfilled, true}
  parse_mode/1                          lift Follow :backfill field

Modes:
  none                       new follower: forward-only content
  full                       entire outbox
  {last_n, N}                last N activities (FIFO)
  {last_t, T, NowFn}         entries with :published in
                             (NowFn()-T .. NowFn()]
  {since_cid, Cid}           entries after the one with :id = Cid
                             (consumes the matched entry; returns
                             every entry after it)

wrap_backfill/1 marks each entry {backfilled, true}. Per §13.3
wrapped bodies preserve :id so the receiver's replay defence
still catches duplicates from the live stream.

parse_mode/1 accepts:
  nil / none / full / {last_n, _} / {last_t, _, _} /
  {since_cid, _} — pass through or normalize
  Proplist with :mode + :limit -> {last_n, N}
  Proplist with :mode + :duration -> {last_t, T, fun() -> 0 end}
  Proplist with :mode = full -> full
  Anything else -> none (open-world default)

Substrate gotchas re-confirmed and worked around:
  - lists:nthtail/2 not registered — rolled drop_n/2
  - Pattern-alias 'Pat = Var' not supported by this port's
    parser — parse_mode/1 clauses use explicit deconstruction

20/20 in next/tests/backfill.sh covering all five modes plus
edge cases (N=0, N>length, T=0 -> empty window, since_cid
hit/miss/unknown), wrap_backfill semantics, parse_mode for
atoms / tuple shapes / proplists / unknown / nil.

Step 9b (outbox listing ?since=Cid&limit=N pagination) and
Step 9c (Follow-Accept-backfill wiring) layer on top.
Conformance preserved at 761/761.
This commit is contained in:
2026-06-07 05:39:46 +00:00
parent b2b61a0112
commit 9621599606
3 changed files with 352 additions and 5 deletions

136
next/kernel/backfill.erl Normal file
View File

@@ -0,0 +1,136 @@
-module(backfill).
-export([slice/2, slice/3,
wrap_backfill/1, parse_mode/1,
all_entries/1, last_n_entries/2, last_t_entries/3,
since_cid_entries/2, none_entries/0]).
%% Backfill mode slicing per design §13.3 / Step 9. When A follows B
%% with a backfill spec, B's kernel slices the outbox log into the
%% appropriate window and delivers each entry as
%% `{backfilled, true}`-marked envelopes alongside forward-going
%% activity.
%%
%% Mode shapes (per the Follow activity's `:backfill` field):
%% none — newer follower sees only forward content
%% {last_n, N} — backfill last N activities (FIFO order)
%% {last_t, T, NowFn} — backfill activities with :published in
%% (Now - T .. Now]. NowFn is a 0-arity fun
%% so tests can fake-time it.
%% full — backfill the entire outbox
%%
%% slice/2 returns the activity list. slice/3 also wraps each entry
%% with `{backfilled, true}` so projections can decide whether to
%% re-fold or skip (the §13.3 Backfilled bodies preserve the
%% original `:id` so replay defence still works on the receiver).
%%
%% parse_mode/1 lifts the Follow activity's `:backfill` proplist
%% (or atom) into the internal mode tuple. Unknown shapes fall back
%% to `none` — the default open-world policy.
slice(Mode, LogState) ->
slice(Mode, LogState, false).
slice(Mode, LogState, Wrap) ->
Entries = log:entries(LogState),
Slice = case Mode of
none -> none_entries();
full -> all_entries(Entries);
{last_n, N} -> last_n_entries(N, Entries);
{last_t, T, NowFn} -> last_t_entries(T, NowFn, Entries);
{since_cid, Cid} -> since_cid_entries(Cid, Entries);
_ -> none_entries()
end,
case Wrap of
true -> wrap_backfill(Slice);
_ -> Slice
end.
%% ── Mode-specific entry selection ─────────────────────────────
all_entries(Entries) -> Entries.
none_entries() -> [].
%% last_n_entries/2 — tail N entries in FIFO order.
last_n_entries(N, _) when N =< 0 -> [];
last_n_entries(N, Entries) ->
Len = length(Entries),
case Len =< N of
true -> Entries;
false -> drop_n(Len - N, Entries)
end.
drop_n(0, L) -> L;
drop_n(_, []) -> [];
drop_n(N, [_ | Rest]) -> drop_n(N - 1, Rest).
%% last_t_entries/3 — entries whose :published is within the last
%% T units of (NowFn() - T .. NowFn()]. T and :published are
%% integers (seconds-since-epoch in production; opaque ints in tests).
last_t_entries(T, NowFn, Entries) when is_integer(T), T >= 0 ->
Now = NowFn(),
Cutoff = Now - T,
[E || E <- Entries, in_window(E, Cutoff, Now)];
last_t_entries(_, _, _) -> [].
in_window(Activity, Cutoff, Now) ->
case envelope:get_field(published, Activity) of
{ok, P} when is_integer(P), P > Cutoff, P =< Now -> true;
_ -> false
end.
%% since_cid_entries/2 — every entry after the one with :id = Cid.
%% If Cid isn't in the log, returns [] (caller's pointer is stale).
%% Used by `GET /actors/<id>/outbox?since=Cid` pagination.
since_cid_entries(_Cid, []) -> [];
since_cid_entries(Cid, [E | Rest]) ->
case envelope:get_field(id, E) of
{ok, Cid} -> Rest;
_ -> since_cid_entries(Cid, Rest)
end.
%% wrap_backfill/1 — append `{backfilled, true}` to each entry.
%% The receiving projection scheduler reads this field and chooses
%% whether to fold (re-emit) or skip (already known via replay
%% defence on `:id`).
wrap_backfill([]) -> [];
wrap_backfill([E | Rest]) ->
[E ++ [{backfilled, true}] | wrap_backfill(Rest)].
%% parse_mode/1 — Lift a Follow activity's `:backfill` value into the
%% internal mode tuple. Accepts:
%% nil / not_found -> none
%% none -> none
%% full -> full
%% {last_n, N} -> {last_n, N} (already-parsed shape)
%% {last_t, T, NowFn} -> pass-through
%% Proplist with :mode + :limit / :duration -> parsed
%% Unknown shape -> none (open-world default).
parse_mode(nil) -> none;
parse_mode(none) -> none;
parse_mode(full) -> full;
parse_mode({last_n, N}) -> {last_n, N};
parse_mode({last_t, T, NowFn}) -> {last_t, T, NowFn};
parse_mode({since_cid, Cid}) -> {since_cid, Cid};
parse_mode(List) when is_list(List) ->
case envelope:get_field(mode, List) of
{ok, last_n} ->
case envelope:get_field(limit, List) of
{ok, N} when is_integer(N) -> {last_n, N};
_ -> none
end;
{ok, last_t} ->
case envelope:get_field(duration, List) of
{ok, T} when is_integer(T) -> {last_t, T, fun () -> 0 end};
_ -> none
end;
{ok, full} -> full;
{ok, none} -> none;
_ -> none
end;
parse_mode(_) -> none.

170
next/tests/backfill.sh Executable file
View File

@@ -0,0 +1,170 @@
#!/usr/bin/env bash
# next/tests/backfill.sh — m2 Step 9a test.
#
# Backfill mode slicing per design §13.3. Given an outbox log +
# a mode (none / last_n / last_t / full / since_cid), backfill:slice
# returns the activity list to send to a new follower as backfill.
set -uo pipefail
cd "$(git rev-parse --show-toplevel)"
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [ ! -x "$SX_SERVER" ]; then
echo "ERROR: sx_server.exe not found." >&2
exit 1
fi
VERBOSE="${1:-}"
PASS=0; FAIL=0; ERRORS=""
TMPFILE=$(mktemp); trap "rm -f $TMPFILE" EXIT
# Five activities published at :published = 1, 2, 3, 4, 5
SETUP='Act1 = [{id, <<1>>}, {type, note}, {actor, alice}, {published, 1}], Act2 = [{id, <<2>>}, {type, note}, {actor, alice}, {published, 2}], Act3 = [{id, <<3>>}, {type, note}, {actor, alice}, {published, 3}], Act4 = [{id, <<4>>}, {type, note}, {actor, alice}, {published, 4}], Act5 = [{id, <<5>>}, {type, note}, {actor, alice}, {published, 5}], {ok, L0} = log:open(alice, <<98,97,115,101>>), {ok, L1, _} = log:append(L0, Act1), {ok, L2, _} = log:append(L1, Act2), {ok, L3, _} = log:append(L2, Act3), {ok, L4, _} = log:append(L3, Act4), {ok, L5, _} = log:append(L4, Act5),'
cat > "$TMPFILE" <<EPOCHS
(epoch 1)
(load "lib/erlang/tokenizer.sx")
(load "lib/erlang/parser.sx")
(load "lib/erlang/parser-core.sx")
(load "lib/erlang/parser-expr.sx")
(load "lib/erlang/parser-module.sx")
(load "lib/erlang/transpile.sx")
(load "lib/erlang/runtime.sx")
(load "lib/erlang/vm/dispatcher.sx")
(epoch 2)
(eval "(get (erlang-load-module (file-read \"next/kernel/envelope.erl\")) :name)")
(epoch 3)
(eval "(get (erlang-load-module (file-read \"next/kernel/log.erl\")) :name)")
(epoch 4)
(eval "(get (erlang-load-module (file-read \"next/kernel/backfill.erl\")) :name)")
;; none mode -> []
(epoch 10)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice(none, L5) =:= []\") :name)")
;; full mode -> all 5
(epoch 11)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice(full, L5) =:= [Act1, Act2, Act3, Act4, Act5]\") :name)")
;; last_n with N=2 -> tail 2 (Act4, Act5)
(epoch 12)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_n, 2}, L5) =:= [Act4, Act5]\") :name)")
;; last_n with N > total -> all entries
(epoch 13)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_n, 100}, L5) =:= [Act1, Act2, Act3, Act4, Act5]\") :name)")
;; last_n with N = 0 -> []
(epoch 14)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_n, 0}, L5) =:= []\") :name)")
;; last_t with T=2, Now=5 -> activities with :published > 3 and <= 5 -> [Act4, Act5]
(epoch 15)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_t, 2, fun() -> 5 end}, L5) =:= [Act4, Act5]\") :name)")
;; last_t with T=10, Now=5 -> covers everything from :published > -5 -> all 5
(epoch 16)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_t, 10, fun() -> 5 end}, L5) =:= [Act1, Act2, Act3, Act4, Act5]\") :name)")
;; last_t with T=0, Now=5 -> only entries at exactly Now (>0, <=5) — really [] because window is (5..5]
(epoch 17)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({last_t, 0, fun() -> 5 end}, L5) =:= []\") :name)")
;; since_cid with the 2nd cid -> entries AFTER it (Act3..Act5)
(epoch 18)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({since_cid, <<2>>}, L5) =:= [Act3, Act4, Act5]\") :name)")
;; since_cid with last cid -> []
(epoch 19)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({since_cid, <<5>>}, L5) =:= []\") :name)")
;; since_cid with unknown cid -> []
(epoch 20)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice({since_cid, <<99>>}, L5) =:= []\") :name)")
;; wrap_backfill adds {backfilled, true} to each entry
(epoch 21)
(eval "(get (erlang-eval-ast \"${SETUP} Wrapped = backfill:slice({last_n, 1}, L5, true), [Act5W] = Wrapped, envelope:get_field(backfilled, Act5W) =:= {ok, true}\") :name)")
;; Wrapped entries preserve :id
(epoch 22)
(eval "(get (erlang-eval-ast \"${SETUP} Wrapped = backfill:slice({last_n, 1}, L5, true), [Act5W] = Wrapped, envelope:get_field(id, Act5W) =:= {ok, <<5>>}\") :name)")
;; parse_mode: nil / none / atoms
(epoch 23)
(eval "(get (erlang-eval-ast \"{backfill:parse_mode(nil), backfill:parse_mode(none), backfill:parse_mode(full)} =:= {none, none, full}\") :name)")
;; parse_mode: tuple shapes pass through
(epoch 24)
(eval "(get (erlang-eval-ast \"backfill:parse_mode({last_n, 3}) =:= {last_n, 3}\") :name)")
;; parse_mode: proplist with mode + limit
(epoch 25)
(eval "(get (erlang-eval-ast \"backfill:parse_mode([{mode, last_n}, {limit, 50}]) =:= {last_n, 50}\") :name)")
;; parse_mode: proplist with mode = full
(epoch 26)
(eval "(get (erlang-eval-ast \"backfill:parse_mode([{mode, full}]) =:= full\") :name)")
;; parse_mode: unknown -> none
(epoch 27)
(eval "(get (erlang-eval-ast \"backfill:parse_mode([{mode, mystery}]) =:= none\") :name)")
;; Unknown mode -> []
(epoch 28)
(eval "(get (erlang-eval-ast \"${SETUP} backfill:slice(garbage, L5) =:= []\") :name)")
EPOCHS
OUTPUT=$(timeout 280 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
check() {
local epoch="$1" desc="$2" expected="$3"
local actual
actual=$(echo "$OUTPUT" | awk -v e="$epoch" '
$0 ~ "^\\(ok-len " e " " { getline; print; exit }
$0 ~ "^\\(ok " e " " { print; exit }
$0 ~ "^\\(error " e " " { print; exit }
')
[ -z "$actual" ] && actual="<no output for epoch $epoch>"
if echo "$actual" | grep -qF -- "$expected"; then
PASS=$((PASS+1))
[ "$VERBOSE" = "-v" ] && echo " ok $desc"
else
FAIL=$((FAIL+1))
ERRORS+=" FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual
"
fi
}
check 4 "backfill module loaded" "backfill"
check 10 "none mode -> []" "true"
check 11 "full mode -> all 5" "true"
check 12 "last_n N=2 -> tail 2" "true"
check 13 "last_n N=100 -> all 5" "true"
check 14 "last_n N=0 -> []" "true"
check 15 "last_t T=2 Now=5 -> 4,5" "true"
check 16 "last_t T=10 Now=5 -> all 5" "true"
check 17 "last_t T=0 Now=5 -> []" "true"
check 18 "since_cid mid -> tail 3" "true"
check 19 "since_cid last -> []" "true"
check 20 "since_cid unknown -> []" "true"
check 21 "wrap adds backfilled=true" "true"
check 22 "wrap preserves :id" "true"
check 23 "parse_mode atoms" "true"
check 24 "parse_mode tuple passthrough" "true"
check 25 "parse_mode proplist last_n" "true"
check 26 "parse_mode proplist full" "true"
check 27 "parse_mode unknown -> none" "true"
check 28 "unknown slice mode -> []" "true"
TOTAL=$((PASS+FAIL))
if [ $FAIL -eq 0 ]; then
echo "ok $PASS/$TOTAL next/tests/backfill.sh passed"
else
echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
echo "$ERRORS"
fi
[ $FAIL -eq 0 ]

View File

@@ -630,11 +630,37 @@ Per §13.3: A wants B's history when A first follows B. Four modes:
**Deliverables:**
- Follow activity may carry `:backfill {:mode :last-N :limit 100}`.
- On Accept, B's outbox is GET-paged with appropriate filters.
- `GET /actors/<id>/outbox?since=Cid&limit=N` returns a paged response.
- Backfill bodies wrap the original activities in `:backfilled true`
so projections can decide whether to re-fold or skip.
- [x] **9a** — Pure-functional backfill slicing in
`next/kernel/backfill.erl`:
- `slice/2,3(Mode, LogState[, Wrap])` returns the entry list
for a given mode. Wrap=true marks each entry
`{backfilled, true}` so receiving projections can decide
whether to re-fold or skip (per §13.3, wrapped bodies
preserve `:id` so replay defence still catches duplicates).
- Modes: `none`, `full`, `{last_n, N}`, `{last_t, T, NowFn}`,
`{since_cid, Cid}`. NowFn is a 0-arity fun so tests can
fake-time it.
- `parse_mode/1` lifts the Follow activity's `:backfill`
value (atom or proplist) into the internal mode tuple;
unknown shapes degrade to `none` (open-world default).
Substrate gotchas re-confirmed:
`lists:nthtail/2` not in this port (rolled `drop_n/2`);
pattern-alias `Pat = Var` not supported (rewrote
`parse_mode/1` clauses with explicit deconstruction).
20/20 in `backfill.sh` covering all 5 modes (with edge
cases: N=0, N>length, T=0, since_cid hit/miss/unknown),
wrap_backfill, parse_mode atoms / tuples / proplists /
unknown.
- [ ] **9b** — `GET /actors/<id>/outbox?since=Cid&limit=N`
pagination route. Extends the Step 4d outbox listing with
the `?since=` query param (calls `backfill:since_cid_entries/2`).
Acceptance test extends `http_multi_actor.sh`.
- [ ] **9c** — Follow → Accept → backfill-delivery wiring.
The receiving kernel reads the Follow's `:backfill` field
via `parse_mode/1`, slices its outbox, and dispatches each
entry to the new follower's delivery_worker queue (Step 8d).
Gates on Blockers #2 (httpc) for the actual peer fetch path
but the in-process drain works today.
**Tests:**
@@ -1010,6 +1036,21 @@ proceed.
Newest first.
- **2026-06-07** — Step 9a: pure-functional backfill slicing.
`next/kernel/backfill.erl` with `slice/2,3(Mode, LogState
[, Wrap])` returning the appropriate activity list. Modes
`none / full / {last_n, N} / {last_t, T, NowFn} /
{since_cid, Cid}` cover the §13.3 grammar; `wrap_backfill/1`
marks each entry `{backfilled, true}` (id preserved so the
receiver's replay defence still works). `parse_mode/1` lifts
the Follow activity's `:backfill` value (atom or proplist)
into the internal mode tuple; unknown shapes -> none. 20/20
in `backfill.sh`. Substrate gotchas re-confirmed:
`lists:nthtail/2` not registered (rolled `drop_n/2`); pattern-
alias `Pat = Var` not supported in this port (rewrote
`parse_mode/1` clauses with explicit deconstruction).
Conformance preserved at 761/761.
- **2026-06-07** — Step 11b: projection folds for the new verbs.
Two new modules in `next/kernel/`:
`announce_state.erl` (per-Cid announcer-set fold, set