fed-sx-m1: Step 3b on-disk log — open_disk/2 + write-through append/2 + length-framed segments; 12/12 log_disk tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 29s

This commit is contained in:
2026-06-05 07:20:29 +00:00
parent 6d7f0a3f15
commit 595c15a3fb
3 changed files with 253 additions and 4 deletions

View File

@@ -1,5 +1,5 @@
-module(log).
-export([open/2, append/2, tip/1, replay/3, entries/1]).
-export([open/2, open_disk/2, append/2, tip/1, replay/3, entries/1]).
%% Per-actor activity log — the canonical record of everything an
%% actor has emitted, in chronological order. Per design §15.2 this
@@ -36,9 +36,112 @@ open(ActorId, BasePath) ->
append(LogState, Activity) ->
Seq = field(seq, LogState),
Entries = field(entries, LogState),
NewEntries = Entries ++ [Activity],
NewState = replace_field(seq, Seq + 1,
replace_field(entries, Entries ++ [Activity], LogState)),
{ok, NewState, Seq}.
replace_field(entries, NewEntries, LogState)),
case persisted_path(LogState) of
{persisted, Path} ->
ok = write_segment(Path, NewEntries),
{ok, NewState, Seq};
not_persisted ->
{ok, NewState, Seq}
end.
%% open_disk/2 — disk-backed variant of open. Reads any existing
%% segment file under BasePath, replays entries into memory state,
%% and tags the state {persisted, true} so future append/2 calls
%% write through. BasePath must be a binary or charlist (real path),
%% not an atom — the in-memory open/2 still accepts atoms for tests.
%%
%% Segment format (per frame): 4-byte big-endian length + that many
%% bytes of term_codec:encode(Activity). Whole file is the concat of
%% all frames in append order; no header.
%%
%% Returns {ok, LogState} on success, {error, {corrupt, Reason}} if
%% the segment is truncated/garbled, {error, {read, Reason}} on other
%% file errors. Missing file is treated as an empty fresh log.
open_disk(ActorId, BasePath) ->
Path = segment_path(ActorId, BasePath),
case try_read_segment(Path) of
{ok, Entries} ->
State = [{actor, ActorId}, {base, BasePath},
{seq, length(Entries)},
{entries, Entries},
{persisted, true},
{path, Path}],
{ok, State};
{error, _} = E ->
E
end.
persisted_path(LogState) ->
case lookup(persisted, LogState) of
true ->
case lookup(path, LogState) of
undefined -> not_persisted;
P -> {persisted, P}
end;
_ -> not_persisted
end.
%% segment_path/2 — returns the segment file path as a charlist (list
%% of int char codes). BasePath may be a binary OR a charlist; we
%% normalize to charlist via binary_to_list so the result is purely
%% cons-based — this works around an iolist-walker quirk in
%% er-source-to-string that surfaces when list_to_binary nests binaries
%% built from charlists. file:read_file accepts charlists fine.
segment_path(ActorId, BasePath) ->
base_chars(BasePath) ++ [$/] ++ atom_to_list(ActorId)
++ [$., $l, $o, $g].
base_chars(B) when is_binary(B) -> binary_to_list(B);
base_chars(L) when is_list(L) -> L.
write_segment(Path, Entries) ->
Frames = [frame(term_codec:encode(E)) || E <- Entries],
file:write_file(Path, list_to_binary(Frames)).
%% frame/1 — prepend 4-byte big-endian length to Payload.
frame(Payload) when is_binary(Payload) ->
L = byte_size(Payload),
B3 = (L div 16777216) rem 256,
B2 = (L div 65536) rem 256,
B1 = (L div 256) rem 256,
B0 = L rem 256,
[B3, B2, B1, B0, Payload].
try_read_segment(Path) ->
case file:read_file(Path) of
{ok, Bin} ->
try {ok, decode_frames(binary_to_list(Bin), [])}
catch
throw:Reason -> {error, {corrupt, Reason}};
error:Reason -> {error, {corrupt, Reason}}
end;
{error, enoent} ->
{ok, []};
{error, R} ->
{error, {read, R}}
end.
decode_frames([], Acc) ->
lists:reverse(Acc);
decode_frames([B3, B2, B1, B0 | Rest], Acc) ->
Len = B3 * 16777216 + B2 * 65536 + B1 * 256 + B0,
{Payload, Rest2} = take_n(Len, Rest),
case term_codec:decode(list_to_binary(Payload)) of
{ok, Term, _} -> decode_frames(Rest2, [Term | Acc]);
{error, R} -> throw({decode, R})
end;
decode_frames(_, _) ->
throw(truncated_header).
take_n(0, R) -> {[], R};
take_n(N, [H | T]) ->
{Hs, Tl} = take_n(N - 1, T),
{[H | Hs], Tl};
take_n(_, []) ->
throw(truncated_body).
tip(LogState) ->
field(seq, LogState).
@@ -58,6 +161,12 @@ field(K, [{K, V} | _]) -> V;
field(K, [_ | Rest]) -> field(K, Rest);
field(_, []) -> erlang:error(badkey).
%% lookup/2 — like field but returns `undefined` for missing key
%% (used by persisted_path/1 which probes optional state fields).
lookup(K, [{K, V} | _]) -> V;
lookup(K, [_ | Rest]) -> lookup(K, Rest);
lookup(_, []) -> undefined.
replace_field(K, V, []) -> [{K, V}];
replace_field(K, V, [{K, _} | Rest]) -> [{K, V} | Rest];
replace_field(K, V, [P | Rest]) -> [P | replace_field(K, V, Rest)].

139
next/tests/log_disk.sh Executable file
View File

@@ -0,0 +1,139 @@
#!/usr/bin/env bash
# next/tests/log_disk.sh — Step 3b on-disk log acceptance test.
#
# Exercises log:open_disk/2, append/2 (write-through), and the
# read-segment-on-reopen path. Uses next/kernel/term_codec.erl for
# the entry encoding and a 4-byte big-endian length prefix per frame.
set -uo pipefail
cd "$(git rev-parse --show-toplevel)"
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [ ! -x "$SX_SERVER" ]; then
echo "ERROR: sx_server.exe not found." >&2
exit 1
fi
# Fixed tmp dir so we can refer to it as an Erlang binary literal.
DISK_BASE=/tmp/fed_sx_m1_log_disk
rm -rf "$DISK_BASE"
mkdir -p "$DISK_BASE"
# Pre-write a corrupted segment file for the corrupt-detect test
# (just a truncated 4-byte length header with no payload).
printf '\x00\x00\x00\x05XX' > "$DISK_BASE/corrupted.log"
VERBOSE="${1:-}"
PASS=0; FAIL=0; ERRORS=""
TMPFILE=$(mktemp); trap "rm -f $TMPFILE; rm -rf $DISK_BASE" EXIT
cat > "$TMPFILE" <<'EPOCHS'
(epoch 1)
(load "lib/erlang/tokenizer.sx")
(load "lib/erlang/parser.sx")
(load "lib/erlang/parser-core.sx")
(load "lib/erlang/parser-expr.sx")
(load "lib/erlang/parser-module.sx")
(load "lib/erlang/transpile.sx")
(load "lib/erlang/runtime.sx")
(load "lib/erlang/vm/dispatcher.sx")
(epoch 2)
(eval "(get (erlang-load-module (file-read \"next/kernel/term_codec.erl\")) :name)")
(epoch 3)
(eval "(get (erlang-load-module (file-read \"next/kernel/log.erl\")) :name)")
;; Base path: /tmp/fed_sx_m1_log_disk constructed as an Erlang binary
;; via list_to_binary of the char codes. (`<<"...">>` literals don't
;; carry through in this port — see Step 3b substrate fix #2.)
;; --- 3a in-memory open/2 still works unchanged ---
(epoch 10)
(eval "(get (erlang-eval-ast \"{ok, L} = log:open(alice, base), log:tip(L) =:= 0\") :name)")
;; --- open_disk on missing file returns empty fresh state ---
(epoch 20)
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L} = log:open_disk(alice, Base), log:tip(L) =:= 0\") :name)")
(epoch 21)
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L} = log:open_disk(alice, Base), log:entries(L) =:= []\") :name)")
;; --- append + re-open: entries match ---
(epoch 30)
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(bob, Base), {ok, L1, _} = log:append(L0, hello), {ok, L2, _} = log:append(L1, world), {ok, L3} = log:open_disk(bob, Base), log:entries(L3) =:= [hello, world]\") :name)")
;; --- tip resumes correctly across restart ---
(epoch 31)
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(carol, Base), {ok, L1, _} = log:append(L0, a), {ok, L2, _} = log:append(L1, b), {ok, L3, _} = log:append(L2, c), {ok, L4} = log:open_disk(carol, Base), log:tip(L4) =:= 3\") :name)")
;; --- replay/3 over re-opened state visits append order ---
(epoch 32)
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(dave, Base), {ok, L1, _} = log:append(L0, a), {ok, L2, _} = log:append(L1, b), {ok, L3, _} = log:append(L2, c), {ok, L4} = log:open_disk(dave, Base), log:replay(L4, [], fun (X, S, Acc) -> [{S, X} | Acc] end) =:= [{2,c},{1,b},{0,a}]\") :name)")
;; --- mixed types round-trip (atom, int, binary, tuple, list) ---
(epoch 33)
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(eve, Base), {ok, L1, _} = log:append(L0, foo), {ok, L2, _} = log:append(L1, 42), {ok, L3, _} = log:append(L2, <<1,2,3>>), {ok, L4, _} = log:append(L3, {pair, alice, bob}), {ok, L5, _} = log:append(L4, [1, two, <<3>>]), {ok, L6} = log:open_disk(eve, Base), log:entries(L6) =:= [foo, 42, <<1,2,3>>, {pair, alice, bob}, [1, two, <<3>>]]\") :name)")
;; --- continuing to append after reopen preserves chronology ---
(epoch 34)
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(frank, Base), {ok, L1, _} = log:append(L0, a), {ok, L2} = log:open_disk(frank, Base), {ok, L3, S} = log:append(L2, b), {S, log:tip(L3)} =:= {1, 2}\") :name)")
;; --- corrupted segment returns {error, _} not crash ---
(epoch 40)
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), element(1, log:open_disk(corrupted, Base))\") :name)")
;; --- per-actor isolation: two disk-backed logs are independent ---
(epoch 41)
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, LA0} = log:open_disk(g1, Base), {ok, LB0} = log:open_disk(g2, Base), {ok, LA1, _} = log:append(LA0, x), {ok, LB1, _} = log:append(LB0, y1), {ok, LB2, _} = log:append(LB1, y2), {ok, LAr} = log:open_disk(g1, Base), {ok, LBr} = log:open_disk(g2, Base), {log:entries(LAr), log:entries(LBr)} =:= {[x], [y1, y2]}\") :name)")
EPOCHS
OUTPUT=$(timeout 90 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
check() {
local epoch="$1" desc="$2" expected="$3"
local actual
actual=$(echo "$OUTPUT" | grep -A1 "^(ok-len $epoch " | tail -1 || true)
if echo "$actual" | grep -q "^(ok-len"; then actual=""; fi
if [ -z "$actual" ]; then
actual=$(echo "$OUTPUT" | grep "^(ok $epoch " | head -1 || true)
fi
if [ -z "$actual" ]; then
actual=$(echo "$OUTPUT" | grep "^(error $epoch " | head -1 || true)
fi
[ -z "$actual" ] && actual="<no output for epoch $epoch>"
if echo "$actual" | grep -qF -- "$expected"; then
PASS=$((PASS+1))
[ "$VERBOSE" = "-v" ] && echo " ok $desc"
else
FAIL=$((FAIL+1))
ERRORS+=" FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual
"
fi
}
check 2 "term_codec loads" "term_codec"
check 3 "log module loads" "log"
check 10 "3a in-memory open/2 compat" "true"
check 20 "open_disk missing -> tip 0" "true"
check 21 "open_disk missing -> []" "true"
check 30 "append+reopen entries match" "true"
check 31 "tip resumes after restart" "true"
check 32 "replay chronological" "true"
check 33 "mixed types round-trip" "true"
check 34 "append after reopen" "true"
check 40 "corrupted segment -> error" "error"
check 41 "per-actor isolation" "true"
TOTAL=$((PASS+FAIL))
if [ $FAIL -eq 0 ]; then
echo "ok $PASS/$TOTAL log_disk tests passed"
else
echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
echo "$ERRORS"
fi
[ $FAIL -eq 0 ]