From ab159dfacef65905d970cb9f0d0f2111da21f3ae Mon Sep 17 00:00:00 2001 From: giles Date: Wed, 27 May 2026 07:06:40 +0000 Subject: [PATCH] =?UTF-8?q?fed-sx-m1:=20Step=203a=20=E2=80=94=20in-memory?= =?UTF-8?q?=20log:open/append/tip/replay=20+=2012=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- next/kernel/log.erl | 63 ++++++++++++++++++ next/tests/log_memory.sh | 123 ++++++++++++++++++++++++++++++++++++ plans/fed-sx-milestone-1.md | 6 ++ 3 files changed, 192 insertions(+) create mode 100644 next/kernel/log.erl create mode 100755 next/tests/log_memory.sh diff --git a/next/kernel/log.erl b/next/kernel/log.erl new file mode 100644 index 00000000..dcd78318 --- /dev/null +++ b/next/kernel/log.erl @@ -0,0 +1,63 @@ +-module(log). +-export([open/2, append/2, tip/1, replay/3, entries/1]). + +%% Per-actor activity log — the canonical record of everything an +%% actor has emitted, in chronological order. Per design §15.2 this +%% lives on disk as a JSONL segment file; v1 starts with an in-memory +%% backend so the API and seq-number machinery can be locked down +%% before the on-disk format is added (Step 3b). +%% +%% State shape (a property list): +%% [{actor, ActorId}, {base, BasePath}, {seq, NextSeq}, {entries, [Act|...]}] +%% +%% `entries` stores activities in append order — i.e. oldest first. +%% `seq` is the next sequence number that will be assigned by append. +%% `base` is kept on the state for forward-compatibility with 3b +%% (where it becomes the segment-file directory). +%% +%% open/2 takes ActorId + BasePath and returns {ok, LogState} starting +%% with seq=0 and no entries. +%% +%% append/2 returns {ok, NewLogState, AssignedSeq}. +%% +%% tip/1 returns the next seq the log would assign (== count of entries). +%% +%% replay/3 folds Fun(Activity, AssignedSeq, Acc) over every entry in +%% append order. 
Three-arity rather than two-arity because the plan's +%% example test is "sequence numbers gap-free across replay" — having +%% the seq number visible in the fold makes that test direct. +%% +%% entries/1 is a debug accessor returning [Activity, ...] in append +%% order. Not part of the public API contract. + +open(ActorId, BasePath) -> + {ok, [{actor, ActorId}, {base, BasePath}, {seq, 0}, {entries, []}]}. + +append(LogState, Activity) -> + Seq = field(seq, LogState), + Entries = field(entries, LogState), + NewState = replace_field(seq, Seq + 1, + replace_field(entries, Entries ++ [Activity], LogState)), + {ok, NewState, Seq}. + +tip(LogState) -> + field(seq, LogState). + +replay(LogState, InitAcc, Fun) -> + Entries = field(entries, LogState), + replay_loop(Entries, 0, InitAcc, Fun). + +replay_loop([], _, Acc, _) -> Acc; +replay_loop([Act | Rest], Seq, Acc, Fun) -> + replay_loop(Rest, Seq + 1, Fun(Act, Seq, Acc), Fun). + +entries(LogState) -> + field(entries, LogState). + +field(K, [{K, V} | _]) -> V; +field(K, [_ | Rest]) -> field(K, Rest); +field(_, []) -> erlang:error(badkey). + +replace_field(K, V, []) -> [{K, V}]; +replace_field(K, V, [{K, _} | Rest]) -> [{K, V} | Rest]; +replace_field(K, V, [P | Rest]) -> [P | replace_field(K, V, Rest)]. diff --git a/next/tests/log_memory.sh b/next/tests/log_memory.sh new file mode 100755 index 00000000..09f8017a --- /dev/null +++ b/next/tests/log_memory.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# next/tests/log_memory.sh — Step 3a acceptance test. +# +# Exercises the in-memory log API: open/2, append/2, tip/1, replay/3, +# entries/1. On-disk persistence is the job of Step 3b. 12 cases. + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +if [ ! -x "$SX_SERVER" ]; then + SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +fi +if [ ! -x "$SX_SERVER" ]; then + echo "ERROR: sx_server.exe not found."
>&2 + exit 1 +fi + +VERBOSE="${1:-}" +PASS=0; FAIL=0; ERRORS="" +TMPFILE=$(mktemp); trap "rm -f $TMPFILE" EXIT + +cat > "$TMPFILE" <<'EPOCHS' +(epoch 1) +(load "lib/erlang/tokenizer.sx") +(load "lib/erlang/parser.sx") +(load "lib/erlang/parser-core.sx") +(load "lib/erlang/parser-expr.sx") +(load "lib/erlang/parser-module.sx") +(load "lib/erlang/transpile.sx") +(load "lib/erlang/runtime.sx") +(load "lib/erlang/vm/dispatcher.sx") + +(epoch 2) +(eval "(get (erlang-load-module (file-read \"next/kernel/log.erl\")) :name)") + +;; Fresh log: tip is 0 +(epoch 10) +(eval "(get (erlang-eval-ast \"{ok, L} = log:open(alice, base), log:tip(L) =:= 0\") :name)") + +;; Fresh log: entries empty +(epoch 11) +(eval "(get (erlang-eval-ast \"{ok, L} = log:open(alice, base), log:entries(L) =:= []\") :name)") + +;; First append returns seq 0; tip advances to 1 +(epoch 12) +(eval "(get (erlang-eval-ast \"{ok, L0} = log:open(alice, base), {ok, L1, S} = log:append(L0, act_a), {S, log:tip(L1)} =:= {0, 1}\") :name)") + +;; Two appends: seq 0,1; tip = 2 +(epoch 13) +(eval "(get (erlang-eval-ast \"{ok, L0} = log:open(alice, base), {ok, L1, S0} = log:append(L0, a), {ok, L2, S1} = log:append(L1, b), {S0, S1, log:tip(L2)} =:= {0, 1, 2}\") :name)") + +;; Five appends: seq sequence gap-free +(epoch 14) +(eval "(get (erlang-eval-ast \"{ok, L0} = log:open(alice, base), {ok, L1, S0} = log:append(L0, a), {ok, L2, S1} = log:append(L1, b), {ok, L3, S2} = log:append(L2, c), {ok, L4, S3} = log:append(L3, d), {ok, L5, S4} = log:append(L4, e), {S0,S1,S2,S3,S4,log:tip(L5)} =:= {0,1,2,3,4,5}\") :name)") + +;; entries/1 returns activities in append order +(epoch 15) +(eval "(get (erlang-eval-ast \"{ok, L0} = log:open(alice, base), {ok, L1, _} = log:append(L0, a), {ok, L2, _} = log:append(L1, b), {ok, L3, _} = log:append(L2, c), log:entries(L3) =:= [a, b, c]\") :name)") + +;; Round-trip: appended activity is recoverable byte-for-byte +(epoch 16) +(eval "(get (erlang-eval-ast \"Act = 
[{id,1},{type,create},{actor,alice}], {ok, L0} = log:open(alice, base), {ok, L1, _} = log:append(L0, Act), log:entries(L1) =:= [Act]\") :name)") + +;; Per-actor isolation: two logs are independent +(epoch 17) +(eval "(get (erlang-eval-ast \"{ok, LA0} = log:open(alice, base), {ok, LB0} = log:open(bob, base), {ok, LA1, _} = log:append(LA0, a), {ok, LB1, _} = log:append(LB0, b1), {ok, LB2, _} = log:append(LB1, b2), {log:tip(LA1), log:tip(LB2)} =:= {1, 2}\") :name)") + +;; replay/3 visits all activities in append order with monotonic seqs +(epoch 18) +(eval "(get (erlang-eval-ast \"{ok, L0} = log:open(alice, base), {ok, L1, _} = log:append(L0, a), {ok, L2, _} = log:append(L1, b), {ok, L3, _} = log:append(L2, c), log:replay(L3, [], fun (A, S, Acc) -> [{S, A} | Acc] end) =:= [{2,c},{1,b},{0,a}]\") :name)") + +;; replay over empty log: InitAcc returned unchanged +(epoch 19) +(eval "(get (erlang-eval-ast \"{ok, L} = log:open(alice, base), log:replay(L, init_acc, fun (_, _, A) -> A end) =:= init_acc\") :name)") + +;; replay can compute a derived state (sum of integer activities) +(epoch 20) +(eval "(get (erlang-eval-ast \"{ok, L0} = log:open(alice, base), {ok, L1, _} = log:append(L0, 10), {ok, L2, _} = log:append(L1, 20), {ok, L3, _} = log:append(L2, 30), log:replay(L3, 0, fun (V, _, Acc) -> V + Acc end) =:= 60\") :name)") +EPOCHS + +OUTPUT=$(timeout 120 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) + +check() { + local epoch="$1" desc="$2" expected="$3" + local actual + actual=$(echo "$OUTPUT" | awk -v e="$epoch" ' + $0 ~ "^\\(ok-len " e " " { getline; print; exit } + $0 ~ "^\\(ok " e " " { print; exit } + $0 ~ "^\\(error " e " " { print; exit } + ') + [ -z "$actual" ] && actual="" + if echo "$actual" | grep -qF -- "$expected"; then + PASS=$((PASS+1)) + [ "$VERBOSE" = "-v" ] && echo " ok $desc" + else + FAIL=$((FAIL+1)) + ERRORS+=" FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual +" + fi +} + +check 2 "module load name" "log" +check 10 "fresh log tip is 0" "true" 
+check 11 "fresh log entries empty" "true" +check 12 "append returns seq 0, tip 1" "true" +check 13 "two appends seq 0,1; tip 2" "true" +check 14 "five appends gap-free" "true" +check 15 "entries in append order" "true" +check 16 "round-trip activity" "true" +check 17 "per-actor isolation" "true" +check 18 "replay visits all in order" "true" +check 19 "replay over empty log" "true" +check 20 "replay computes derived state" "true" + +TOTAL=$((PASS+FAIL)) +if [ $FAIL -eq 0 ]; then + echo "ok $PASS/$TOTAL next/tests/log_memory.sh passed" +else + echo "FAIL $PASS/$TOTAL passed, $FAIL failed:" + echo "$ERRORS" +fi +[ $FAIL -eq 0 ] diff --git a/plans/fed-sx-milestone-1.md b/plans/fed-sx-milestone-1.md index d224f3b7..be7a3d9f 100644 --- a/plans/fed-sx-milestone-1.md +++ b/plans/fed-sx-milestone-1.md @@ -195,6 +195,11 @@ verify_signature(Activity, ActorState) -> ## Step 3 — JSONL log + sequence numbers +**Sub-deliverables:** +- [x] **3a** — `log:open/2` + `log:append/2` + `log:tip/1` + `log:replay/3` + `log:entries/1` over an in-memory log state (per-actor seq; replay in append order; round-trip the stored activity). `next/tests/log_memory.sh` (12 cases). +- [ ] **3b** — Term codec + on-disk persistence: serializer/parser writing each activity as a JSONL-style line; restart-resumes-tip from the segment file. Blocker risk: `atom_to_list`/`integer_to_list` return SX strings (not Erlang charlists) and there is no `binary_to_list`, so a term-to-binary path needs a workaround. +- [ ] **3c** — Segment rotation at size threshold + gen_server-mediated concurrent appends. + **Deliverables:** ```erlang @@ -937,6 +942,7 @@ A few things still under-specified; resolve as work begins. Newest first. One line per sub-deliverable commit. Erlang conformance gate (`bash lib/erlang/conformance.sh`) must remain 729/729 on every entry. 
+- **2026-05-27** — Step 3a: `log:open/2 append/2 tip/1 replay/3 entries/1` over an in-memory state (per-actor seq, replay in append order, round-trip activities). `next/tests/log_memory.sh` 12/12. Pivoted from on-disk in this iteration: this port's `atom_to_list`/`integer_to_list` return SX strings rather than Erlang charlists, `binary_to_list` is unregistered, and `$X` char literals decode to nil — so a term codec needs a workaround. Captured as the Step 3b risk note in the plan. Erlang conformance 729/729. - **2026-05-26** — Step 2c: `envelope:verify_signature/2` — time-aware key lookup over `public_keys` (created ≤ published < superseded_at), MAC recompute via `crypto:hash(sha256, KeyMaterial ++ canonical_bytes)`, compared against `signature.value`. Returns ok or one of `no_signature | no_key_id | no_published | no_keys | no_active_key | bad_signature`. `next/tests/envelope_sig.sh` 11/11 pass. Erlang conformance 729/729. - **2026-05-26** — Step 2b: `envelope:canonical_bytes/1` — strip signature, insertion-sort property list by key, return host-CID-string as deterministic byte form (dag-cbor stand-in). `next/tests/envelope_canonical.sh` 8/8 pass. Erlang conformance 729/729 preserved. - **2026-05-26** — Step 2a: `next/kernel/envelope.erl` `validate_shape/1` + `get_field/2` over property-list envelopes (Erlang `#{}` maps not supported in this port). `next/tests/envelope_shape.sh` 15/15 pass. Erlang conformance 729/729 preserved.