-module(log).
-export([open/2, open_disk/2, append/2, tip/1, replay/3, entries/1]).

%% Per-actor activity log — the canonical record of everything an
%% actor has emitted, in chronological order. Per design §15.2 this
%% lives on disk as a JSONL segment file; v1 starts with an in-memory
%% backend so the API and seq-number machinery can be locked down
%% before the on-disk format is added (Step 3b).
%%
%% NOTE(review): the disk backend implemented in this module
%% (open_disk/2) writes length-prefixed term_codec frames, not JSONL —
%% confirm which of the two the design intends.
%%
%% State shape (a property list):
%%   [{actor, ActorId}, {base, BasePath}, {seq, NextSeq}, {entries, [Act|...]}]
%% States produced by open_disk/2 additionally carry
%%   {persisted, true} and {path, SegmentPath}.
%%
%% `entries` stores activities in append order — i.e. oldest first.
%% `seq` is the next sequence number that will be assigned by append.
%% `base` is kept on the state for forward-compatibility with 3b
%% (where it becomes the segment-file directory).
%%
%% open/2 takes ActorId + BasePath and returns {ok, LogState} starting
%% with seq=0 and no entries.
%%
%% append/2 returns {ok, NewLogState, AssignedSeq}.
%%
%% tip/1 returns the next seq the log would assign (== count of entries).
%%
%% replay/3 folds Fun(Activity, AssignedSeq, Acc) over every entry in
%% append order. Three-arity rather than two-arity because the plan's
%% example test is "sequence numbers gap-free across replay" — having
%% the seq number visible in the fold makes that test direct.
%%
%% entries/1 is a debug accessor returning [Activity, ...] in append
%% order. Not part of the public API contract.

%% open/2 — fresh in-memory log for ActorId: seq 0, no entries.
%% Nothing is read from or written to BasePath.
open(ActorId, BasePath) ->
    {ok, [{actor, ActorId}, {base, BasePath}, {seq, 0}, {entries, []}]}.

%% append/2 — add Activity at the tail of the log and assign it the
%% current `seq`. Returns {ok, NewLogState, AssignedSeq}.
%%
%% For a disk-backed state (see open_disk/2) the whole segment file is
%% rewritten from the new entry list before returning; a failed write
%% crashes with badmatch rather than returning a state that is ahead
%% of the file.
append(LogState, Activity) ->
    Seq = field(seq, LogState),
    NewEntries = field(entries, LogState) ++ [Activity],
    NewState = replace_field(seq, Seq + 1,
                             replace_field(entries, NewEntries, LogState)),
    case persisted_path(LogState) of
        {persisted, Path} -> ok = write_segment(Path, NewEntries);
        not_persisted -> ok
    end,
    {ok, NewState, Seq}.

%% open_disk/2 — disk-backed variant of open. Reads any existing
%% segment file under BasePath, replays entries into memory state,
%% and tags the state {persisted, true} so future append/2 calls
%% write through. BasePath must be a binary or charlist (real path),
%% not an atom — the in-memory open/2 still accepts atoms for tests.
%%
%% Segment format (per frame): 4-byte big-endian length + that many
%% bytes of term_codec:encode(Activity). Whole file is the concat of
%% all frames in append order; no header.
%%
%% Returns {ok, LogState} on success, {error, {corrupt, Reason}} if
%% the segment is truncated/garbled, {error, {read, Reason}} on other
%% file errors. Missing file is treated as an empty fresh log.
open_disk(ActorId, BasePath) ->
    Path = segment_path(ActorId, BasePath),
    case try_read_segment(Path) of
        {ok, Entries} ->
            {ok, [{actor, ActorId},
                  {base, BasePath},
                  {seq, length(Entries)},
                  {entries, Entries},
                  {persisted, true},
                  {path, Path}]};
        {error, _} = Error ->
            Error
    end.

%% persisted_path/1 — {persisted, Path} when the state is tagged
%% {persisted, true} AND carries a {path, Path} entry; not_persisted
%% otherwise (which includes every state built by open/2).
persisted_path(LogState) ->
    persisted_path(lookup(persisted, LogState), lookup(path, LogState)).

persisted_path(true, undefined) -> not_persisted;
persisted_path(true, Path) -> {persisted, Path};
persisted_path(_, _) -> not_persisted.

%% segment_path/2 — returns the segment file path as a charlist (list
%% of int char codes). BasePath may be a binary OR a charlist; we
%% normalize to charlist via binary_to_list so the result is purely
%% cons-based — this works around an iolist-walker quirk in
%% er-source-to-string that surfaces when list_to_binary nests binaries
%% built from charlists. file:read_file accepts charlists fine.
%% ActorId must be an atom (it goes through atom_to_list/1).
segment_path(ActorId, BasePath) ->
    base_chars(BasePath) ++ [$/] ++ atom_to_list(ActorId) ++ [$., $l, $o, $g].

%% base_chars/1 — normalize a binary-or-charlist path to a charlist.
base_chars(B) when is_binary(B) -> binary_to_list(B);
base_chars(L) when is_list(L) -> L.

%% write_segment/2 — overwrite the file at Path with one frame per
%% entry, in list order. Returns whatever file:write_file/2 returns.
write_segment(Path, Entries) ->
    Frames = [frame(term_codec:encode(E)) || E <- Entries],
    file:write_file(Path, list_to_binary(Frames)).

%% frame/1 — prepend 4-byte big-endian length to Payload.
%% The prefix can only represent lengths up to 4294967295; a larger
%% payload raises {frame_too_large, Len} instead of silently wrapping
%% the length and producing a segment that cannot be decoded.
frame(Payload) when is_binary(Payload) ->
    Len = byte_size(Payload),
    case Len > 4294967295 of
        true ->
            erlang:error({frame_too_large, Len});
        false ->
            B3 = (Len div 16777216) rem 256,
            B2 = (Len div 65536) rem 256,
            B1 = (Len div 256) rem 256,
            B0 = Len rem 256,
            [B3, B2, B1, B0, Payload]
    end.

%% try_read_segment/1 — read and decode the segment file at Path.
%% Returns {ok, Entries} (oldest first), {ok, []} when the file does
%% not exist, {error, {corrupt, Reason}} when decoding throws or
%% errors, and {error, {read, Reason}} for any other read failure.
try_read_segment(Path) ->
    case file:read_file(Path) of
        {ok, Bin} ->
            try
                {ok, decode_frames(binary_to_list(Bin), [])}
            catch
                throw:Reason -> {error, {corrupt, Reason}};
                error:Reason -> {error, {corrupt, Reason}}
            end;
        {error, enoent} ->
            {ok, []};
        {error, R} ->
            {error, {read, R}}
    end.

%% decode_frames/2 — walk a byte list frame by frame, accumulating the
%% decoded terms; returns them in file order. Throws truncated_header
%% when 1..3 bytes are left over, truncated_body (via take_n/2) when a
%% frame is shorter than its length prefix, and {decode, R} when
%% term_codec:decode/1 reports {error, R}.
decode_frames([], Acc) ->
    lists:reverse(Acc);
decode_frames([B3, B2, B1, B0 | Rest], Acc) ->
    Len = B3 * 16777216 + B2 * 65536 + B1 * 256 + B0,
    {Payload, Rest2} = take_n(Len, Rest),
    case term_codec:decode(list_to_binary(Payload)) of
        %% The third element of the ok tuple is ignored here.
        {ok, Term, _} -> decode_frames(Rest2, [Term | Acc]);
        {error, R} -> throw({decode, R})
    end;
decode_frames(_, _) ->
    throw(truncated_header).

%% take_n/2 — split off the first N elements: {FirstN, Remainder}.
%% Throws truncated_body when the list has fewer than N elements.
%% Tail-recursive so stack use does not grow with the payload size.
take_n(N, Bytes) ->
    take_n(N, Bytes, []).

take_n(0, Rest, Acc) -> {lists:reverse(Acc), Rest};
take_n(N, [B | Rest], Acc) -> take_n(N - 1, Rest, [B | Acc]);
take_n(_, [], _) -> throw(truncated_body).

%% tip/1 — the sequence number the next append/2 will assign.
tip(LogState) ->
    field(seq, LogState).

%% replay/3 — left fold of Fun(Activity, Seq, Acc) over the entries in
%% append order, with Seq counting up from 0. Returns the final Acc.
replay(LogState, InitAcc, Fun) ->
    replay_loop(field(entries, LogState), 0, InitAcc, Fun).

replay_loop([], _, Acc, _) ->
    Acc;
replay_loop([Act | Rest], Seq, Acc, Fun) ->
    replay_loop(Rest, Seq + 1, Fun(Act, Seq, Acc), Fun).

%% entries/1 — debug accessor: the activities, oldest first.
entries(LogState) ->
    field(entries, LogState).

%% field/2 — value of the first {K, V} pair in the state; raises
%% error:badkey when the key is absent (required fields only).
field(K, [{K, V} | _]) -> V;
field(K, [_ | Rest]) -> field(K, Rest);
field(_, []) -> erlang:error(badkey).

%% lookup/2 — like field but returns `undefined` for missing key
%% (used by persisted_path/1 which probes optional state fields).
lookup(K, [{K, V} | _]) -> V;
lookup(K, [_ | Rest]) -> lookup(K, Rest);
lookup(_, []) -> undefined.

%% replace_field/3 — replace the value of the first {K, _} pair,
%% keeping its position; appends {K, V} at the end when K is absent.
replace_field(K, V, []) -> [{K, V}];
replace_field(K, V, [{K, _} | Rest]) -> [{K, V} | Rest];
replace_field(K, V, [P | Rest]) -> [P | replace_field(K, V, Rest)].