fed-sx-m1: Step 3b on-disk log — open_disk/2 + write-through append/2 + length-framed segments; 12/12 log_disk tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 29s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 29s
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
-module(log).
|
-module(log).
|
||||||
-export([open/2, append/2, tip/1, replay/3, entries/1]).
|
-export([open/2, open_disk/2, append/2, tip/1, replay/3, entries/1]).
|
||||||
|
|
||||||
%% Per-actor activity log — the canonical record of everything an
|
%% Per-actor activity log — the canonical record of everything an
|
||||||
%% actor has emitted, in chronological order. Per design §15.2 this
|
%% actor has emitted, in chronological order. Per design §15.2 this
|
||||||
@@ -36,9 +36,112 @@ open(ActorId, BasePath) ->
|
|||||||
append(LogState, Activity) ->
|
append(LogState, Activity) ->
|
||||||
Seq = field(seq, LogState),
|
Seq = field(seq, LogState),
|
||||||
Entries = field(entries, LogState),
|
Entries = field(entries, LogState),
|
||||||
|
NewEntries = Entries ++ [Activity],
|
||||||
NewState = replace_field(seq, Seq + 1,
|
NewState = replace_field(seq, Seq + 1,
|
||||||
replace_field(entries, Entries ++ [Activity], LogState)),
|
replace_field(entries, NewEntries, LogState)),
|
||||||
{ok, NewState, Seq}.
|
case persisted_path(LogState) of
|
||||||
|
{persisted, Path} ->
|
||||||
|
ok = write_segment(Path, NewEntries),
|
||||||
|
{ok, NewState, Seq};
|
||||||
|
not_persisted ->
|
||||||
|
{ok, NewState, Seq}
|
||||||
|
end.
|
||||||
|
|
||||||
|
%% open_disk/2 — disk-backed variant of open. Reads any existing
|
||||||
|
%% segment file under BasePath, replays entries into memory state,
|
||||||
|
%% and tags the state {persisted, true} so future append/2 calls
|
||||||
|
%% write through. BasePath must be a binary or charlist (real path),
|
||||||
|
%% not an atom — the in-memory open/2 still accepts atoms for tests.
|
||||||
|
%%
|
||||||
|
%% Segment format (per frame): 4-byte big-endian length + that many
|
||||||
|
%% bytes of term_codec:encode(Activity). Whole file is the concat of
|
||||||
|
%% all frames in append order; no header.
|
||||||
|
%%
|
||||||
|
%% Returns {ok, LogState} on success, {error, {corrupt, Reason}} if
|
||||||
|
%% the segment is truncated/garbled, {error, {read, Reason}} on other
|
||||||
|
%% file errors. Missing file is treated as an empty fresh log.
|
||||||
|
open_disk(ActorId, BasePath) ->
|
||||||
|
Path = segment_path(ActorId, BasePath),
|
||||||
|
case try_read_segment(Path) of
|
||||||
|
{ok, Entries} ->
|
||||||
|
State = [{actor, ActorId}, {base, BasePath},
|
||||||
|
{seq, length(Entries)},
|
||||||
|
{entries, Entries},
|
||||||
|
{persisted, true},
|
||||||
|
{path, Path}],
|
||||||
|
{ok, State};
|
||||||
|
{error, _} = E ->
|
||||||
|
E
|
||||||
|
end.
|
||||||
|
|
||||||
|
persisted_path(LogState) ->
|
||||||
|
case lookup(persisted, LogState) of
|
||||||
|
true ->
|
||||||
|
case lookup(path, LogState) of
|
||||||
|
undefined -> not_persisted;
|
||||||
|
P -> {persisted, P}
|
||||||
|
end;
|
||||||
|
_ -> not_persisted
|
||||||
|
end.
|
||||||
|
|
||||||
|
%% segment_path/2 — returns the segment file path as a charlist (list
|
||||||
|
%% of int char codes). BasePath may be a binary OR a charlist; we
|
||||||
|
%% normalize to charlist via binary_to_list so the result is purely
|
||||||
|
%% cons-based — this works around an iolist-walker quirk in
|
||||||
|
%% er-source-to-string that surfaces when list_to_binary nests binaries
|
||||||
|
%% built from charlists. file:read_file accepts charlists fine.
|
||||||
|
segment_path(ActorId, BasePath) ->
|
||||||
|
base_chars(BasePath) ++ [$/] ++ atom_to_list(ActorId)
|
||||||
|
++ [$., $l, $o, $g].
|
||||||
|
|
||||||
|
base_chars(B) when is_binary(B) -> binary_to_list(B);
|
||||||
|
base_chars(L) when is_list(L) -> L.
|
||||||
|
|
||||||
|
write_segment(Path, Entries) ->
|
||||||
|
Frames = [frame(term_codec:encode(E)) || E <- Entries],
|
||||||
|
file:write_file(Path, list_to_binary(Frames)).
|
||||||
|
|
||||||
|
%% frame/1 — prepend 4-byte big-endian length to Payload.
|
||||||
|
frame(Payload) when is_binary(Payload) ->
|
||||||
|
L = byte_size(Payload),
|
||||||
|
B3 = (L div 16777216) rem 256,
|
||||||
|
B2 = (L div 65536) rem 256,
|
||||||
|
B1 = (L div 256) rem 256,
|
||||||
|
B0 = L rem 256,
|
||||||
|
[B3, B2, B1, B0, Payload].
|
||||||
|
|
||||||
|
try_read_segment(Path) ->
|
||||||
|
case file:read_file(Path) of
|
||||||
|
{ok, Bin} ->
|
||||||
|
try {ok, decode_frames(binary_to_list(Bin), [])}
|
||||||
|
catch
|
||||||
|
throw:Reason -> {error, {corrupt, Reason}};
|
||||||
|
error:Reason -> {error, {corrupt, Reason}}
|
||||||
|
end;
|
||||||
|
{error, enoent} ->
|
||||||
|
{ok, []};
|
||||||
|
{error, R} ->
|
||||||
|
{error, {read, R}}
|
||||||
|
end.
|
||||||
|
|
||||||
|
decode_frames([], Acc) ->
|
||||||
|
lists:reverse(Acc);
|
||||||
|
decode_frames([B3, B2, B1, B0 | Rest], Acc) ->
|
||||||
|
Len = B3 * 16777216 + B2 * 65536 + B1 * 256 + B0,
|
||||||
|
{Payload, Rest2} = take_n(Len, Rest),
|
||||||
|
case term_codec:decode(list_to_binary(Payload)) of
|
||||||
|
{ok, Term, _} -> decode_frames(Rest2, [Term | Acc]);
|
||||||
|
{error, R} -> throw({decode, R})
|
||||||
|
end;
|
||||||
|
decode_frames(_, _) ->
|
||||||
|
throw(truncated_header).
|
||||||
|
|
||||||
|
take_n(0, R) -> {[], R};
|
||||||
|
take_n(N, [H | T]) ->
|
||||||
|
{Hs, Tl} = take_n(N - 1, T),
|
||||||
|
{[H | Hs], Tl};
|
||||||
|
take_n(_, []) ->
|
||||||
|
throw(truncated_body).
|
||||||
|
|
||||||
tip(LogState) ->
|
tip(LogState) ->
|
||||||
field(seq, LogState).
|
field(seq, LogState).
|
||||||
@@ -58,6 +161,12 @@ field(K, [{K, V} | _]) -> V;
|
|||||||
field(K, [_ | Rest]) -> field(K, Rest);
|
field(K, [_ | Rest]) -> field(K, Rest);
|
||||||
field(_, []) -> erlang:error(badkey).
|
field(_, []) -> erlang:error(badkey).
|
||||||
|
|
||||||
|
%% lookup/2 — like field but returns `undefined` for missing key
|
||||||
|
%% (used by persisted_path/1 which probes optional state fields).
|
||||||
|
lookup(K, [{K, V} | _]) -> V;
|
||||||
|
lookup(K, [_ | Rest]) -> lookup(K, Rest);
|
||||||
|
lookup(_, []) -> undefined.
|
||||||
|
|
||||||
replace_field(K, V, []) -> [{K, V}];
|
replace_field(K, V, []) -> [{K, V}];
|
||||||
replace_field(K, V, [{K, _} | Rest]) -> [{K, V} | Rest];
|
replace_field(K, V, [{K, _} | Rest]) -> [{K, V} | Rest];
|
||||||
replace_field(K, V, [P | Rest]) -> [P | replace_field(K, V, Rest)].
|
replace_field(K, V, [P | Rest]) -> [P | replace_field(K, V, Rest)].
|
||||||
|
|||||||
139
next/tests/log_disk.sh
Executable file
139
next/tests/log_disk.sh
Executable file
@@ -0,0 +1,139 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# next/tests/log_disk.sh — Step 3b on-disk log acceptance test.
|
||||||
|
#
|
||||||
|
# Exercises log:open_disk/2, append/2 (write-through), and the
|
||||||
|
# read-segment-on-reopen path. Uses next/kernel/term_codec.erl for
|
||||||
|
# the entry encoding and a 4-byte big-endian length prefix per frame.
|
||||||
|
|
||||||
|
set -uo pipefail
|
||||||
|
cd "$(git rev-parse --show-toplevel)"
|
||||||
|
|
||||||
|
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
|
||||||
|
if [ ! -x "$SX_SERVER" ]; then
|
||||||
|
SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
|
||||||
|
fi
|
||||||
|
if [ ! -x "$SX_SERVER" ]; then
|
||||||
|
echo "ERROR: sx_server.exe not found." >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Fixed tmp dir so we can refer to it as an Erlang binary literal.
|
||||||
|
DISK_BASE=/tmp/fed_sx_m1_log_disk
|
||||||
|
rm -rf "$DISK_BASE"
|
||||||
|
mkdir -p "$DISK_BASE"
|
||||||
|
|
||||||
|
# Pre-write a corrupted segment file for the corrupt-detect test
|
||||||
|
# (just a truncated 4-byte length header with no payload).
|
||||||
|
printf '\x00\x00\x00\x05XX' > "$DISK_BASE/corrupted.log"
|
||||||
|
|
||||||
|
VERBOSE="${1:-}"
|
||||||
|
PASS=0; FAIL=0; ERRORS=""
|
||||||
|
TMPFILE=$(mktemp); trap "rm -f $TMPFILE; rm -rf $DISK_BASE" EXIT
|
||||||
|
|
||||||
|
cat > "$TMPFILE" <<'EPOCHS'
|
||||||
|
(epoch 1)
|
||||||
|
(load "lib/erlang/tokenizer.sx")
|
||||||
|
(load "lib/erlang/parser.sx")
|
||||||
|
(load "lib/erlang/parser-core.sx")
|
||||||
|
(load "lib/erlang/parser-expr.sx")
|
||||||
|
(load "lib/erlang/parser-module.sx")
|
||||||
|
(load "lib/erlang/transpile.sx")
|
||||||
|
(load "lib/erlang/runtime.sx")
|
||||||
|
(load "lib/erlang/vm/dispatcher.sx")
|
||||||
|
|
||||||
|
(epoch 2)
|
||||||
|
(eval "(get (erlang-load-module (file-read \"next/kernel/term_codec.erl\")) :name)")
|
||||||
|
|
||||||
|
(epoch 3)
|
||||||
|
(eval "(get (erlang-load-module (file-read \"next/kernel/log.erl\")) :name)")
|
||||||
|
|
||||||
|
;; Base path: /tmp/fed_sx_m1_log_disk constructed as an Erlang binary
|
||||||
|
;; via list_to_binary of the char codes. (`<<"...">>` literals don't
|
||||||
|
;; carry through in this port — see Step 3b substrate fix #2.)
|
||||||
|
|
||||||
|
;; --- 3a in-memory open/2 still works unchanged ---
|
||||||
|
(epoch 10)
|
||||||
|
(eval "(get (erlang-eval-ast \"{ok, L} = log:open(alice, base), log:tip(L) =:= 0\") :name)")
|
||||||
|
|
||||||
|
;; --- open_disk on missing file returns empty fresh state ---
|
||||||
|
(epoch 20)
|
||||||
|
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L} = log:open_disk(alice, Base), log:tip(L) =:= 0\") :name)")
|
||||||
|
|
||||||
|
(epoch 21)
|
||||||
|
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L} = log:open_disk(alice, Base), log:entries(L) =:= []\") :name)")
|
||||||
|
|
||||||
|
;; --- append + re-open: entries match ---
|
||||||
|
(epoch 30)
|
||||||
|
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(bob, Base), {ok, L1, _} = log:append(L0, hello), {ok, L2, _} = log:append(L1, world), {ok, L3} = log:open_disk(bob, Base), log:entries(L3) =:= [hello, world]\") :name)")
|
||||||
|
|
||||||
|
;; --- tip resumes correctly across restart ---
|
||||||
|
(epoch 31)
|
||||||
|
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(carol, Base), {ok, L1, _} = log:append(L0, a), {ok, L2, _} = log:append(L1, b), {ok, L3, _} = log:append(L2, c), {ok, L4} = log:open_disk(carol, Base), log:tip(L4) =:= 3\") :name)")
|
||||||
|
|
||||||
|
;; --- replay/3 over re-opened state visits append order ---
|
||||||
|
(epoch 32)
|
||||||
|
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(dave, Base), {ok, L1, _} = log:append(L0, a), {ok, L2, _} = log:append(L1, b), {ok, L3, _} = log:append(L2, c), {ok, L4} = log:open_disk(dave, Base), log:replay(L4, [], fun (X, S, Acc) -> [{S, X} | Acc] end) =:= [{2,c},{1,b},{0,a}]\") :name)")
|
||||||
|
|
||||||
|
;; --- mixed types round-trip (atom, int, binary, tuple, list) ---
|
||||||
|
(epoch 33)
|
||||||
|
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(eve, Base), {ok, L1, _} = log:append(L0, foo), {ok, L2, _} = log:append(L1, 42), {ok, L3, _} = log:append(L2, <<1,2,3>>), {ok, L4, _} = log:append(L3, {pair, alice, bob}), {ok, L5, _} = log:append(L4, [1, two, <<3>>]), {ok, L6} = log:open_disk(eve, Base), log:entries(L6) =:= [foo, 42, <<1,2,3>>, {pair, alice, bob}, [1, two, <<3>>]]\") :name)")
|
||||||
|
|
||||||
|
;; --- continuing to append after reopen preserves chronology ---
|
||||||
|
(epoch 34)
|
||||||
|
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, L0} = log:open_disk(frank, Base), {ok, L1, _} = log:append(L0, a), {ok, L2} = log:open_disk(frank, Base), {ok, L3, S} = log:append(L2, b), {S, log:tip(L3)} =:= {1, 2}\") :name)")
|
||||||
|
|
||||||
|
;; --- corrupted segment returns {error, _} not crash ---
|
||||||
|
(epoch 40)
|
||||||
|
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), element(1, log:open_disk(corrupted, Base))\") :name)")
|
||||||
|
|
||||||
|
;; --- per-actor isolation: two disk-backed logs are independent ---
|
||||||
|
(epoch 41)
|
||||||
|
(eval "(get (erlang-eval-ast \"Base = list_to_binary([$/, $t, $m, $p, $/, $f, $e, $d, $_, $s, $x, $_, $m, $1, $_, $l, $o, $g, $_, $d, $i, $s, $k]), {ok, LA0} = log:open_disk(g1, Base), {ok, LB0} = log:open_disk(g2, Base), {ok, LA1, _} = log:append(LA0, x), {ok, LB1, _} = log:append(LB0, y1), {ok, LB2, _} = log:append(LB1, y2), {ok, LAr} = log:open_disk(g1, Base), {ok, LBr} = log:open_disk(g2, Base), {log:entries(LAr), log:entries(LBr)} =:= {[x], [y1, y2]}\") :name)")
|
||||||
|
EPOCHS
|
||||||
|
|
||||||
|
OUTPUT=$(timeout 90 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
|
||||||
|
|
||||||
|
check() {
|
||||||
|
local epoch="$1" desc="$2" expected="$3"
|
||||||
|
local actual
|
||||||
|
actual=$(echo "$OUTPUT" | grep -A1 "^(ok-len $epoch " | tail -1 || true)
|
||||||
|
if echo "$actual" | grep -q "^(ok-len"; then actual=""; fi
|
||||||
|
if [ -z "$actual" ]; then
|
||||||
|
actual=$(echo "$OUTPUT" | grep "^(ok $epoch " | head -1 || true)
|
||||||
|
fi
|
||||||
|
if [ -z "$actual" ]; then
|
||||||
|
actual=$(echo "$OUTPUT" | grep "^(error $epoch " | head -1 || true)
|
||||||
|
fi
|
||||||
|
[ -z "$actual" ] && actual="<no output for epoch $epoch>"
|
||||||
|
|
||||||
|
if echo "$actual" | grep -qF -- "$expected"; then
|
||||||
|
PASS=$((PASS+1))
|
||||||
|
[ "$VERBOSE" = "-v" ] && echo " ok $desc"
|
||||||
|
else
|
||||||
|
FAIL=$((FAIL+1))
|
||||||
|
ERRORS+=" FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual
|
||||||
|
"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
check 2 "term_codec loads" "term_codec"
|
||||||
|
check 3 "log module loads" "log"
|
||||||
|
check 10 "3a in-memory open/2 compat" "true"
|
||||||
|
check 20 "open_disk missing -> tip 0" "true"
|
||||||
|
check 21 "open_disk missing -> []" "true"
|
||||||
|
check 30 "append+reopen entries match" "true"
|
||||||
|
check 31 "tip resumes after restart" "true"
|
||||||
|
check 32 "replay chronological" "true"
|
||||||
|
check 33 "mixed types round-trip" "true"
|
||||||
|
check 34 "append after reopen" "true"
|
||||||
|
check 40 "corrupted segment -> error" "error"
|
||||||
|
check 41 "per-actor isolation" "true"
|
||||||
|
|
||||||
|
TOTAL=$((PASS+FAIL))
|
||||||
|
if [ $FAIL -eq 0 ]; then
|
||||||
|
echo "ok $PASS/$TOTAL log_disk tests passed"
|
||||||
|
else
|
||||||
|
echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
|
||||||
|
echo "$ERRORS"
|
||||||
|
fi
|
||||||
|
[ $FAIL -eq 0 ]
|
||||||
@@ -197,7 +197,7 @@ verify_signature(Activity, ActorState) ->
|
|||||||
|
|
||||||
**Sub-deliverables:**
|
**Sub-deliverables:**
|
||||||
- [x] **3a** — `log:open/2` + `log:append/2` + `log:tip/1` + `log:replay/3` + `log:entries/1` over an in-memory log state (per-actor seq; replay in append order; round-trip the stored activity). `next/tests/log_memory.sh` (12 cases).
|
- [x] **3a** — `log:open/2` + `log:append/2` + `log:tip/1` + `log:replay/3` + `log:entries/1` over an in-memory log state (per-actor seq; replay in append order; round-trip the stored activity). `next/tests/log_memory.sh` (12 cases).
|
||||||
- [~] **3b** — Term codec landed (`next/kernel/term_codec.erl`): `encode/1`/`decode/1` for atoms, integers, binaries, tuples, lists, nesting; netstring-ish framing (`a/i/b/t/l` tag + length + body); byte-clean (binary bodies may contain NUL/LF). 18 round-trip + streaming + bad-form tests in `next/tests/term_codec.sh`. On-disk segment writer (open/2 reads existing, append/2 writes-through, replay/3 reads from disk) is the next sub-step — codec is the load-bearing piece.
|
- [x] **3b** — Term codec + on-disk persistence. Codec: `next/kernel/term_codec.erl` `encode/1` + `decode/1` over netstring framing (`a/i/b/t/l` + length + body; binary bodies byte-clean — NUL/LF allowed). On-disk: `log:open_disk/2(ActorId, BasePath)` reads any existing segment file (charlist path = `BasePath ++ "/" ++ atom_to_list(ActorId) ++ ".log"`); `append/2` is polymorphic on a `{persisted, true}` state field and writes through. Frame format on disk: 4-byte big-endian length prefix + `term_codec:encode(Activity)`. `try_read_segment` catches throw/error and surfaces `{error, {corrupt, Reason}}`. 18 codec round-trips + 12 disk acceptance tests (`next/tests/term_codec.sh`, `next/tests/log_disk.sh`); 3a in-memory `open/2` semantics unchanged. `encode/1`/`decode/1` for atoms, integers, binaries, tuples, lists, nesting; netstring-ish framing (`a/i/b/t/l` tag + length + body); byte-clean (binary bodies may contain NUL/LF). 18 round-trip + streaming + bad-form tests in `next/tests/term_codec.sh`. On-disk segment writer (open/2 reads existing, append/2 writes-through, replay/3 reads from disk) is the next sub-step — codec is the load-bearing piece.
|
||||||
- [ ] **3c** — Segment rotation at size threshold + gen_server-mediated concurrent appends.
|
- [ ] **3c** — Segment rotation at size threshold + gen_server-mediated concurrent appends.
|
||||||
|
|
||||||
**Blockers (Step 3b) — byte-level path resolved 2026-06-04:** `binary_to_list/1` and `list_to_binary/1` are now registered Erlang BIFs in `lib/erlang/runtime.sx` (Step 3b substrate fix, +9 ffi tests, 738/738 conformance). `list_to_binary` is iolist-aware: accepts nested cons of integer bytes (0-255) and/or binaries; `binary_to_list` returns a proper Erlang charlist of integers. Round-trip verified: `list_to_binary(binary_to_list(B)) =:= B`. On-disk segment writer (3b) can now build segment bytes from `[Header, IoListPayload]` and reconstruct on read — option (c) of the original workaround menu is now cheap. `$X` char literals now decode correctly **as of 2026-06-04**: the Erlang tokenizer's `(= ch "$")` branch (`lib/erlang/tokenizer.sx`) now emits the decimal char code as the token value instead of the raw `$X` text (which `parse-number` couldn't decode → nil). Plain chars use `char->integer` of the first char; the standard escape table (`\n=10 \t=9 \r=13 \s=32 \b=8 \e=27 \f=12 \v=11 \d=127 \0=0 \\=92 \"=34 \'=39`) handles `$\X` forms. So `[$h, $i | T]` patterns and `list_to_binary([$f,$e,$d])` both work end-to-end. +12 eval tests, 750/750. Combined with 3b's `binary_to_list`/`list_to_binary`, Erlang code can now read/write byte sequences and string-shaped char lists fluently. **All three substrate gaps resolved as of 2026-06-05.** `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons of int char codes — standard Erlang semantics) via a new `er-string->charlist` helper in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer). Composition works end-to-end: `list_to_binary(atom_to_list(hello)) =:= <<104,101,108,108,111>>` and `integer_to_list(N)` round-trips through `list_to_integer`. 5 existing eval tests rewritten to charlist semantics, 8 new charlist-aware tests added (759/759). The full term-codec primitive set — `binary_to_list`, `list_to_binary`, `$X`, `atom_to_list`, `integer_to_list` charlist semantics, plus existing `file:read_file`/`write_file`/`list_dir` — is now in place.
|
**Blockers (Step 3b) — byte-level path resolved 2026-06-04:** `binary_to_list/1` and `list_to_binary/1` are now registered Erlang BIFs in `lib/erlang/runtime.sx` (Step 3b substrate fix, +9 ffi tests, 738/738 conformance). `list_to_binary` is iolist-aware: accepts nested cons of integer bytes (0-255) and/or binaries; `binary_to_list` returns a proper Erlang charlist of integers. Round-trip verified: `list_to_binary(binary_to_list(B)) =:= B`. On-disk segment writer (3b) can now build segment bytes from `[Header, IoListPayload]` and reconstruct on read — option (c) of the original workaround menu is now cheap. `$X` char literals now decode correctly **as of 2026-06-04**: the Erlang tokenizer's `(= ch "$")` branch (`lib/erlang/tokenizer.sx`) now emits the decimal char code as the token value instead of the raw `$X` text (which `parse-number` couldn't decode → nil). Plain chars use `char->integer` of the first char; the standard escape table (`\n=10 \t=9 \r=13 \s=32 \b=8 \e=27 \f=12 \v=11 \d=127 \0=0 \\=92 \"=34 \'=39`) handles `$\X` forms. So `[$h, $i | T]` patterns and `list_to_binary([$f,$e,$d])` both work end-to-end. +12 eval tests, 750/750. Combined with 3b's `binary_to_list`/`list_to_binary`, Erlang code can now read/write byte sequences and string-shaped char lists fluently. **All three substrate gaps resolved as of 2026-06-05.** `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons of int char codes — standard Erlang semantics) via a new `er-string->charlist` helper in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer). Composition works end-to-end: `list_to_binary(atom_to_list(hello)) =:= <<104,101,108,108,111>>` and `integer_to_list(N)` round-trips through `list_to_integer`. 5 existing eval tests rewritten to charlist semantics, 8 new charlist-aware tests added (759/759). The full term-codec primitive set — `binary_to_list`, `list_to_binary`, `$X`, `atom_to_list`, `integer_to_list` charlist semantics, plus existing `file:read_file`/`write_file`/`list_dir` — is now in place.
|
||||||
@@ -1003,6 +1003,7 @@ A few things still under-specified; resolve as work begins.
|
|||||||
Newest first. One line per sub-deliverable commit. Erlang conformance gate
|
Newest first. One line per sub-deliverable commit. Erlang conformance gate
|
||||||
(`bash lib/erlang/conformance.sh`) must remain 729/729 on every entry.
|
(`bash lib/erlang/conformance.sh`) must remain 729/729 on every entry.
|
||||||
|
|
||||||
|
- **2026-06-05** — Step 3b on-disk log: `next/kernel/log.erl` gains `open_disk/2(ActorId, BasePath)` and a write-through `append/2`. New state field `{persisted, true} | {path, CharList}` keys the polymorphism — 3a's in-memory `open/2` stays untouched and tests unchanged. `segment_path/2` builds the path as a charlist (`base_chars(BasePath) ++ "/" ++ atom_to_list(ActorId) ++ ".log"`) so it works whether the caller passes a binary or charlist BasePath; everything flows through `er-source-to-string` cleanly. On-disk frame format: 4-byte big-endian length prefix + `term_codec:encode(Activity)`. Restart path: `try_read_segment` reads the whole segment, length-decodes each frame, decodes via `term_codec`, returns `{ok, Entries}`; missing file → `{ok, []}`; throw/error during decode → `{error, {corrupt, _}}`. `next/tests/log_disk.sh` 12/12: open-missing-fresh, append+reopen-entries-match, tip-resumes, replay-chronological, mixed-types (atom/int/binary/tuple/list) round-trip, append-after-reopen, corrupted-segment, per-actor isolation, 3a back-compat. Erlang conformance **761/761** unchanged (log.erl is in next/, not lib/erlang/). Step 3b is now FULLY ticked; 3c (segment rotation + gen_server-mediated concurrent appends) remains for the next iteration.
|
||||||
- **2026-06-05** — Step 3b substrate fix #4: integer-literal eval now produces real ints (was floats). `transpile.sx`'s `(= ty "integer") (parse-number ...)` path returns `float_of_string` per host's `parse-number`, so `42`, `$X`, etc. were floats that `(integer? v)` returned true for but `(integer->char v)` rejected. Wrapped in `truncate` so all integer literals coerce to strict int; added nil-guard with a descriptive error. Discovered while debugging Step 3b on-disk log (file:read_file on a charlist path failed at the inner `(map integer->char ...)` because charlist elements were floats). Conformance **761/761** (eval 406→408, +2 net; no other suites changed). Unblocks any path that does `integer->char` on int-literal-derived values — most notably `file:read_file` / `file:write_file` on charlist paths and binaries built from `$X` literals.
|
- **2026-06-05** — Step 3b substrate fix #4: integer-literal eval now produces real ints (was floats). `transpile.sx`'s `(= ty "integer") (parse-number ...)` path returns `float_of_string` per host's `parse-number`, so `42`, `$X`, etc. were floats that `(integer? v)` returned true for but `(integer->char v)` rejected. Wrapped in `truncate` so all integer literals coerce to strict int; added nil-guard with a descriptive error. Discovered while debugging Step 3b on-disk log (file:read_file on a charlist path failed at the inner `(map integer->char ...)` because charlist elements were floats). Conformance **761/761** (eval 406→408, +2 net; no other suites changed). Unblocks any path that does `integer->char` on int-literal-derived values — most notably `file:read_file` / `file:write_file` on charlist paths and binaries built from `$X` literals.
|
||||||
- **2026-06-05** — Step 3b codec landed: `next/kernel/term_codec.erl` with `encode/1` + `decode/1` over a netstring-ish wire format (`a` atom / `i` int / `b` binary / `t` tuple / `l` list, each as `tag + decimal-length + ":" + body`; nil = `l0:`). Byte-clean — binary bodies may contain NUL, LF, or any byte; encoding stays parseable. Built end-to-end on the three substrate fixes (binary_to_list/list_to_binary + $X + atom_to_list/integer_to_list charlists). `decode/1` returns `{ok, Term, RestBinary}` so callers can stream multiple frames from one buffer. 18 acceptance tests in `next/tests/term_codec.sh`: encode bytes for every leaf type, round-trip for each, nested activity-shaped term (`{create, [{id,1},{actor,alice},{payload,<<104,105>>}]}`), 2-frame streaming, binary with embedded NUL+LF, bad-form returns `{error, badform}` not crash. Erlang conformance **759/759** unchanged (codec is in `next/`, not lib/erlang/). Step 3b on-disk segment writer (the second half — open/append/replay reading/writing the actual segment file) is the natural next iteration: encode each activity with `term_codec`, frame with a 4-byte big-endian length prefix, append to disk.
|
- **2026-06-05** — Step 3b codec landed: `next/kernel/term_codec.erl` with `encode/1` + `decode/1` over a netstring-ish wire format (`a` atom / `i` int / `b` binary / `t` tuple / `l` list, each as `tag + decimal-length + ":" + body`; nil = `l0:`). Byte-clean — binary bodies may contain NUL, LF, or any byte; encoding stays parseable. Built end-to-end on the three substrate fixes (binary_to_list/list_to_binary + $X + atom_to_list/integer_to_list charlists). `decode/1` returns `{ok, Term, RestBinary}` so callers can stream multiple frames from one buffer. 18 acceptance tests in `next/tests/term_codec.sh`: encode bytes for every leaf type, round-trip for each, nested activity-shaped term (`{create, [{id,1},{actor,alice},{payload,<<104,105>>}]}`), 2-frame streaming, binary with embedded NUL+LF, bad-form returns `{error, badform}` not crash. Erlang conformance **759/759** unchanged (codec is in `next/`, not lib/erlang/). Step 3b on-disk segment writer (the second half — open/append/replay reading/writing the actual segment file) is the natural next iteration: encode each activity with `term_codec`, frame with a 4-byte big-endian length prefix, append to disk.
|
||||||
- **2026-06-05** — Step 3b substrate fix #3 (final): `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons-of-int-char-codes) instead of SX strings — standard Erlang semantics. New helper `er-string->charlist` in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer, which already handles both shapes). 5 existing eval tests rewritten to match new semantics (e.g. `length(atom_to_list(hello)) =:= 5`, `hd(integer_to_list(42)) =:= 52`). 8 new charlist-coverage tests demonstrating composition: `list_to_binary(atom_to_list(ok)) =:= <<111,107>>`; `list_to_atom([$f,$o,$o])` round-trips; `list_to_integer([$1,$0,$0]) =:= 100`. Erlang conformance **759/759** (eval 397→406, +9 net). The full term-codec primitive set — `binary_to_list`/`list_to_binary` (24e3bf53), `$X` literals (3d80bd8c), and now `atom_to_list`/`integer_to_list` charlists — is in place; Step 3b on-disk segment writer can encode arbitrary Erlang activity terms (atoms, ints, binaries, tuples, lists) into byte sequences using only Erlang-native primitives.
|
- **2026-06-05** — Step 3b substrate fix #3 (final): `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons-of-int-char-codes) instead of SX strings — standard Erlang semantics. New helper `er-string->charlist` in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer, which already handles both shapes). 5 existing eval tests rewritten to match new semantics (e.g. `length(atom_to_list(hello)) =:= 5`, `hd(integer_to_list(42)) =:= 52`). 8 new charlist-coverage tests demonstrating composition: `list_to_binary(atom_to_list(ok)) =:= <<111,107>>`; `list_to_atom([$f,$o,$o])` round-trips; `list_to_integer([$1,$0,$0]) =:= 100`. Erlang conformance **759/759** (eval 397→406, +9 net). The full term-codec primitive set — `binary_to_list`/`list_to_binary` (24e3bf53), `$X` literals (3d80bd8c), and now `atom_to_list`/`integer_to_list` charlists — is in place; Step 3b on-disk segment writer can encode arbitrary Erlang activity terms (atoms, ints, binaries, tuples, lists) into byte sequences using only Erlang-native primitives.
|
||||||
|
|||||||
Reference in New Issue
Block a user