fed-sx-m1: Step 3b codec — next/kernel/term_codec.erl encode/decode + 18 round-trip tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
This commit is contained in:
105
next/kernel/term_codec.erl
Normal file
105
next/kernel/term_codec.erl
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
-module(term_codec).
|
||||||
|
-export([encode/1, decode/1]).
|
||||||
|
|
||||||
|
%% Erlang-side term <-> binary codec, built on the substrate fixes from
|
||||||
|
%% commits 24e3bf53 (binary_to_list / list_to_binary), 3d80bd8c ($X char
|
||||||
|
%% literals), 4852cca9 (atom_to_list / integer_to_list charlists).
|
||||||
|
%%
|
||||||
|
%% Wire format (netstring-ish; all length headers ASCII decimal):
|
||||||
|
%%
|
||||||
|
%% atom $a Len $: NameBytes
|
||||||
|
%% integer $i Len $: DecimalBytes (negative ints carry leading $-)
|
||||||
|
%% binary $b Len $: RawBytes
|
||||||
|
%% tuple $t Count $: Enc1 Enc2 ... Encn
|
||||||
|
%% list $l Count $: Enc1 Enc2 ... Encn (proper list)
|
||||||
|
%% nil $l $0 $: (empty list)
|
||||||
|
%%
|
||||||
|
%% Each Enc is itself one of these forms — recursive. The format is
|
||||||
|
%% byte-clean: binary bodies may contain any byte (newlines, NULs, etc.),
|
||||||
|
%% so callers can frame entries with a 4-byte big-endian length prefix
|
||||||
|
%% (Step 3b on-disk segment writer's job).
|
||||||
|
|
||||||
|
%% encode/1: term -> binary
%%
%% Serialises an atom, integer, binary, tuple or proper list into the
%% tagged wire format described in the module header. Tuples and lists
%% encode each element recursively and carry their element count in the
%% header; leaf terms carry the byte length of their body. Any other term
%% (and any improper list) is not matched / raises, as before.
encode(Atom) when is_atom(Atom) ->
    Name = atom_to_list(Atom),
    frame($a, length(Name), Name);
encode(Int) when is_integer(Int) ->
    Digits = integer_to_list(Int),
    frame($i, length(Digits), Digits);
encode(Bin) when is_binary(Bin) ->
    frame($b, byte_size(Bin), Bin);
encode(Tuple) when is_tuple(Tuple) ->
    Elems = tuple_to_list(Tuple),
    frame($t, length(Elems), [encode(Elem) || Elem <- Elems]);
encode([]) ->
    frame($l, 0, []);
encode(List) when is_list(List) ->
    frame($l, length(List), [encode(Elem) || Elem <- List]).

%% frame/3: Tag byte, decimal Count, $: separator, then Body (an iolist),
%% flattened into a single binary.
frame(Tag, Count, Body) ->
    list_to_binary([Tag, integer_to_list(Count), $:, Body]).
|
||||||
|
|
||||||
|
%% decode/1: binary -> {ok, Term, RestBinary} | {error, badform}
%% On success returns the remaining unconsumed bytes so callers can
%% stream-decode multiple frames from one buffer.
%%
%% Every malformed input yields {error, badform} instead of raising:
%% an unknown tag, a header with no length digits or no $: separator,
%% a body shorter than its declared length (e.g. a truncated frame),
%% an integer body that is not [-]digits, or a bad nested element.
%% A non-binary argument is still a function_clause error.
%%
%% Decoding walks one charlist end to end; the unconsumed remainder is
%% turned back into a binary exactly once, here, rather than once per
%% nested element.
%%
%% NOTE(security): the $a branch interns whatever name the input carries
%% via list_to_atom/1. On a stock BEAM atoms are never garbage-collected,
%% so decoding untrusted bytes can exhaust the atom table. Review before
%% exposing decode/1 to data from other nodes.
decode(B) when is_binary(B) ->
    case decode_chars(binary_to_list(B)) of
        {ok, Term, Rest} -> {ok, Term, list_to_binary(Rest)};
        {error, badform} -> {error, badform}
    end.

%% decode_chars/1: charlist -> {ok, Term, RestChars} | {error, badform}
%% Dispatches on the leading tag byte.
decode_chars([$a | Rest]) ->
    case read_body(Rest) of
        {ok, NameChars, Rest1} -> {ok, list_to_atom(NameChars), Rest1};
        {error, badform} -> {error, badform}
    end;
decode_chars([$i | Rest]) ->
    case read_body(Rest) of
        {ok, NumChars, Rest1} -> decode_int(NumChars, Rest1);
        {error, badform} -> {error, badform}
    end;
decode_chars([$b | Rest]) ->
    case read_body(Rest) of
        {ok, Bytes, Rest1} -> {ok, list_to_binary(Bytes), Rest1};
        {error, badform} -> {error, badform}
    end;
decode_chars([$t | Rest]) ->
    case read_elems(Rest) of
        {ok, Elems, Rest1} -> {ok, list_to_tuple(Elems), Rest1};
        {error, badform} -> {error, badform}
    end;
decode_chars([$l | Rest]) ->
    read_elems(Rest);
decode_chars(_) ->
    {error, badform}.

%% read_body/1: parse "Len:" then take exactly Len body bytes.
read_body(Chars) ->
    case read_len(Chars) of
        {ok, Len, Body} -> split_at(Len, Body, []);
        {error, badform} -> {error, badform}
    end.

%% read_elems/1: parse "Count:" then decode exactly Count nested terms.
read_elems(Chars) ->
    case read_len(Chars) of
        {ok, N, Body} -> decode_n(N, Body, []);
        {error, badform} -> {error, badform}
    end.

%% read_len/1: consume one or more ASCII digits followed by $:.
%% At least one digit is required, so a header such as "a:" is rejected
%% rather than read as length 0 (encode/1 never emits an empty length).
read_len([C | Rest]) when C >= $0, C =< $9 ->
    read_digits(Rest, C - $0);
read_len(_) ->
    {error, badform}.

read_digits([C | Rest], Acc) when C >= $0, C =< $9 ->
    read_digits(Rest, Acc * 10 + C - $0);
read_digits([$: | Rest], Acc) ->
    {ok, Acc, Rest};
read_digits(_, _) ->
    {error, badform}.

%% decode_int/2: validate the body before list_to_integer/1 so that junk
%% bytes (or an empty body) become badform instead of a badarg crash.
decode_int(Chars, Rest) ->
    case is_int_chars(Chars) of
        true -> {ok, list_to_integer(Chars), Rest};
        false -> {error, badform}
    end.

%% is_int_chars/1: optional leading $-, then one or more digits.
is_int_chars([$- | Digits]) -> is_digits(Digits);
is_int_chars(Chars) -> is_digits(Chars).

is_digits([C]) when C >= $0, C =< $9 -> true;
is_digits([C | Rest]) when C >= $0, C =< $9 -> is_digits(Rest);
is_digits(_) -> false.

%% split_at/3: take the first N elements (tail-recursive, accumulator is
%% reversed at the end); badform if the input runs out first.
split_at(0, Rest, Acc) ->
    {ok, lists:reverse(Acc), Rest};
split_at(N, [H | T], Acc) ->
    split_at(N - 1, T, [H | Acc]);
split_at(_, [], _) ->
    {error, badform}.

%% decode_n/3: decode N consecutive terms, stopping at the first failure.
decode_n(0, Rest, Acc) ->
    {ok, lists:reverse(Acc), Rest};
decode_n(N, Chars, Acc) ->
    case decode_chars(Chars) of
        {ok, Term, Rest} -> decode_n(N - 1, Rest, [Term | Acc]);
        {error, badform} -> {error, badform}
    end.
|
||||||
160
next/tests/term_codec.sh
Executable file
160
next/tests/term_codec.sh
Executable file
@@ -0,0 +1,160 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# next/tests/term_codec.sh — Step 3b term codec acceptance test.
|
||||||
|
#
|
||||||
|
# Exercises encode/1 + decode/1 for atoms, integers, binaries, tuples,
|
||||||
|
# lists, nesting, and round-trip equivalence. Built on the substrate-fix
|
||||||
|
# trio: binary_to_list/list_to_binary (24e3bf53), $X literals (3d80bd8c),
|
||||||
|
# atom_to_list/integer_to_list charlists (4852cca9).
|
||||||
|
|
||||||
|
# -e is not set, so each step below that must not fail checks its own
# result explicitly.
set -uo pipefail

# Run from the repository root so the relative paths used below resolve.
# A failed `git rev-parse` would otherwise hand `cd` an empty string,
# which is a silent no-op and leaves the script in the wrong directory.
REPO_ROOT=$(git rev-parse --show-toplevel) || {
  echo "ERROR: not inside a git work tree." >&2
  exit 1
}
cd "$REPO_ROOT" || exit 1

# Locate the sx_server binary: $SX_SERVER if set, else the in-tree build,
# else the absolute fallback path.
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [ ! -x "$SX_SERVER" ]; then
  echo "ERROR: sx_server.exe not found." >&2
  exit 1
fi

# Pass -v as the first argument to list passing cases as well.
VERBOSE="${1:-}"
PASS=0; FAIL=0; ERRORS=""

# Scratch file for the epoch script. The trap body is single-quoted so
# the path is expanded (and quoted) when the trap fires, not here.
TMPFILE=$(mktemp) || { echo "ERROR: mktemp failed." >&2; exit 1; }
trap 'rm -f "$TMPFILE"' EXIT
|
||||||
|
|
||||||
|
cat > "$TMPFILE" <<'EPOCHS'
|
||||||
|
(epoch 1)
|
||||||
|
(load "lib/erlang/tokenizer.sx")
|
||||||
|
(load "lib/erlang/parser.sx")
|
||||||
|
(load "lib/erlang/parser-core.sx")
|
||||||
|
(load "lib/erlang/parser-expr.sx")
|
||||||
|
(load "lib/erlang/parser-module.sx")
|
||||||
|
(load "lib/erlang/transpile.sx")
|
||||||
|
(load "lib/erlang/runtime.sx")
|
||||||
|
(load "lib/erlang/vm/dispatcher.sx")
|
||||||
|
|
||||||
|
(epoch 2)
|
||||||
|
(eval "(get (erlang-load-module (file-read \"next/kernel/term_codec.erl\")) :name)")
|
||||||
|
|
||||||
|
;; --- encode produces correct headers ---
|
||||||
|
|
||||||
|
;; atom 'ok' -> bytes "a2:ok"
|
||||||
|
(epoch 10)
|
||||||
|
(eval "(get (erlang-eval-ast \"term_codec:encode(ok) =:= <<97, 50, 58, 111, 107>>\") :name)")
|
||||||
|
|
||||||
|
;; integer 42 -> "i2:42"
|
||||||
|
(epoch 11)
|
||||||
|
(eval "(get (erlang-eval-ast \"term_codec:encode(42) =:= <<105, 50, 58, 52, 50>>\") :name)")
|
||||||
|
|
||||||
|
;; negative integer -99 -> "i3:-99"
|
||||||
|
(epoch 12)
|
||||||
|
(eval "(get (erlang-eval-ast \"term_codec:encode(-99) =:= <<105, 51, 58, 45, 57, 57>>\") :name)")
|
||||||
|
|
||||||
|
;; binary <<1,2,3>> -> "b3:" + 1,2,3
|
||||||
|
(epoch 13)
|
||||||
|
(eval "(get (erlang-eval-ast \"term_codec:encode(<<1, 2, 3>>) =:= <<98, 51, 58, 1, 2, 3>>\") :name)")
|
||||||
|
|
||||||
|
;; empty list -> "l0:"
|
||||||
|
(epoch 14)
|
||||||
|
(eval "(get (erlang-eval-ast \"term_codec:encode([]) =:= <<108, 48, 58>>\") :name)")
|
||||||
|
|
||||||
|
;; tuple {a, b} -> "t2:" + enc(a) + enc(b) = "t2:a1:aa1:b"
|
||||||
|
(epoch 15)
|
||||||
|
(eval "(get (erlang-eval-ast \"term_codec:encode({a, b}) =:= <<116, 50, 58, 97, 49, 58, 97, 97, 49, 58, 98>>\") :name)")
|
||||||
|
|
||||||
|
;; --- round-trip: encode then decode returns original term ---
|
||||||
|
|
||||||
|
(epoch 20)
|
||||||
|
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode(ok)), T =:= ok\") :name)")
|
||||||
|
|
||||||
|
(epoch 21)
|
||||||
|
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode(42)), T =:= 42\") :name)")
|
||||||
|
|
||||||
|
(epoch 22)
|
||||||
|
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode(-99)), T =:= -99\") :name)")
|
||||||
|
|
||||||
|
(epoch 23)
|
||||||
|
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode(<<1, 2, 3, 4, 5>>)), T =:= <<1, 2, 3, 4, 5>>\") :name)")
|
||||||
|
|
||||||
|
(epoch 24)
|
||||||
|
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode([])), T =:= []\") :name)")
|
||||||
|
|
||||||
|
(epoch 25)
|
||||||
|
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode({a, b, c})), T =:= {a, b, c}\") :name)")
|
||||||
|
|
||||||
|
(epoch 26)
|
||||||
|
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode([1, 2, 3])), T =:= [1, 2, 3]\") :name)")
|
||||||
|
|
||||||
|
;; --- nested: activity-shaped term (atoms, ints, binaries, nested tuple+list) ---
|
||||||
|
|
||||||
|
(epoch 30)
|
||||||
|
(eval "(get (erlang-eval-ast \"Act = {create, [{id, 1}, {actor, alice}, {payload, <<104, 105>>}]}, {ok, T, _} = term_codec:decode(term_codec:encode(Act)), T =:= Act\") :name)")
|
||||||
|
|
||||||
|
;; --- decode returns remainder so multiple frames can be streamed ---
|
||||||
|
|
||||||
|
(epoch 31)
|
||||||
|
(eval "(get (erlang-eval-ast \"E1 = term_codec:encode(foo), E2 = term_codec:encode(42), Both = list_to_binary([E1, E2]), {ok, T1, Rest} = term_codec:decode(Both), {ok, T2, _} = term_codec:decode(Rest), {T1, T2} =:= {foo, 42}\") :name)")
|
||||||
|
|
||||||
|
;; --- binary content with embedded zero / newline bytes round-trips ---
|
||||||
|
|
||||||
|
(epoch 32)
|
||||||
|
(eval "(get (erlang-eval-ast \"B = <<0, 10, 0, 10, 0>>, {ok, T, _} = term_codec:decode(term_codec:encode(B)), T =:= B\") :name)")
|
||||||
|
|
||||||
|
;; --- bad form returns {error, _} not a crash ---
|
||||||
|
|
||||||
|
(epoch 40)
|
||||||
|
(eval "(get (erlang-eval-ast \"element(1, term_codec:decode(<<122, 122, 122>>))\") :name)")
|
||||||
|
EPOCHS
|
||||||
|
|
||||||
|
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
|
||||||
|
|
||||||
|
# check EPOCH DESC EXPECTED
#
# Looks up the result recorded in $OUTPUT for EPOCH and passes the case
# when that result contains EXPECTED as a fixed substring. Lookup order:
#   1. the line following "(ok-len EPOCH ...)"
#   2. the first "(ok EPOCH ...)" line
#   3. the first "(error EPOCH ...)" line
# A result taken from an "(error EPOCH ...)" line always fails. Without
# this, an EXPECTED of "error" would match the word "error" in the line's
# own prefix, and a failed evaluation would be counted as a pass.
# Updates the globals PASS, FAIL and ERRORS; reads OUTPUT and VERBOSE.
check() {
  local epoch="$1" desc="$2" expected="$3"
  local actual crashed=0
  actual=$(echo "$OUTPUT" | grep -A1 "^(ok-len $epoch " | tail -1 || true)
  # If the ok-len header was the last line, tail returned the header itself.
  if echo "$actual" | grep -q "^(ok-len"; then actual=""; fi
  if [ -z "$actual" ]; then
    actual=$(echo "$OUTPUT" | grep "^(ok $epoch " | head -1 || true)
  fi
  if [ -z "$actual" ]; then
    actual=$(echo "$OUTPUT" | grep "^(error $epoch " | head -1 || true)
    if [ -n "$actual" ]; then crashed=1; fi
  fi
  [ -z "$actual" ] && actual="<no output for epoch $epoch>"

  if [ "$crashed" -eq 0 ] && echo "$actual" | grep -qF -- "$expected"; then
    PASS=$((PASS+1))
    if [ "$VERBOSE" = "-v" ]; then echo " ok $desc"; fi
  else
    FAIL=$((FAIL+1))
    ERRORS+=" FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual
"
  fi
}
|
||||||
|
|
||||||
|
# Expected result per epoch, one case per row: epoch|description|expected.
# Rows run in order through check(); the loop reads from a here-document
# (not a pipeline), so PASS/FAIL/ERRORS are updated in this shell.
while IFS='|' read -r case_epoch case_desc case_expected; do
  check "$case_epoch" "$case_desc" "$case_expected"
done <<'CASES'
2|module loads|term_codec
10|encode atom|true
11|encode int|true
12|encode neg int|true
13|encode binary|true
14|encode []|true
15|encode tuple|true
20|round-trip atom|true
21|round-trip int|true
22|round-trip neg int|true
23|round-trip binary|true
24|round-trip []|true
25|round-trip tuple|true
26|round-trip list|true
30|round-trip nested activity|true
31|streaming two frames|true
32|binary w/ embedded NUL+LF|true
40|bad form -> error tag|error
CASES
|
||||||
|
|
||||||
|
# Print the tally; on failure also print the collected failure details.
# The final arithmetic test is the script's exit status: 0 when nothing
# failed, 1 otherwise.
TOTAL=$((PASS + FAIL))
if ((FAIL != 0)); then
  echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
  echo "$ERRORS"
else
  echo "ok $PASS/$TOTAL term_codec tests passed"
fi
((FAIL == 0))
|
||||||
@@ -197,7 +197,7 @@ verify_signature(Activity, ActorState) ->
|
|||||||
|
|
||||||
**Sub-deliverables:**
|
**Sub-deliverables:**
|
||||||
- [x] **3a** — `log:open/2` + `log:append/2` + `log:tip/1` + `log:replay/3` + `log:entries/1` over an in-memory log state (per-actor seq; replay in append order; round-trip the stored activity). `next/tests/log_memory.sh` (12 cases).
|
- [x] **3a** — `log:open/2` + `log:append/2` + `log:tip/1` + `log:replay/3` + `log:entries/1` over an in-memory log state (per-actor seq; replay in append order; round-trip the stored activity). `next/tests/log_memory.sh` (12 cases).
|
||||||
- [ ] **3b** — *Parked behind substrate gap (see Blockers below).* Term codec + on-disk persistence: serializer/parser writing each activity as a JSONL-style line; restart-resumes-tip from the segment file.
|
- [~] **3b** — Term codec landed (`next/kernel/term_codec.erl`): `encode/1`/`decode/1` for atoms, integers, binaries, tuples, lists, nesting; netstring-ish framing (`a/i/b/t/l` tag + length + body); byte-clean (binary bodies may contain NUL/LF). 18 round-trip + streaming + bad-form tests in `next/tests/term_codec.sh`. On-disk segment writer (open/2 reads existing, append/2 writes-through, replay/3 reads from disk) is the next sub-step — codec is the load-bearing piece.
|
||||||
- [ ] **3c** — Segment rotation at size threshold + gen_server-mediated concurrent appends.
|
- [ ] **3c** — Segment rotation at size threshold + gen_server-mediated concurrent appends.
|
||||||
|
|
||||||
**Blockers (Step 3b) — byte-level path resolved 2026-06-04:** `binary_to_list/1` and `list_to_binary/1` are now registered Erlang BIFs in `lib/erlang/runtime.sx` (Step 3b substrate fix, +9 ffi tests, 738/738 conformance). `list_to_binary` is iolist-aware: accepts nested cons of integer bytes (0-255) and/or binaries; `binary_to_list` returns a proper Erlang charlist of integers. Round-trip verified: `list_to_binary(binary_to_list(B)) =:= B`. On-disk segment writer (3b) can now build segment bytes from `[Header, IoListPayload]` and reconstruct on read — option (c) of the original workaround menu is now cheap. `$X` char literals now decode correctly **as of 2026-06-04**: the Erlang tokenizer's `(= ch "$")` branch (`lib/erlang/tokenizer.sx`) now emits the decimal char code as the token value instead of the raw `$X` text (which `parse-number` couldn't decode → nil). Plain chars use `char->integer` of the first char; the standard escape table (`\n=10 \t=9 \r=13 \s=32 \b=8 \e=27 \f=12 \v=11 \d=127 \0=0 \\=92 \"=34 \'=39`) handles `$\X` forms. So `[$h, $i | T]` patterns and `list_to_binary([$f,$e,$d])` both work end-to-end. +12 eval tests, 750/750. Combined with 3b's `binary_to_list`/`list_to_binary`, Erlang code can now read/write byte sequences and string-shaped char lists fluently. **All three substrate gaps resolved as of 2026-06-05.** `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons of int char codes — standard Erlang semantics) via a new `er-string->charlist` helper in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer). Composition works end-to-end: `list_to_binary(atom_to_list(hello)) =:= <<104,101,108,108,111>>` and `integer_to_list(N)` round-trips through `list_to_integer`. 5 existing eval tests rewritten to charlist semantics, 8 new charlist-aware tests added (759/759). 
The full term-codec primitive set — `binary_to_list`, `list_to_binary`, `$X`, `atom_to_list`, `integer_to_list` charlist semantics, plus existing `file:read_file`/`write_file`/`list_dir` — is now in place.
|
**Blockers (Step 3b) — byte-level path resolved 2026-06-04:** `binary_to_list/1` and `list_to_binary/1` are now registered Erlang BIFs in `lib/erlang/runtime.sx` (Step 3b substrate fix, +9 ffi tests, 738/738 conformance). `list_to_binary` is iolist-aware: accepts nested cons of integer bytes (0-255) and/or binaries; `binary_to_list` returns a proper Erlang charlist of integers. Round-trip verified: `list_to_binary(binary_to_list(B)) =:= B`. On-disk segment writer (3b) can now build segment bytes from `[Header, IoListPayload]` and reconstruct on read — option (c) of the original workaround menu is now cheap. `$X` char literals now decode correctly **as of 2026-06-04**: the Erlang tokenizer's `(= ch "$")` branch (`lib/erlang/tokenizer.sx`) now emits the decimal char code as the token value instead of the raw `$X` text (which `parse-number` couldn't decode → nil). Plain chars use `char->integer` of the first char; the standard escape table (`\n=10 \t=9 \r=13 \s=32 \b=8 \e=27 \f=12 \v=11 \d=127 \0=0 \\=92 \"=34 \'=39`) handles `$\X` forms. So `[$h, $i | T]` patterns and `list_to_binary([$f,$e,$d])` both work end-to-end. +12 eval tests, 750/750. Combined with 3b's `binary_to_list`/`list_to_binary`, Erlang code can now read/write byte sequences and string-shaped char lists fluently. **All three substrate gaps resolved as of 2026-06-05.** `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons of int char codes — standard Erlang semantics) via a new `er-string->charlist` helper in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer). Composition works end-to-end: `list_to_binary(atom_to_list(hello)) =:= <<104,101,108,108,111>>` and `integer_to_list(N)` round-trips through `list_to_integer`. 5 existing eval tests rewritten to charlist semantics, 8 new charlist-aware tests added (759/759). 
The full term-codec primitive set — `binary_to_list`, `list_to_binary`, `$X`, `atom_to_list`, `integer_to_list` charlist semantics, plus existing `file:read_file`/`write_file`/`list_dir` — is now in place.
|
||||||
@@ -1003,6 +1003,7 @@ A few things still under-specified; resolve as work begins.
|
|||||||
Newest first. One line per sub-deliverable commit. Erlang conformance gate
|
Newest first. One line per sub-deliverable commit. Erlang conformance gate
|
||||||
(`bash lib/erlang/conformance.sh`) must stay fully passing on every entry (the total grows as tests are added: 738/738, 750/750, then 759/759 in the entries below).
|
(`bash lib/erlang/conformance.sh`) must stay fully passing on every entry (the total grows as tests are added: 738/738, 750/750, then 759/759 in the entries below).
|
||||||
|
|
||||||
|
- **2026-06-05** — Step 3b codec landed: `next/kernel/term_codec.erl` with `encode/1` + `decode/1` over a netstring-ish wire format (`a` atom / `i` int / `b` binary / `t` tuple / `l` list, each as `tag + decimal-length + ":" + body`; nil = `l0:`). Byte-clean — binary bodies may contain NUL, LF, or any byte; encoding stays parseable. Built end-to-end on the three substrate fixes (binary_to_list/list_to_binary + $X + atom_to_list/integer_to_list charlists). `decode/1` returns `{ok, Term, RestBinary}` so callers can stream multiple frames from one buffer. 18 acceptance tests in `next/tests/term_codec.sh`: encode bytes for every leaf type, round-trip for each, nested activity-shaped term (`{create, [{id,1},{actor,alice},{payload,<<104,105>>}]}`), 2-frame streaming, binary with embedded NUL+LF, bad-form returns `{error, badform}` not crash. Erlang conformance **759/759** unchanged (codec is in `next/`, not lib/erlang/). Step 3b on-disk segment writer (the second half — open/append/replay reading/writing the actual segment file) is the natural next iteration: encode each activity with `term_codec`, frame with a 4-byte big-endian length prefix, append to disk.
|
||||||
- **2026-06-05** — Step 3b substrate fix #3 (final): `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons-of-int-char-codes) instead of SX strings — standard Erlang semantics. New helper `er-string->charlist` in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer, which already handles both shapes). 5 existing eval tests rewritten to match new semantics (e.g. `length(atom_to_list(hello)) =:= 5`, `hd(integer_to_list(42)) =:= 52`). 8 new charlist-coverage tests demonstrating composition: `list_to_binary(atom_to_list(ok)) =:= <<111,107>>`; `list_to_atom([$f,$o,$o])` round-trips; `list_to_integer([$1,$0,$0]) =:= 100`. Erlang conformance **759/759** (eval 397→406, +9 net). The full term-codec primitive set — `binary_to_list`/`list_to_binary` (24e3bf53), `$X` literals (3d80bd8c), and now `atom_to_list`/`integer_to_list` charlists — is in place; Step 3b on-disk segment writer can encode arbitrary Erlang activity terms (atoms, ints, binaries, tuples, lists) into byte sequences using only Erlang-native primitives.
|
- **2026-06-05** — Step 3b substrate fix #3 (final): `atom_to_list/1` and `integer_to_list/1` now return Erlang charlists (cons-of-int-char-codes) instead of SX strings — standard Erlang semantics. New helper `er-string->charlist` in `transpile.sx`. `list_to_atom/1` and `list_to_integer/1` accept either charlists OR SX strings (back-compat via the existing `er-source-to-string` coercer, which already handles both shapes). 5 existing eval tests rewritten to match new semantics (e.g. `length(atom_to_list(hello)) =:= 5`, `hd(integer_to_list(42)) =:= 52`). 8 new charlist-coverage tests demonstrating composition: `list_to_binary(atom_to_list(ok)) =:= <<111,107>>`; `list_to_atom([$f,$o,$o])` round-trips; `list_to_integer([$1,$0,$0]) =:= 100`. Erlang conformance **759/759** (eval 397→406, +9 net). The full term-codec primitive set — `binary_to_list`/`list_to_binary` (24e3bf53), `$X` literals (3d80bd8c), and now `atom_to_list`/`integer_to_list` charlists — is in place; Step 3b on-disk segment writer can encode arbitrary Erlang activity terms (atoms, ints, binaries, tuples, lists) into byte sequences using only Erlang-native primitives.
|
||||||
- **2026-06-04** — Step 3b substrate fix #2: `$X` char-literal decoding. Patched the Erlang tokenizer's `(= ch "$")` branch in `lib/erlang/tokenizer.sx` to emit the decimal char code as the integer token value instead of the raw `$X` source text (which `parse-number` couldn't decode → nil). Plain `$c` uses `char->integer` of the first char; `$\C` consults the standard Erlang escape table (`\n=10 \t=9 \r=13 \s=32 \b=8 \e=27 \f=12 \v=11 \d=127 \0=0 \\=92 \"=34 \'=39`). End-of-file after `$` decodes to 0 defensively. Probes: `$A→65`, `$0→48`, `$\n→10`, `$\\→92`, `[$h,$i]` → cons of 104/105, `list_to_binary([$f,$e,$d])` → `<<102,101,100>>`. +12 eval tests (single chars, each escape, list/binary composition with previous BIFs). Combined with substrate fix #1, Erlang code in fed-sx-m1 can now write `[$h, $i | T]` patterns AND construct/deconstruct binaries — a full term-codec primitive set. Erlang conformance **750/750** (eval 385→397). Plan Blockers note updated; remaining `atom_to_list`/`integer_to_list` charlist gap noted as low-priority for Milestone 1.
|
- **2026-06-04** — Step 3b substrate fix #2: `$X` char-literal decoding. Patched the Erlang tokenizer's `(= ch "$")` branch in `lib/erlang/tokenizer.sx` to emit the decimal char code as the integer token value instead of the raw `$X` source text (which `parse-number` couldn't decode → nil). Plain `$c` uses `char->integer` of the first char; `$\C` consults the standard Erlang escape table (`\n=10 \t=9 \r=13 \s=32 \b=8 \e=27 \f=12 \v=11 \d=127 \0=0 \\=92 \"=34 \'=39`). End-of-file after `$` decodes to 0 defensively. Probes: `$A→65`, `$0→48`, `$\n→10`, `$\\→92`, `[$h,$i]` → cons of 104/105, `list_to_binary([$f,$e,$d])` → `<<102,101,100>>`. +12 eval tests (single chars, each escape, list/binary composition with previous BIFs). Combined with substrate fix #1, Erlang code in fed-sx-m1 can now write `[$h, $i | T]` patterns AND construct/deconstruct binaries — a full term-codec primitive set. Erlang conformance **750/750** (eval 385→397). Plan Blockers note updated; remaining `atom_to_list`/`integer_to_list` charlist gap noted as low-priority for Milestone 1.
|
||||||
- **2026-06-04** — Step 3b substrate fix: registered `erlang:binary_to_list/1` and `erlang:list_to_binary/1` in `lib/erlang/runtime.sx` — the byte-level half of the term-codec gap. `binary_to_list` returns a proper Erlang charlist (`er-mk-cons` chain of byte ints). `list_to_binary` is iolist-aware via a recursive `er-iolist-walk!` that accepts nil / cons / binary / integer 0-255 and flattens nested iolists (e.g. `[1, <<2,3>>, [4, [5]]]` → `<<1,2,3,4,5>>`); out-of-range bytes or non-iolist elements raise `error:badarg`. Round-trip verified: `list_to_binary(binary_to_list(B)) =:= B`. +9 ffi tests (length, hd, empty→[], flat byte_size, nested-iolist, round-trip, 3 badarg paths). On-disk segment writer (3b) now has a complete `[Header | IoListPayload] → Binary` path; the remaining two substrate gaps (`atom_to_list`/`integer_to_list` as Erlang charlists, `$X` char-literal decoding) are still parked but no longer block 3b implementation if the encoding uses byte ints directly. Erlang conformance **738/738** (ffi 28→37). Plan Blockers note for Step 3b updated to reflect the partial resolution.
|
- **2026-06-04** — Step 3b substrate fix: registered `erlang:binary_to_list/1` and `erlang:list_to_binary/1` in `lib/erlang/runtime.sx` — the byte-level half of the term-codec gap. `binary_to_list` returns a proper Erlang charlist (`er-mk-cons` chain of byte ints). `list_to_binary` is iolist-aware via a recursive `er-iolist-walk!` that accepts nil / cons / binary / integer 0-255 and flattens nested iolists (e.g. `[1, <<2,3>>, [4, [5]]]` → `<<1,2,3,4,5>>`); out-of-range bytes or non-iolist elements raise `error:badarg`. Round-trip verified: `list_to_binary(binary_to_list(B)) =:= B`. +9 ffi tests (length, hd, empty→[], flat byte_size, nested-iolist, round-trip, 3 badarg paths). On-disk segment writer (3b) now has a complete `[Header | IoListPayload] → Binary` path; the remaining two substrate gaps (`atom_to_list`/`integer_to_list` as Erlang charlists, `$X` char-literal decoding) are still parked but no longer block 3b implementation if the encoding uses byte ints directly. Erlang conformance **738/738** (ffi 28→37). Plan Blockers note for Step 3b updated to reflect the partial resolution.
|
||||||
|
|||||||
Reference in New Issue
Block a user