fed-sx-m1: Step 3b codec — next/kernel/term_codec.erl encode/decode + 18 round-trip tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
This commit is contained in:
105
next/kernel/term_codec.erl
Normal file
105
next/kernel/term_codec.erl
Normal file
@@ -0,0 +1,105 @@
|
||||
-module(term_codec).
|
||||
-export([encode/1, decode/1]).
|
||||
|
||||
%% Erlang-side term <-> binary codec, built on the substrate fixes from
|
||||
%% commits 24e3bf53 (binary_to_list / list_to_binary), 3d80bd8c ($X char
|
||||
%% literals), 4852cca9 (atom_to_list / integer_to_list charlists).
|
||||
%%
|
||||
%% Wire format (netstring-ish; all length headers ASCII decimal):
|
||||
%%
|
||||
%% atom $a Len $: NameBytes
|
||||
%% integer $i Len $: DecimalBytes (negative ints carry leading $-)
|
||||
%% binary $b Len $: RawBytes
|
||||
%% tuple $t Count $: Enc1 Enc2 ... Encn
|
||||
%% list $l Count $: Enc1 Enc2 ... Encn (proper list)
|
||||
%% nil $l $0 $: (empty list)
|
||||
%%
|
||||
%% Each Enc is itself one of these forms — recursive. The format is
|
||||
%% byte-clean: binary bodies may contain any byte (newlines, NULs, etc.),
|
||||
%% so callers can frame entries with a 4-byte big-endian length prefix
|
||||
%% (Step 3b on-disk segment writer's job).
|
||||
|
||||
%% encode/1: term -> binary
%%
%% Serialises an atom, integer, binary, tuple or proper list into the
%% tagged wire format: one tag byte, an ASCII-decimal length (or element
%% count), $:, then the body. Tuple and list elements are encoded
%% recursively. Any other kind of term matches no clause.
encode(Atom) when is_atom(Atom) ->
    frame_chars($a, atom_to_list(Atom));
encode(Int) when is_integer(Int) ->
    %% integer_to_list/1 supplies the leading $- for negative values, and
    %% that sign character is counted in the length header.
    frame_chars($i, integer_to_list(Int));
encode(Bin) when is_binary(Bin) ->
    frame($b, byte_size(Bin), Bin);
encode(Tuple) when is_tuple(Tuple) ->
    frame_elems($t, tuple_to_list(Tuple));
encode([]) ->
    list_to_binary([$l, $0, $:]);
encode(List) when is_list(List) ->
    frame_elems($l, List).

%% Frame a charlist body; the header carries its character count.
frame_chars(Tag, Chars) ->
    frame(Tag, length(Chars), Chars).

%% Frame a sequence of terms; the header carries the element count and the
%% body is the concatenation of each element's own encoding.
frame_elems(Tag, Elems) ->
    frame(Tag, length(Elems), [encode(Elem) || Elem <- Elems]).

%% Tag byte, decimal header, $:, body -> one flat binary.
frame(Tag, N, Body) ->
    list_to_binary([Tag, integer_to_list(N), $:, Body]).
|
||||
|
||||
%% decode/1: binary -> {ok, Term, RestBinary} | {error, badform}
%%
%% Decodes exactly one encoded term from the front of the buffer. On
%% success the unconsumed bytes are returned so callers can stream-decode
%% multiple frames from one buffer.
%%
%% Every malformed or truncated input yields {error, badform} instead of
%% raising: empty buffer, unknown tag, header without at least one digit,
%% missing $:, body shorter than the header claims, an integer body that
%% is not [-]digits, or a failure in any nested element.
decode(B) when is_binary(B) ->
    %% The buffer becomes a charlist once here and a binary once on the
    %% way out; nested elements are decoded directly on the charlist so the
    %% remaining input is not re-copied for every element.
    case decode_chars(binary_to_list(B)) of
        {ok, Term, Rest} -> {ok, Term, list_to_binary(Rest)};
        {error, badform} -> {error, badform}
    end.

%% decode_chars/1: charlist -> {ok, Term, RestChars} | {error, badform}
%% Splits off the tag byte, parses the "<digits>:" header, then dispatches
%% on the tag.
decode_chars([Tag | Rest]) ->
    case read_header(Rest) of
        {ok, N, Body} -> decode_body(Tag, N, Body);
        {error, badform} -> {error, badform}
    end;
decode_chars(_) ->
    {error, badform}.

%% read_header/1: parse one or more ASCII digits followed by $:.
%% Returns {ok, Number, CharsAfterColon} | {error, badform}.
read_header([C | Rest]) when C >= $0, C =< $9 ->
    read_len(Rest, C - $0);
read_header(_) ->
    {error, badform}.

read_len([C | Rest], Acc) when C >= $0, C =< $9 ->
    read_len(Rest, Acc * 10 + (C - $0));
read_len([$: | Rest], Acc) ->
    {ok, Acc, Rest};
read_len(_, _) ->
    {error, badform}.

%% decode_body/3: decode the body that follows a parsed header.
%% N is a byte length for $a / $i / $b and an element count for $t / $l.
decode_body($a, Len, Chars) when Len =< 255 ->
    %% Names longer than 255 characters are rejected as badform here
    %% (falling through to the last clause) rather than handed to
    %% list_to_atom/1.
    %% NOTE(review): list_to_atom/1 creates new atoms from decoded input and
    %% atoms are never garbage collected; if buffers can come from an
    %% untrusted source, consider list_to_existing_atom/1 or an allow-list.
    case split_at(Len, Chars) of
        {ok, Name, Rest} -> {ok, list_to_atom(Name), Rest};
        {error, badform} -> {error, badform}
    end;
decode_body($i, Len, Chars) ->
    case split_at(Len, Chars) of
        {ok, Digits, Rest} -> int_term(is_int_chars(Digits), Digits, Rest);
        {error, badform} -> {error, badform}
    end;
decode_body($b, Len, Chars) ->
    case split_at(Len, Chars) of
        {ok, Bytes, Rest} -> {ok, list_to_binary(Bytes), Rest};
        {error, badform} -> {error, badform}
    end;
decode_body($t, Count, Chars) ->
    case decode_n(Count, Chars, []) of
        {ok, Elems, Rest} -> {ok, list_to_tuple(Elems), Rest};
        {error, badform} -> {error, badform}
    end;
decode_body($l, Count, Chars) ->
    decode_n(Count, Chars, []);
decode_body(_, _, _) ->
    {error, badform}.

%% int_term/3: convert a validated integer body, or report badform.
int_term(true, Digits, Rest) -> {ok, list_to_integer(Digits), Rest};
int_term(false, _, _) -> {error, badform}.

%% is_int_chars/1: true for an optional leading $- followed by at least one
%% ASCII digit — the only shape the encoder emits for integer bodies.
is_int_chars([$- | Digits]) -> is_digits(Digits);
is_int_chars(Chars) -> is_digits(Chars).

is_digits([_ | _] = Chars) -> all_digits(Chars);
is_digits([]) -> false.

all_digits([C | Rest]) when C >= $0, C =< $9 -> all_digits(Rest);
all_digits([]) -> true;
all_digits(_) -> false.

%% split_at/2: take exactly N leading elements.
%% Returns {ok, Taken, Rest}, or {error, badform} when fewer than N remain.
split_at(N, Chars) ->
    split_at(N, Chars, []).

split_at(0, Rest, Acc) ->
    {ok, lists:reverse(Acc), Rest};
split_at(N, [H | T], Acc) ->
    split_at(N - 1, T, [H | Acc]);
split_at(_, [], _) ->
    {error, badform}.

%% decode_n/3: decode N consecutive terms, accumulating them in order.
%% Returns {ok, Terms, Rest}; stops at the first element that fails.
decode_n(0, Rest, Acc) ->
    {ok, lists:reverse(Acc), Rest};
decode_n(N, Chars, Acc) ->
    case decode_chars(Chars) of
        {ok, Term, Rest} -> decode_n(N - 1, Rest, [Term | Acc]);
        {error, badform} -> {error, badform}
    end.
|
||||
160
next/tests/term_codec.sh
Executable file
160
next/tests/term_codec.sh
Executable file
@@ -0,0 +1,160 @@
|
||||
#!/usr/bin/env bash
# next/tests/term_codec.sh — Step 3b term codec acceptance test.
#
# Exercises encode/1 + decode/1 for atoms, integers, binaries, tuples,
# lists, nesting, and round-trip equivalence. Built on the substrate-fix
# trio: binary_to_list/list_to_binary (24e3bf53), $X literals (3d80bd8c),
# atom_to_list/integer_to_list charlists (4852cca9).
#
# Usage: term_codec.sh [-v]     (-v prints one line per passing check)

set -uo pipefail

# All paths below are relative to the repository root. Resolve it in a
# separate step: `cd "$(failing command)"` would run `cd ""`, which Bash
# accepts silently, leaving the script in the caller's directory.
REPO_ROOT=$(git rev-parse --show-toplevel) || {
  echo "ERROR: not inside a git work tree." >&2
  exit 1
}
cd "$REPO_ROOT" || exit 1

# Server binary: $SX_SERVER if set, else the in-tree build, else a fixed
# absolute fallback location.
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
if [ ! -x "$SX_SERVER" ]; then
  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
fi
if [ ! -x "$SX_SERVER" ]; then
  echo "ERROR: sx_server.exe not found." >&2
  exit 1
fi

VERBOSE="${1:-}"
PASS=0; FAIL=0; ERRORS=""

TMPFILE=$(mktemp) || exit 1
# Single quotes defer expansion until the trap fires, and the inner double
# quotes keep the path as a single word.
trap 'rm -f "$TMPFILE"' EXIT
|
||||
|
||||
# Write the command script that is fed to sx_server on stdin. The heredoc
# delimiter is quoted, so the shell performs no expansion on the text below.
# Each "(epoch N)" line introduces the forms for one step; check() later
# looks up the server's reply by the same N (presumably the server echoes
# the epoch number in its reply lines — confirm against sx_server).
#   epoch 1      load the Erlang front end / runtime .sx files
#   epoch 2      load next/kernel/term_codec.erl and read the module name
#   epochs 10-15 exact encoded bytes
#   epochs 20-32 encode -> decode round trips, streaming, byte-clean bodies
#   epoch 40     undecodable input
cat > "$TMPFILE" <<'EPOCHS'
(epoch 1)
(load "lib/erlang/tokenizer.sx")
(load "lib/erlang/parser.sx")
(load "lib/erlang/parser-core.sx")
(load "lib/erlang/parser-expr.sx")
(load "lib/erlang/parser-module.sx")
(load "lib/erlang/transpile.sx")
(load "lib/erlang/runtime.sx")
(load "lib/erlang/vm/dispatcher.sx")

(epoch 2)
(eval "(get (erlang-load-module (file-read \"next/kernel/term_codec.erl\")) :name)")

;; --- encode produces correct headers ---

;; atom 'ok' -> bytes "a2:ok"
(epoch 10)
(eval "(get (erlang-eval-ast \"term_codec:encode(ok) =:= <<97, 50, 58, 111, 107>>\") :name)")

;; integer 42 -> "i2:42"
(epoch 11)
(eval "(get (erlang-eval-ast \"term_codec:encode(42) =:= <<105, 50, 58, 52, 50>>\") :name)")

;; negative integer -99 -> "i3:-99"
(epoch 12)
(eval "(get (erlang-eval-ast \"term_codec:encode(-99) =:= <<105, 51, 58, 45, 57, 57>>\") :name)")

;; binary <<1,2,3>> -> "b3:" + 1,2,3
(epoch 13)
(eval "(get (erlang-eval-ast \"term_codec:encode(<<1, 2, 3>>) =:= <<98, 51, 58, 1, 2, 3>>\") :name)")

;; empty list -> "l0:"
(epoch 14)
(eval "(get (erlang-eval-ast \"term_codec:encode([]) =:= <<108, 48, 58>>\") :name)")

;; tuple {a, b} -> "t2:" + enc(a) + enc(b) = "t2:a1:aa1:b"
(epoch 15)
(eval "(get (erlang-eval-ast \"term_codec:encode({a, b}) =:= <<116, 50, 58, 97, 49, 58, 97, 97, 49, 58, 98>>\") :name)")

;; --- round-trip: encode then decode returns original term ---

(epoch 20)
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode(ok)), T =:= ok\") :name)")

(epoch 21)
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode(42)), T =:= 42\") :name)")

(epoch 22)
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode(-99)), T =:= -99\") :name)")

(epoch 23)
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode(<<1, 2, 3, 4, 5>>)), T =:= <<1, 2, 3, 4, 5>>\") :name)")

(epoch 24)
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode([])), T =:= []\") :name)")

(epoch 25)
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode({a, b, c})), T =:= {a, b, c}\") :name)")

(epoch 26)
(eval "(get (erlang-eval-ast \"{ok, T, _} = term_codec:decode(term_codec:encode([1, 2, 3])), T =:= [1, 2, 3]\") :name)")

;; --- nested: activity-shaped term (atoms, ints, binaries, nested tuple+list) ---

(epoch 30)
(eval "(get (erlang-eval-ast \"Act = {create, [{id, 1}, {actor, alice}, {payload, <<104, 105>>}]}, {ok, T, _} = term_codec:decode(term_codec:encode(Act)), T =:= Act\") :name)")

;; --- decode returns remainder so multiple frames can be streamed ---

(epoch 31)
(eval "(get (erlang-eval-ast \"E1 = term_codec:encode(foo), E2 = term_codec:encode(42), Both = list_to_binary([E1, E2]), {ok, T1, Rest} = term_codec:decode(Both), {ok, T2, _} = term_codec:decode(Rest), {T1, T2} =:= {foo, 42}\") :name)")

;; --- binary content with embedded zero / newline bytes round-trips ---

(epoch 32)
(eval "(get (erlang-eval-ast \"B = <<0, 10, 0, 10, 0>>, {ok, T, _} = term_codec:decode(term_codec:encode(B)), T =:= B\") :name)")

;; --- bad form returns {error, _} not a crash ---

(epoch 40)
(eval "(get (erlang-eval-ast \"element(1, term_codec:decode(<<122, 122, 122>>))\") :name)")
EPOCHS

# Run the server over the script with a 60-second cap and capture stdout.
# stderr is discarded, so only the stdout reply lines are inspected below.
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
|
||||
|
||||
# check EPOCH DESC EXPECTED
#
# Looks up the reply for EPOCH in $OUTPUT and counts a pass when that reply
# contains EXPECTED as a fixed substring; otherwise counts a failure and
# appends a FAIL line to $ERRORS. Updates the global PASS / FAIL counters.
#
# Lookup order: the line following "(ok-len EPOCH ", then a "(ok EPOCH "
# line, then an "(error EPOCH " line. A reply found only through the error
# fallback, or no reply at all, is shown in the failure message but is
# never matched against EXPECTED. Without that rule an expectation such as
# "error" (or a module name that also appears in an error message) would be
# satisfied by the failure report itself.
check() {
  local epoch="$1" desc="$2" expected="$3"
  local actual evaluated=1

  actual=$(printf '%s\n' "$OUTPUT" | grep -A1 "^(ok-len $epoch " | tail -1 || true)
  # If the header was the last line, tail returned the header itself.
  if printf '%s\n' "$actual" | grep -q "^(ok-len"; then actual=""; fi
  if [ -z "$actual" ]; then
    actual=$(printf '%s\n' "$OUTPUT" | grep "^(ok $epoch " | head -1 || true)
  fi
  if [ -z "$actual" ]; then
    actual=$(printf '%s\n' "$OUTPUT" | grep "^(error $epoch " | head -1 || true)
    evaluated=0
  fi
  [ -z "$actual" ] && actual="<no output for epoch $epoch>"

  if [ "$evaluated" -eq 1 ] && printf '%s\n' "$actual" | grep -qF -- "$expected"; then
    PASS=$((PASS+1))
    [ "$VERBOSE" = "-v" ] && echo "  ok $desc"
  else
    FAIL=$((FAIL+1))
    ERRORS+="  FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual
"
  fi
}
|
||||
|
||||
# Expectation table, one row per check: EPOCH|DESCRIPTION|EXPECTED.
# The loop reads from a heredoc (not a pipe), so it runs in the current
# shell and the PASS / FAIL / ERRORS updates made by check() persist.
while IFS='|' read -r case_epoch case_desc case_expected; do
  check "$case_epoch" "$case_desc" "$case_expected"
done <<'CASES'
2|module loads|term_codec
10|encode atom|true
11|encode int|true
12|encode neg int|true
13|encode binary|true
14|encode []|true
15|encode tuple|true
20|round-trip atom|true
21|round-trip int|true
22|round-trip neg int|true
23|round-trip binary|true
24|round-trip []|true
25|round-trip tuple|true
26|round-trip list|true
30|round-trip nested activity|true
31|streaming two frames|true
32|binary w/ embedded NUL+LF|true
40|bad form -> error tag|error
CASES

# Summary line, followed by the collected failure details when any exist.
TOTAL=$((PASS+FAIL))
if [ "$FAIL" -ne 0 ]; then
  echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
  echo "$ERRORS"
else
  echo "ok $PASS/$TOTAL term_codec tests passed"
fi
# The script's exit status is the result of this final test.
[ "$FAIL" -eq 0 ]
|
||||
Reference in New Issue
Block a user