Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
106 lines
3.8 KiB
Erlang
106 lines
3.8 KiB
Erlang
-module(term_codec).
|
|
-export([encode/1, decode/1]).
|
|
|
|
%% Erlang-side term <-> binary codec, built on the substrate fixes from
%% commits 24e3bf53 (binary_to_list / list_to_binary), 3d80bd8c ($X char
%% literals), 4852cca9 (atom_to_list / integer_to_list charlists).
%%
%% Wire format (netstring-ish; all length headers ASCII decimal):
%%
%%   atom     $a Len   $: NameBytes
%%   integer  $i Len   $: DecimalBytes  (negative ints carry leading $-)
%%   binary   $b Len   $: RawBytes
%%   tuple    $t Count $: Enc1 Enc2 ... Encn
%%   list     $l Count $: Enc1 Enc2 ... Encn  (proper list)
%%   nil      $l $0    $:                     (empty list)
%%
%% Each Enc is itself one of these forms — recursive. The format is
%% byte-clean: binary bodies may contain any byte (newlines, NULs, etc.),
%% so callers can frame entries with a 4-byte big-endian length prefix
%% (Step 3b on-disk segment writer's job).
|
|
|
|
%% encode/1: term -> binary
%%
%% Serialises an atom, integer, binary, tuple or proper list into the
%% tagged, length-prefixed wire form. Scalars carry the byte length of
%% their body; tuples and lists carry their element count followed by
%% the concatenated encodings of the elements. The empty list goes
%% through the list clause and comes out as the bytes "l0:".
%% Any other term fails with function_clause.
encode(Atom) when is_atom(Atom) ->
    Name = atom_to_list(Atom),
    encode_frame($a, length(Name), Name);
encode(Int) when is_integer(Int) ->
    %% integer_to_list/1 already yields the leading $- for negatives.
    Digits = integer_to_list(Int),
    encode_frame($i, length(Digits), Digits);
encode(Bin) when is_binary(Bin) ->
    encode_frame($b, byte_size(Bin), Bin);
encode(Tuple) when is_tuple(Tuple) ->
    Elems = tuple_to_list(Tuple),
    encode_frame($t, length(Elems), [encode(E) || E <- Elems]);
encode(List) when is_list(List) ->
    encode_frame($l, length(List), [encode(E) || E <- List]).

%% encode_frame/3: Tag, decimal Size, $:, then Body, flattened to one
%% binary. Body may be a charlist, a binary, or a list of binaries.
encode_frame(Tag, Size, Body) ->
    list_to_binary([Tag, integer_to_list(Size), $:, Body]).
|
|
|
|
%% decode/1: binary -> {ok, Term, RestBinary} | {error, badform}
%%
%% Decodes the single encoded term at the head of B. On success the
%% unconsumed bytes are returned as RestBinary so callers can
%% stream-decode multiple frames from one buffer.
%%
%% The parsing helpers report damaged input by raising
%% erlang:error({badform, _}) (missing colon, truncated body, bad nested
%% element), and the conversion BIFs raise badarg / system_limit for a
%% non-numeric integer body or an over-long atom name. All of those are
%% translated to {error, badform} here, so the documented return
%% contract holds for every binary input instead of leaking exceptions.
decode(B) when is_binary(B) ->
    try
        decode_chars(binary_to_list(B))
    catch
        error:{badform, _} -> {error, badform};
        error:badarg -> {error, badform};
        error:system_limit -> {error, badform}
    end.
|
|
|
|
%% decode_chars/1: charlist -> {ok, Term, RestBinary} | {error, badform}
%%
%% Dispatches on the leading tag character. Every form is Tag, a decimal
%% length or count, $:, and then either exactly Len body characters
%% ($a, $i, $b) or Count nested encodings ($t, $l). Input that does not
%% start with a known tag yields {error, badform}; damage after the tag
%% surfaces as an error raised by the helpers.
%%
%% NOTE(review): the $a clause calls list_to_atom/1 on decoded bytes.
%% Atoms are not garbage collected, so this is only safe if every input
%% comes from a trusted writer — confirm against callers.
decode_chars([$a | Header]) ->
    {Len, AfterLen} = read_len(Header, 0),
    {Name, Tail} = split_at(Len, strip_colon(AfterLen)),
    {ok, list_to_atom(Name), list_to_binary(Tail)};
decode_chars([$i | Header]) ->
    {Len, AfterLen} = read_len(Header, 0),
    {Digits, Tail} = split_at(Len, strip_colon(AfterLen)),
    {ok, list_to_integer(Digits), list_to_binary(Tail)};
decode_chars([$b | Header]) ->
    {Len, AfterLen} = read_len(Header, 0),
    {Payload, Tail} = split_at(Len, strip_colon(AfterLen)),
    {ok, list_to_binary(Payload), list_to_binary(Tail)};
decode_chars([$t | Header]) ->
    {Count, AfterCount} = read_len(Header, 0),
    {Members, Tail} = decode_n(Count, strip_colon(AfterCount), []),
    {ok, list_to_tuple(Members), list_to_binary(Tail)};
decode_chars([$l | Header]) ->
    {Count, AfterCount} = read_len(Header, 0),
    {Members, Tail} = decode_n(Count, strip_colon(AfterCount), []),
    {ok, Members, list_to_binary(Tail)};
decode_chars(_Unrecognised) ->
    {error, badform}.
|
|
|
|
%% read_len/2: consume leading ASCII decimal digits from a charlist.
%%
%% Returns {Value, Rest}, where Value is the running total extended by
%% the digits read and Rest starts at the first non-digit character
%% (or is whatever was passed in when there are no digits at all).
%% A leading $- is not a digit, so it is left in Rest untouched and the
%% total is returned unchanged; the caller's colon check then rejects it.
read_len([Digit | More], Total) when $0 =< Digit, Digit =< $9 ->
    read_len(More, 10 * Total + (Digit - $0));
read_len(NonDigits, Total) ->
    {Total, NonDigits}.
|
|
|
|
%% strip_colon/1: drop the $: that separates a length header from its
%% body and return what follows. Anything else at the head raises
%% erlang:error({badform, Input}) with the offending input attached.
strip_colon([$: | AfterColon]) ->
    AfterColon;
strip_colon(Unexpected) ->
    erlang:error({badform, Unexpected}).
|
|
|
|
%% split_at/2: take the first Count elements off a list.
%%
%% Returns {Taken, Remaining} with Taken in original order. Raises
%% erlang:error({badform, short}) when the list runs out before Count
%% elements have been taken.
split_at(Count, Chars) ->
    split_at(Count, Chars, []).

%% split_at/3: accumulator loop; Taken is built in reverse and flipped
%% once the countdown reaches zero.
split_at(0, Remaining, Taken) ->
    {lists:reverse(Taken), Remaining};
split_at(Count, [Char | Remaining], Taken) ->
    split_at(Count - 1, Remaining, [Char | Taken]);
split_at(_Count, [], _Taken) ->
    erlang:error({badform, short}).
|
|
|
|
%% decode_n/3: decode Count consecutive encodings from a charlist.
%%
%% Decoded collects the terms newest-first; it is reversed when the
%% countdown hits zero, so the result is {TermsInInputOrder, RestChars}.
decode_n(0, Remaining, Decoded) ->
    {lists:reverse(Decoded), Remaining};
decode_n(Count, Chars, Decoded) ->
    {Elem, Remaining} = decode_one(Chars),
    decode_n(Count - 1, Remaining, [Elem | Decoded]).
|
|
|
|
%% decode_one/1: decode a single nested encoding from a charlist.
%%
%% Adapts decode_chars/1 for use inside containers: the leftover binary
%% is turned back into a charlist so parsing can continue, and an
%% {error, Reason} result is raised as erlang:error({badform, Reason})
%% because a container cannot be completed once one element is bad.
decode_one(Chars) ->
    Outcome = decode_chars(Chars),
    case Outcome of
        {error, Reason} ->
            erlang:error({badform, Reason});
        {ok, Value, Leftover} ->
            {Value, binary_to_list(Leftover)}
    end.
|