-module(term_codec).
-export([encode/1, decode/1]).

%% Erlang-side term <-> binary codec, built on the substrate fixes from
%% commits 24e3bf53 (binary_to_list / list_to_binary), 3d80bd8c ($X char
%% literals), 4852cca9 (atom_to_list / integer_to_list charlists).
%%
%% Wire format (netstring-ish; all length headers ASCII decimal):
%%
%%   atom     $a Len   $: NameBytes
%%   integer  $i Len   $: DecimalBytes        (negative ints carry leading $-)
%%   binary   $b Len   $: RawBytes
%%   tuple    $t Count $: Enc1 Enc2 ... Encn
%%   list     $l Count $: Enc1 Enc2 ... Encn  (proper list)
%%   nil      $l $0 $:                        (empty list)
%%
%% Each Enc is itself one of these forms — recursive. The format is
%% byte-clean: binary bodies may contain any byte (newlines, NULs, etc.),
%% so callers can frame entries with a 4-byte big-endian length prefix
%% (Step 3b on-disk segment writer's job).

%% encode/1: term -> binary
%% Supported terms are atoms, integers, binaries, tuples and lists whose
%% elements are themselves supported; any other term matches no clause
%% of encode_io/1 and fails with function_clause.
%% The wire form is assembled as one nested iolist and flattened by a
%% single list_to_binary/1 call here, so a nested element is not copied
%% into a fresh intermediate binary at every enclosing tuple/list level.
encode(T) ->
    list_to_binary(encode_io(T)).

%% encode_io/1: term -> iolist holding the wire form of one term.
%% Header lengths are the character count of the atom name / decimal
%% text, the byte size of a binary, or the element count of a container.
encode_io(T) when is_atom(T) ->
    Cs = atom_to_list(T),
    [$a, integer_to_list(length(Cs)), $:, Cs];
encode_io(T) when is_integer(T) ->
    Cs = integer_to_list(T),
    [$i, integer_to_list(length(Cs)), $:, Cs];
encode_io(T) when is_binary(T) ->
    [$b, integer_to_list(byte_size(T)), $:, T];
encode_io(T) when is_tuple(T) ->
    L = tuple_to_list(T),
    [$t, integer_to_list(length(L)), $:, [encode_io(E) || E <- L]];
encode_io([]) ->
    [$l, $0, $:];
encode_io(T) when is_list(T) ->
    [$l, integer_to_list(length(T)), $:, [encode_io(E) || E <- T]].

%% decode/1: binary -> {ok, Term, RestBinary} | {error, badform}
%% On success returns the remaining unconsumed bytes so callers can
%% stream-decode multiple frames from one buffer.
%% The input is turned into a byte list and handed to decode_chars/1;
%% whatever decode_chars/1 returns (or raises) is passed through as is.
decode(B) when is_binary(B) ->
    decode_chars(binary_to_list(B)).
%% decode_chars/1: byte list -> {ok, Term, RestBinary} | {error, badform}
%% Decodes exactly one wire-format term from the front of Chars and
%% returns the unconsumed tail as a binary. Every malformed input
%% (unknown tag, missing or empty length, missing $:, truncated body,
%% non-decimal integer body, over-long atom name, bad nested element)
%% yields {error, badform}; nothing is raised for bad data.
%% The tail is kept as a list while nested elements are decoded and is
%% converted to a binary only once, here, so decoding a container does
%% not re-copy the remaining input for every element.
decode_chars(Chars) ->
    case decode_term(Chars) of
        {ok, Term, Rest} -> {ok, Term, list_to_binary(Rest)};
        {error, badform} -> {error, badform}
    end.

%% decode_term/1: byte list -> {ok, Term, RestChars} | {error, badform}
%% Dispatches on the one-byte type tag.
decode_term([$a | Rest]) -> decode_scalar($a, Rest);
decode_term([$i | Rest]) -> decode_scalar($i, Rest);
decode_term([$b | Rest]) -> decode_scalar($b, Rest);
decode_term([$t | Rest]) -> decode_seq($t, Rest);
decode_term([$l | Rest]) -> decode_seq($l, Rest);
decode_term(_) -> {error, badform}.

%% decode_scalar/2: reads "Len:Body" and converts Body according to Tag.
decode_scalar(Tag, Chars) ->
    case read_body(Chars) of
        {ok, Body, Rest} -> scalar_term(Tag, Body, Rest);
        {error, badform} -> {error, badform}
    end.

%% scalar_term/3: turns an already-extracted body into the final term.
%% NOTE(security): list_to_atom/1 creates new atoms from input data and
%% atoms are never garbage collected, so decoding untrusted input can
%% exhaust the atom table. It is kept because switching to
%% list_to_existing_atom/1 would reject atoms this node has not seen yet;
%% callers decoding untrusted data should be aware of this.
%% The 255 bound is the VM's maximum atom length; longer names would
%% make list_to_atom/1 raise system_limit instead of reporting badform.
scalar_term($a, Name, Rest) when length(Name) =< 255 ->
    {ok, list_to_atom(Name), Rest};
scalar_term($a, _Name, _Rest) ->
    {error, badform};
scalar_term($b, Bytes, Rest) ->
    {ok, list_to_binary(Bytes), Rest};
scalar_term($i, Digits, Rest) ->
    %% Validate first: list_to_integer/1 raises badarg on junk.
    case is_decimal(Digits) of
        true -> {ok, list_to_integer(Digits), Rest};
        false -> {error, badform}
    end.

%% is_decimal/1: true for an optional sign followed by one or more
%% ASCII digits (the same shapes list_to_integer/1 accepts).
is_decimal([$- | Ds]) -> all_digits(Ds);
is_decimal([$+ | Ds]) -> all_digits(Ds);
is_decimal(Ds) -> all_digits(Ds).

%% all_digits/1: true only for a non-empty list of ASCII digits.
all_digits([C]) when C >= $0, C =< $9 -> true;
all_digits([C | Rest]) when C >= $0, C =< $9 -> all_digits(Rest);
all_digits(_) -> false.

%% decode_seq/2: reads "Count:" followed by Count encoded elements and
%% returns them as a tuple ($t) or a list ($l).
decode_seq(Tag, Chars) ->
    case read_len(Chars) of
        {ok, Count, Rest} -> seq_term(Tag, decode_n(Count, Rest, []));
        {error, badform} -> {error, badform}
    end.

%% seq_term/2: wraps the decoded element list in the container for Tag.
seq_term($t, {ok, Elems, Rest}) -> {ok, list_to_tuple(Elems), Rest};
seq_term($l, {ok, Elems, Rest}) -> {ok, Elems, Rest};
seq_term(_Tag, {error, badform}) -> {error, badform}.

%% decode_n/3: decodes N consecutive terms, accumulating in reverse.
%% Stops at the first malformed element.
decode_n(0, Rest, Acc) ->
    {ok, lists:reverse(Acc), Rest};
decode_n(N, Chars, Acc) ->
    case decode_term(Chars) of
        {ok, Term, Rest} -> decode_n(N - 1, Rest, [Term | Acc]);
        {error, badform} -> {error, badform}
    end.

%% read_body/1: reads "Len:" and then exactly Len body bytes.
read_body(Chars) ->
    case read_len(Chars) of
        {ok, Len, Rest} -> split_at(Len, Rest, []);
        {error, badform} -> {error, badform}
    end.

%% read_len/1: parses the ASCII-decimal header up to and including $:.
%% At least one digit is required, so a bare ":" (and a leading $-) is
%% rejected rather than being read as length 0.
read_len([C | Rest]) when C >= $0, C =< $9 ->
    read_digits(Rest, C - $0);
read_len(_) ->
    {error, badform}.

%% read_digits/2: accumulates further digits; the header must end in $:.
read_digits([C | Rest], Acc) when C >= $0, C =< $9 ->
    read_digits(Rest, Acc * 10 + C - $0);
read_digits([$: | Rest], Acc) ->
    {ok, Acc, Rest};
read_digits(_, _Acc) ->
    {error, badform}.

%% split_at/3: takes the first N elements off the list (tail-recursive).
%% Running out of input before N elements were taken is badform.
split_at(0, Rest, Acc) ->
    {ok, lists:reverse(Acc), Rest};
split_at(N, [H | T], Acc) ->
    split_at(N - 1, T, [H | Acc]);
split_at(_N, _Short, _Acc) ->
    {error, badform}.