fed-prims: Phase C — dag-cbor encode/decode, pure OCaml, RFC 8949 vectors + determinism
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 3m8s

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-18 16:10:36 +00:00
parent 451bd4be62
commit bcaaa11916
4 changed files with 227 additions and 2 deletions

View File

@@ -1336,6 +1336,69 @@ let run_foundation_tests () =
(String "79f38adec5c20307a98ef76e8324afbfd46cfd81b22e3973c65fa1bd9de31787")
(call "crypto-sha3-256" [String (String.make 200 '\xa3')]);
Printf.printf "\nSuite: dag-cbor\n";
let mkdict pairs =
let d = Sx_types.make_dict () in
List.iter (fun (k, v) -> Hashtbl.replace d k v) pairs;
Dict d
in
let enc v = call "cbor-encode" [v] in
(* RFC 8949 Appendix A — minimal-length deterministic encoding. *)
assert_eq "cbor 0" (String "\x00") (enc (Integer 0));
assert_eq "cbor 23" (String "\x17") (enc (Integer 23));
assert_eq "cbor 24" (String "\x18\x18") (enc (Integer 24));
assert_eq "cbor 100" (String "\x18\x64") (enc (Integer 100));
assert_eq "cbor 1000" (String "\x19\x03\xe8") (enc (Integer 1000));
assert_eq "cbor 1000000"
(String "\x1a\x00\x0f\x42\x40") (enc (Integer 1000000));
assert_eq "cbor -1" (String "\x20") (enc (Integer (-1)));
assert_eq "cbor -100" (String "\x38\x63") (enc (Integer (-100)));
assert_eq "cbor -1000" (String "\x39\x03\xe7") (enc (Integer (-1000)));
assert_eq "cbor false" (String "\xf4") (enc (Bool false));
assert_eq "cbor true" (String "\xf5") (enc (Bool true));
assert_eq "cbor null" (String "\xf6") (enc Nil);
assert_eq "cbor \"\"" (String "\x60") (enc (String ""));
assert_eq "cbor \"a\"" (String "\x61\x61") (enc (String "a"));
assert_eq "cbor \"IETF\"" (String "\x64IETF") (enc (String "IETF"));
assert_eq "cbor []" (String "\x80") (enc (List []));
assert_eq "cbor [1,2,3]"
(String "\x83\x01\x02\x03")
(enc (List [Integer 1; Integer 2; Integer 3]));
assert_eq "cbor [1,[2,3],[4,5]]"
(String "\x83\x01\x82\x02\x03\x82\x04\x05")
(enc (List [Integer 1;
List [Integer 2; Integer 3];
List [Integer 4; Integer 5]]));
assert_eq "cbor {}" (String "\xa0") (enc (mkdict []));
assert_eq "cbor {a:1,b:[2,3]}"
(String "\xa2\x61\x61\x01\x61\x62\x82\x02\x03")
(enc (mkdict ["a", Integer 1; "b", List [Integer 2; Integer 3]]));
assert_eq "cbor {a..e:A..E}"
(String "\xa5\x61\x61\x61\x41\x61\x62\x61\x42\x61\x63\x61\x43\x61\x64\x61\x44\x61\x65\x61\x45")
(enc (mkdict ["a", String "A"; "b", String "B"; "c", String "C";
"d", String "D"; "e", String "E"]));
(* Determinism: insertion order + key length must not change bytes.
Sort is length-then-bytewise → a, c, bb. *)
let d1 = mkdict ["bb", Integer 2; "a", Integer 1; "c", Integer 3] in
let d2 = mkdict ["c", Integer 3; "bb", Integer 2; "a", Integer 1] in
assert_eq "cbor det order-invariant" (enc d1) (enc d2);
assert_eq "cbor det length-then-bytewise"
(String "\xa3\x61\x61\x01\x61\x63\x03\x62\x62\x62\x02")
(enc d1);
(* Round-trip: decode . encode = identity (structural). *)
let roundtrip name v =
assert_eq ("cbor rt " ^ name) v (call "cbor-decode" [enc v])
in
roundtrip "int" (Integer 42);
roundtrip "neg" (Integer (-99999));
roundtrip "str" (String "hello world");
roundtrip "bool" (Bool true);
roundtrip "nil" Nil;
roundtrip "nested"
(List [Integer 1; String "x"; List [Bool false; Nil]]);
roundtrip "dict"
(mkdict ["k", List [Integer 7]; "name", String "z"]);
Printf.printf "\nSuite: vm-extension-dispatch\n";
let make_bc op = ({
vc_arity = 0; vc_rest_arity = -1; vc_locals = 0;

142
hosts/ocaml/lib/sx_cbor.ml Normal file
View File

@@ -0,0 +1,142 @@
(** dag-cbor encode / decode — pure OCaml, WASM-safe.
RFC 8949 deterministic subset as constrained by IPLD dag-cbor
(RFC 8742): unsigned/negative ints, text strings, arrays, maps
with keys sorted by **length-then-bytewise**, bool, null, and
tag 42 (CID link, decode-side passthrough). Floats are not
supported (no fed-sx shape needs them yet) — encoding a [Number]
or decoding a float head raises. Reference: RFC 8949 §3, §4.2. *)
open Sx_types
exception Cbor_error of string
(* ---- Encoder ---- *)
let write_head buf major v =
let m = major lsl 5 in
if v < 24 then
Buffer.add_char buf (Char.chr (m lor v))
else if v < 0x100 then begin
Buffer.add_char buf (Char.chr (m lor 24));
Buffer.add_char buf (Char.chr v)
end else if v < 0x10000 then begin
Buffer.add_char buf (Char.chr (m lor 25));
Buffer.add_char buf (Char.chr ((v lsr 8) land 0xFF));
Buffer.add_char buf (Char.chr (v land 0xFF))
end else if v < 0x100000000 then begin
Buffer.add_char buf (Char.chr (m lor 26));
for i = 3 downto 0 do
Buffer.add_char buf (Char.chr ((v lsr (8 * i)) land 0xFF))
done
end else begin
Buffer.add_char buf (Char.chr (m lor 27));
for i = 7 downto 0 do
Buffer.add_char buf (Char.chr ((v lsr (8 * i)) land 0xFF))
done
end
(* dag-cbor map key order: shorter key first, then bytewise. *)
let key_order a b =
let la = String.length a and lb = String.length b in
if la <> lb then compare la lb else compare a b
let rec encode_into buf (v : value) : unit =
match v with
| Integer n ->
if n >= 0 then write_head buf 0 n
else write_head buf 1 (-1 - n)
| String s ->
write_head buf 3 (String.length s);
Buffer.add_string buf s
| Symbol s | Keyword s ->
write_head buf 3 (String.length s);
Buffer.add_string buf s
| Bool false -> Buffer.add_char buf '\xf4'
| Bool true -> Buffer.add_char buf '\xf5'
| Nil -> Buffer.add_char buf '\xf6'
| List items ->
write_head buf 4 (List.length items);
List.iter (encode_into buf) items
| Dict d ->
let keys = Hashtbl.fold (fun k _ acc -> k :: acc) d [] in
let keys = List.sort_uniq key_order keys in
write_head buf 5 (List.length keys);
List.iter (fun k ->
write_head buf 3 (String.length k);
Buffer.add_string buf k;
encode_into buf (Hashtbl.find d k)) keys
| Number _ ->
raise (Cbor_error "cbor-encode: floats unsupported (dag-cbor subset)")
| _ ->
raise (Cbor_error
("cbor-encode: unencodable value " ^ type_of v))
let encode (v : value) : string =
let buf = Buffer.create 64 in
encode_into buf v;
Buffer.contents buf
(* ---- Decoder ---- *)
let decode (s : string) : value =
let pos = ref 0 in
let len = String.length s in
let byte () =
if !pos >= len then raise (Cbor_error "cbor-decode: truncated");
let c = Char.code s.[!pos] in incr pos; c
in
let read_uint ai =
if ai < 24 then ai
else if ai = 24 then byte ()
else if ai = 25 then let a = byte () in let b = byte () in (a lsl 8) lor b
else if ai = 26 then begin
let v = ref 0 in
for _ = 0 to 3 do v := (!v lsl 8) lor byte () done; !v
end else if ai = 27 then begin
let v = ref 0 in
for _ = 0 to 7 do v := (!v lsl 8) lor byte () done; !v
end else raise (Cbor_error "cbor-decode: bad additional info")
in
let read_bytes n =
if !pos + n > len then raise (Cbor_error "cbor-decode: truncated");
let r = String.sub s !pos n in pos := !pos + n; r
in
let rec item () =
let b = byte () in
let major = b lsr 5 and ai = b land 0x1f in
match major with
| 0 -> Integer (read_uint ai)
| 1 -> Integer (-1 - read_uint ai)
| 2 -> String (read_bytes (read_uint ai))
| 3 -> String (read_bytes (read_uint ai))
| 4 ->
let n = read_uint ai in
List (List.init n (fun _ -> item ()))
| 5 ->
let n = read_uint ai in
let d = make_dict () in
for _ = 1 to n do
let k = match item () with
| String k -> k
| _ -> raise (Cbor_error "cbor-decode: non-string map key")
in
Hashtbl.replace d k (item ())
done;
Dict d
| 6 ->
(* Tag: tag-42 CID link → pass the inner item through. *)
ignore (read_uint ai); item ()
| 7 ->
(match ai with
| 20 -> Bool false
| 21 -> Bool true
| 22 -> Nil
| 23 -> Nil
| _ ->
raise (Cbor_error
"cbor-decode: floats/simple unsupported (dag-cbor subset)"))
| _ -> raise (Cbor_error "cbor-decode: bad major type")
in
let v = item () in
v

View File

@@ -4174,4 +4174,18 @@ let () =
register "crypto-sha3-256" (fun args ->
match args with
| [String s] -> String (Sx_sha3.sha3_256_hex s)
| _ -> raise (Eval_error "crypto-sha3-256: (bytes)"))
| _ -> raise (Eval_error "crypto-sha3-256: (bytes)"));
register "cbor-encode" (fun args ->
match args with
| [v] ->
(try String (Sx_cbor.encode v)
with Sx_cbor.Cbor_error m -> raise (Eval_error m))
| _ -> raise (Eval_error "cbor-encode: (value)"));
register "cbor-decode" (fun args ->
match args with
| [String s] ->
(try Sx_cbor.decode s
with Sx_cbor.Cbor_error m -> raise (Eval_error m))
| _ -> raise (Eval_error "cbor-decode: (bytes)"))