fed-prims: Phase C — dag-cbor encode/decode, pure OCaml, RFC 8949 vectors + determinism
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 3m8s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 3m8s
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1336,6 +1336,69 @@ let run_foundation_tests () =
|
||||
(String "79f38adec5c20307a98ef76e8324afbfd46cfd81b22e3973c65fa1bd9de31787")
|
||||
(call "crypto-sha3-256" [String (String.make 200 '\xa3')]);
|
||||
|
||||
Printf.printf "\nSuite: dag-cbor\n";
|
||||
let mkdict pairs =
|
||||
let d = Sx_types.make_dict () in
|
||||
List.iter (fun (k, v) -> Hashtbl.replace d k v) pairs;
|
||||
Dict d
|
||||
in
|
||||
let enc v = call "cbor-encode" [v] in
|
||||
(* RFC 8949 Appendix A — minimal-length deterministic encoding. *)
|
||||
assert_eq "cbor 0" (String "\x00") (enc (Integer 0));
|
||||
assert_eq "cbor 23" (String "\x17") (enc (Integer 23));
|
||||
assert_eq "cbor 24" (String "\x18\x18") (enc (Integer 24));
|
||||
assert_eq "cbor 100" (String "\x18\x64") (enc (Integer 100));
|
||||
assert_eq "cbor 1000" (String "\x19\x03\xe8") (enc (Integer 1000));
|
||||
assert_eq "cbor 1000000"
|
||||
(String "\x1a\x00\x0f\x42\x40") (enc (Integer 1000000));
|
||||
assert_eq "cbor -1" (String "\x20") (enc (Integer (-1)));
|
||||
assert_eq "cbor -100" (String "\x38\x63") (enc (Integer (-100)));
|
||||
assert_eq "cbor -1000" (String "\x39\x03\xe7") (enc (Integer (-1000)));
|
||||
assert_eq "cbor false" (String "\xf4") (enc (Bool false));
|
||||
assert_eq "cbor true" (String "\xf5") (enc (Bool true));
|
||||
assert_eq "cbor null" (String "\xf6") (enc Nil);
|
||||
assert_eq "cbor \"\"" (String "\x60") (enc (String ""));
|
||||
assert_eq "cbor \"a\"" (String "\x61\x61") (enc (String "a"));
|
||||
assert_eq "cbor \"IETF\"" (String "\x64IETF") (enc (String "IETF"));
|
||||
assert_eq "cbor []" (String "\x80") (enc (List []));
|
||||
assert_eq "cbor [1,2,3]"
|
||||
(String "\x83\x01\x02\x03")
|
||||
(enc (List [Integer 1; Integer 2; Integer 3]));
|
||||
assert_eq "cbor [1,[2,3],[4,5]]"
|
||||
(String "\x83\x01\x82\x02\x03\x82\x04\x05")
|
||||
(enc (List [Integer 1;
|
||||
List [Integer 2; Integer 3];
|
||||
List [Integer 4; Integer 5]]));
|
||||
assert_eq "cbor {}" (String "\xa0") (enc (mkdict []));
|
||||
assert_eq "cbor {a:1,b:[2,3]}"
|
||||
(String "\xa2\x61\x61\x01\x61\x62\x82\x02\x03")
|
||||
(enc (mkdict ["a", Integer 1; "b", List [Integer 2; Integer 3]]));
|
||||
assert_eq "cbor {a..e:A..E}"
|
||||
(String "\xa5\x61\x61\x61\x41\x61\x62\x61\x42\x61\x63\x61\x43\x61\x64\x61\x44\x61\x65\x61\x45")
|
||||
(enc (mkdict ["a", String "A"; "b", String "B"; "c", String "C";
|
||||
"d", String "D"; "e", String "E"]));
|
||||
(* Determinism: insertion order + key length must not change bytes.
|
||||
Sort is length-then-bytewise → a, c, bb. *)
|
||||
let d1 = mkdict ["bb", Integer 2; "a", Integer 1; "c", Integer 3] in
|
||||
let d2 = mkdict ["c", Integer 3; "bb", Integer 2; "a", Integer 1] in
|
||||
assert_eq "cbor det order-invariant" (enc d1) (enc d2);
|
||||
assert_eq "cbor det length-then-bytewise"
|
||||
(String "\xa3\x61\x61\x01\x61\x63\x03\x62\x62\x62\x02")
|
||||
(enc d1);
|
||||
(* Round-trip: decode . encode = identity (structural). *)
|
||||
let roundtrip name v =
|
||||
assert_eq ("cbor rt " ^ name) v (call "cbor-decode" [enc v])
|
||||
in
|
||||
roundtrip "int" (Integer 42);
|
||||
roundtrip "neg" (Integer (-99999));
|
||||
roundtrip "str" (String "hello world");
|
||||
roundtrip "bool" (Bool true);
|
||||
roundtrip "nil" Nil;
|
||||
roundtrip "nested"
|
||||
(List [Integer 1; String "x"; List [Bool false; Nil]]);
|
||||
roundtrip "dict"
|
||||
(mkdict ["k", List [Integer 7]; "name", String "z"]);
|
||||
|
||||
Printf.printf "\nSuite: vm-extension-dispatch\n";
|
||||
let make_bc op = ({
|
||||
vc_arity = 0; vc_rest_arity = -1; vc_locals = 0;
|
||||
|
||||
142
hosts/ocaml/lib/sx_cbor.ml
Normal file
142
hosts/ocaml/lib/sx_cbor.ml
Normal file
@@ -0,0 +1,142 @@
|
||||
(** dag-cbor encode / decode — pure OCaml, WASM-safe.
|
||||
|
||||
RFC 8949 deterministic subset as constrained by IPLD dag-cbor
|
||||
(RFC 8742): unsigned/negative ints, text strings, arrays, maps
|
||||
with keys sorted by **length-then-bytewise**, bool, null, and
|
||||
tag 42 (CID link, decode-side passthrough). Floats are not
|
||||
supported (no fed-sx shape needs them yet) — encoding a [Number]
|
||||
or decoding a float head raises. Reference: RFC 8949 §3, §4.2. *)
|
||||
|
||||
open Sx_types
|
||||
|
||||
exception Cbor_error of string
|
||||
|
||||
(* ---- Encoder ---- *)
|
||||
|
||||
let write_head buf major v =
|
||||
let m = major lsl 5 in
|
||||
if v < 24 then
|
||||
Buffer.add_char buf (Char.chr (m lor v))
|
||||
else if v < 0x100 then begin
|
||||
Buffer.add_char buf (Char.chr (m lor 24));
|
||||
Buffer.add_char buf (Char.chr v)
|
||||
end else if v < 0x10000 then begin
|
||||
Buffer.add_char buf (Char.chr (m lor 25));
|
||||
Buffer.add_char buf (Char.chr ((v lsr 8) land 0xFF));
|
||||
Buffer.add_char buf (Char.chr (v land 0xFF))
|
||||
end else if v < 0x100000000 then begin
|
||||
Buffer.add_char buf (Char.chr (m lor 26));
|
||||
for i = 3 downto 0 do
|
||||
Buffer.add_char buf (Char.chr ((v lsr (8 * i)) land 0xFF))
|
||||
done
|
||||
end else begin
|
||||
Buffer.add_char buf (Char.chr (m lor 27));
|
||||
for i = 7 downto 0 do
|
||||
Buffer.add_char buf (Char.chr ((v lsr (8 * i)) land 0xFF))
|
||||
done
|
||||
end
|
||||
|
||||
(* dag-cbor map key order: shorter key first, then bytewise. *)
|
||||
let key_order a b =
|
||||
let la = String.length a and lb = String.length b in
|
||||
if la <> lb then compare la lb else compare a b
|
||||
|
||||
let rec encode_into buf (v : value) : unit =
|
||||
match v with
|
||||
| Integer n ->
|
||||
if n >= 0 then write_head buf 0 n
|
||||
else write_head buf 1 (-1 - n)
|
||||
| String s ->
|
||||
write_head buf 3 (String.length s);
|
||||
Buffer.add_string buf s
|
||||
| Symbol s | Keyword s ->
|
||||
write_head buf 3 (String.length s);
|
||||
Buffer.add_string buf s
|
||||
| Bool false -> Buffer.add_char buf '\xf4'
|
||||
| Bool true -> Buffer.add_char buf '\xf5'
|
||||
| Nil -> Buffer.add_char buf '\xf6'
|
||||
| List items ->
|
||||
write_head buf 4 (List.length items);
|
||||
List.iter (encode_into buf) items
|
||||
| Dict d ->
|
||||
let keys = Hashtbl.fold (fun k _ acc -> k :: acc) d [] in
|
||||
let keys = List.sort_uniq key_order keys in
|
||||
write_head buf 5 (List.length keys);
|
||||
List.iter (fun k ->
|
||||
write_head buf 3 (String.length k);
|
||||
Buffer.add_string buf k;
|
||||
encode_into buf (Hashtbl.find d k)) keys
|
||||
| Number _ ->
|
||||
raise (Cbor_error "cbor-encode: floats unsupported (dag-cbor subset)")
|
||||
| _ ->
|
||||
raise (Cbor_error
|
||||
("cbor-encode: unencodable value " ^ type_of v))
|
||||
|
||||
let encode (v : value) : string =
|
||||
let buf = Buffer.create 64 in
|
||||
encode_into buf v;
|
||||
Buffer.contents buf
|
||||
|
||||
(* ---- Decoder ---- *)
|
||||
|
||||
let decode (s : string) : value =
|
||||
let pos = ref 0 in
|
||||
let len = String.length s in
|
||||
let byte () =
|
||||
if !pos >= len then raise (Cbor_error "cbor-decode: truncated");
|
||||
let c = Char.code s.[!pos] in incr pos; c
|
||||
in
|
||||
let read_uint ai =
|
||||
if ai < 24 then ai
|
||||
else if ai = 24 then byte ()
|
||||
else if ai = 25 then let a = byte () in let b = byte () in (a lsl 8) lor b
|
||||
else if ai = 26 then begin
|
||||
let v = ref 0 in
|
||||
for _ = 0 to 3 do v := (!v lsl 8) lor byte () done; !v
|
||||
end else if ai = 27 then begin
|
||||
let v = ref 0 in
|
||||
for _ = 0 to 7 do v := (!v lsl 8) lor byte () done; !v
|
||||
end else raise (Cbor_error "cbor-decode: bad additional info")
|
||||
in
|
||||
let read_bytes n =
|
||||
if !pos + n > len then raise (Cbor_error "cbor-decode: truncated");
|
||||
let r = String.sub s !pos n in pos := !pos + n; r
|
||||
in
|
||||
let rec item () =
|
||||
let b = byte () in
|
||||
let major = b lsr 5 and ai = b land 0x1f in
|
||||
match major with
|
||||
| 0 -> Integer (read_uint ai)
|
||||
| 1 -> Integer (-1 - read_uint ai)
|
||||
| 2 -> String (read_bytes (read_uint ai))
|
||||
| 3 -> String (read_bytes (read_uint ai))
|
||||
| 4 ->
|
||||
let n = read_uint ai in
|
||||
List (List.init n (fun _ -> item ()))
|
||||
| 5 ->
|
||||
let n = read_uint ai in
|
||||
let d = make_dict () in
|
||||
for _ = 1 to n do
|
||||
let k = match item () with
|
||||
| String k -> k
|
||||
| _ -> raise (Cbor_error "cbor-decode: non-string map key")
|
||||
in
|
||||
Hashtbl.replace d k (item ())
|
||||
done;
|
||||
Dict d
|
||||
| 6 ->
|
||||
(* Tag: tag-42 CID link → pass the inner item through. *)
|
||||
ignore (read_uint ai); item ()
|
||||
| 7 ->
|
||||
(match ai with
|
||||
| 20 -> Bool false
|
||||
| 21 -> Bool true
|
||||
| 22 -> Nil
|
||||
| 23 -> Nil
|
||||
| _ ->
|
||||
raise (Cbor_error
|
||||
"cbor-decode: floats/simple unsupported (dag-cbor subset)"))
|
||||
| _ -> raise (Cbor_error "cbor-decode: bad major type")
|
||||
in
|
||||
let v = item () in
|
||||
v
|
||||
@@ -4174,4 +4174,18 @@ let () =
|
||||
register "crypto-sha3-256" (fun args ->
|
||||
match args with
|
||||
| [String s] -> String (Sx_sha3.sha3_256_hex s)
|
||||
| _ -> raise (Eval_error "crypto-sha3-256: (bytes)"))
|
||||
| _ -> raise (Eval_error "crypto-sha3-256: (bytes)"));
|
||||
|
||||
register "cbor-encode" (fun args ->
|
||||
match args with
|
||||
| [v] ->
|
||||
(try String (Sx_cbor.encode v)
|
||||
with Sx_cbor.Cbor_error m -> raise (Eval_error m))
|
||||
| _ -> raise (Eval_error "cbor-encode: (value)"));
|
||||
|
||||
register "cbor-decode" (fun args ->
|
||||
match args with
|
||||
| [String s] ->
|
||||
(try Sx_cbor.decode s
|
||||
with Sx_cbor.Cbor_error m -> raise (Eval_error m))
|
||||
| _ -> raise (Eval_error "cbor-decode: (bytes)"))
|
||||
|
||||
@@ -82,7 +82,7 @@ check** → tests → commit → tick box → Progress-log line → push.
|
||||
- Tests: sha3-256("") = `a7ffc6f8…0f8434a`; sha3-256("abc") = `3a985da7…11431532`.
|
||||
- **Acceptance:** NIST SHA-3 vectors pass; WASM links.
|
||||
|
||||
### Phase C — dag-cbor encoder + decoder, pure OCaml
|
||||
### Phase C — dag-cbor encoder + decoder, pure OCaml ✅ DONE
|
||||
- RFC 8949 deterministic subset (RFC 8742 dag-cbor): unsigned/negative ints,
|
||||
byte strings, text strings, arrays, maps with **keys sorted by
|
||||
length-then-bytewise**, bool, null, tag 42 (CID link). No floats unless a
|
||||
@@ -205,6 +205,12 @@ printf '(epoch 1)\n(crypto-sha256 "abc")\n' | \
|
||||
|
||||
_Newest first._
|
||||
|
||||
- 2026-05-18 — Phase C: pure-OCaml `lib/sx_cbor.ml` (dag-cbor encode/
|
||||
decode), primitives `cbor-encode`/`cbor-decode`. RFC 8949 Appendix-A
|
||||
vectors, length-then-bytewise key sort + order-invariance determinism,
|
||||
decode∘encode round-trip (30 tests). Floats unsupported (raise, no
|
||||
fed-sx shape needs them); tag-42 decode = inner-item passthrough.
|
||||
WASM boot green with new lib module; Erlang 530/530; run_tests +30.
|
||||
- 2026-05-18 — Phase B: pure-OCaml `lib/sx_sha3.ml` (Keccak-f[1600] +
|
||||
SHA-3 pad, domain 0x06), primitive `crypto-sha3-256`. 4 NIST FIPS 202
|
||||
vectors pass (empty/abc/896-bit + 1600-bit 0xa3 multi-block). WASM boot
|
||||
|
||||
Reference in New Issue
Block a user