From bcaaa119168f7398ca28cc5deccf7cc4133d0efd Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 18 May 2026 16:10:36 +0000 Subject: [PATCH] =?UTF-8?q?fed-prims:=20Phase=20C=20=E2=80=94=20dag-cbor?= =?UTF-8?q?=20encode/decode,=20pure=20OCaml,=20RFC=208949=20vectors=20+=20?= =?UTF-8?q?determinism?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.7 (1M context) --- hosts/ocaml/bin/run_tests.ml | 63 ++++++++++++++ hosts/ocaml/lib/sx_cbor.ml | 142 +++++++++++++++++++++++++++++++ hosts/ocaml/lib/sx_primitives.ml | 16 +++- plans/fed-sx-host-primitives.md | 8 +- 4 files changed, 227 insertions(+), 2 deletions(-) create mode 100644 hosts/ocaml/lib/sx_cbor.ml diff --git a/hosts/ocaml/bin/run_tests.ml b/hosts/ocaml/bin/run_tests.ml index 80e7cb5c..d7187f90 100644 --- a/hosts/ocaml/bin/run_tests.ml +++ b/hosts/ocaml/bin/run_tests.ml @@ -1336,6 +1336,69 @@ let run_foundation_tests () = (String "79f38adec5c20307a98ef76e8324afbfd46cfd81b22e3973c65fa1bd9de31787") (call "crypto-sha3-256" [String (String.make 200 '\xa3')]); + Printf.printf "\nSuite: dag-cbor\n"; + let mkdict pairs = + let d = Sx_types.make_dict () in + List.iter (fun (k, v) -> Hashtbl.replace d k v) pairs; + Dict d + in + let enc v = call "cbor-encode" [v] in + (* RFC 8949 Appendix A — minimal-length deterministic encoding. *) + assert_eq "cbor 0" (String "\x00") (enc (Integer 0)); + assert_eq "cbor 23" (String "\x17") (enc (Integer 23)); + assert_eq "cbor 24" (String "\x18\x18") (enc (Integer 24)); + assert_eq "cbor 100" (String "\x18\x64") (enc (Integer 100)); + assert_eq "cbor 1000" (String "\x19\x03\xe8") (enc (Integer 1000)); + assert_eq "cbor 1000000" + (String "\x1a\x00\x0f\x42\x40") (enc (Integer 1000000)); + assert_eq "cbor -1" (String "\x20") (enc (Integer (-1))); + assert_eq "cbor -100" (String "\x38\x63") (enc (Integer (-100))); + assert_eq "cbor -1000" (String "\x39\x03\xe7") (enc (Integer (-1000))); + assert_eq "cbor false" (String "\xf4") (enc (Bool false)); + assert_eq "cbor true" (String "\xf5") (enc (Bool true)); + assert_eq "cbor null" (String "\xf6") (enc Nil); + assert_eq "cbor \"\"" (String "\x60") (enc (String "")); + assert_eq "cbor \"a\"" (String "\x61\x61") (enc (String "a")); + assert_eq "cbor \"IETF\"" (String "\x64IETF") (enc (String "IETF")); + assert_eq "cbor []" (String "\x80") (enc (List [])); + assert_eq "cbor [1,2,3]" + (String "\x83\x01\x02\x03") + (enc (List [Integer 1; Integer 2; Integer 3])); + assert_eq "cbor [1,[2,3],[4,5]]" + (String "\x83\x01\x82\x02\x03\x82\x04\x05") + (enc (List [Integer 1; + List [Integer 2; Integer 3]; + List [Integer 4; Integer 5]])); + assert_eq "cbor {}" (String "\xa0") (enc (mkdict [])); + assert_eq "cbor {a:1,b:[2,3]}" + (String "\xa2\x61\x61\x01\x61\x62\x82\x02\x03") + (enc (mkdict ["a", Integer 1; "b", List [Integer 2; Integer 3]])); + assert_eq "cbor {a..e:A..E}" + (String "\xa5\x61\x61\x61\x41\x61\x62\x61\x42\x61\x63\x61\x43\x61\x64\x61\x44\x61\x65\x61\x45") + (enc (mkdict ["a", String "A"; "b", String "B"; "c", String "C"; + "d", String "D"; "e", String "E"])); + (* Determinism: insertion order + key length must not change bytes. + Sort is length-then-bytewise → a, c, bb. *) + let d1 = mkdict ["bb", Integer 2; "a", Integer 1; "c", Integer 3] in + let d2 = mkdict ["c", Integer 3; "bb", Integer 2; "a", Integer 1] in + assert_eq "cbor det order-invariant" (enc d1) (enc d2); + assert_eq "cbor det length-then-bytewise" + (String "\xa3\x61\x61\x01\x61\x63\x03\x62\x62\x62\x02") + (enc d1); + (* Round-trip: decode . encode = identity (structural). *) + let roundtrip name v = + assert_eq ("cbor rt " ^ name) v (call "cbor-decode" [enc v]) + in + roundtrip "int" (Integer 42); + roundtrip "neg" (Integer (-99999)); + roundtrip "str" (String "hello world"); + roundtrip "bool" (Bool true); + roundtrip "nil" Nil; + roundtrip "nested" + (List [Integer 1; String "x"; List [Bool false; Nil]]); + roundtrip "dict" + (mkdict ["k", List [Integer 7]; "name", String "z"]); + Printf.printf "\nSuite: vm-extension-dispatch\n"; let make_bc op = ({ vc_arity = 0; vc_rest_arity = -1; vc_locals = 0; diff --git a/hosts/ocaml/lib/sx_cbor.ml b/hosts/ocaml/lib/sx_cbor.ml new file mode 100644 index 00000000..b4ec7ba1 --- /dev/null +++ b/hosts/ocaml/lib/sx_cbor.ml @@ -0,0 +1,142 @@ +(** dag-cbor encode / decode — pure OCaml, WASM-safe. + + RFC 8949 deterministic subset as constrained by IPLD dag-cbor + (RFC 8742): unsigned/negative ints, text strings, arrays, maps + with keys sorted by **length-then-bytewise**, bool, null, and + tag 42 (CID link, decode-side passthrough). Floats are not + supported (no fed-sx shape needs them yet) — encoding a [Number] + or decoding a float head raises. Reference: RFC 8949 §3, §4.2. *) + +open Sx_types + +exception Cbor_error of string + +(* ---- Encoder ---- *) + +let write_head buf major v = + let m = major lsl 5 in + if v < 24 then + Buffer.add_char buf (Char.chr (m lor v)) + else if v < 0x100 then begin + Buffer.add_char buf (Char.chr (m lor 24)); + Buffer.add_char buf (Char.chr v) + end else if v < 0x10000 then begin + Buffer.add_char buf (Char.chr (m lor 25)); + Buffer.add_char buf (Char.chr ((v lsr 8) land 0xFF)); + Buffer.add_char buf (Char.chr (v land 0xFF)) + end else if v < 0x100000000 then begin + Buffer.add_char buf (Char.chr (m lor 26)); + for i = 3 downto 0 do + Buffer.add_char buf (Char.chr ((v lsr (8 * i)) land 0xFF)) + done + end else begin + Buffer.add_char buf (Char.chr (m lor 27)); + for i = 7 downto 0 do + Buffer.add_char buf (Char.chr ((v lsr (8 * i)) land 0xFF)) + done + end + +(* dag-cbor map key order: shorter key first, then bytewise. *) +let key_order a b = + let la = String.length a and lb = String.length b in + if la <> lb then compare la lb else compare a b + +let rec encode_into buf (v : value) : unit = + match v with + | Integer n -> + if n >= 0 then write_head buf 0 n + else write_head buf 1 (-1 - n) + | String s -> + write_head buf 3 (String.length s); + Buffer.add_string buf s + | Symbol s | Keyword s -> + write_head buf 3 (String.length s); + Buffer.add_string buf s + | Bool false -> Buffer.add_char buf '\xf4' + | Bool true -> Buffer.add_char buf '\xf5' + | Nil -> Buffer.add_char buf '\xf6' + | List items -> + write_head buf 4 (List.length items); + List.iter (encode_into buf) items + | Dict d -> + let keys = Hashtbl.fold (fun k _ acc -> k :: acc) d [] in + let keys = List.sort_uniq key_order keys in + write_head buf 5 (List.length keys); + List.iter (fun k -> + write_head buf 3 (String.length k); + Buffer.add_string buf k; + encode_into buf (Hashtbl.find d k)) keys + | Number _ -> + raise (Cbor_error "cbor-encode: floats unsupported (dag-cbor subset)") + | _ -> + raise (Cbor_error + ("cbor-encode: unencodable value " ^ type_of v)) + +let encode (v : value) : string = + let buf = Buffer.create 64 in + encode_into buf v; + Buffer.contents buf + +(* ---- Decoder ---- *) + +let decode (s : string) : value = + let pos = ref 0 in + let len = String.length s in + let byte () = + if !pos >= len then raise (Cbor_error "cbor-decode: truncated"); + let c = Char.code s.[!pos] in incr pos; c + in + let read_uint ai = + if ai < 24 then ai + else if ai = 24 then byte () + else if ai = 25 then let a = byte () in let b = byte () in (a lsl 8) lor b + else if ai = 26 then begin + let v = ref 0 in + for _ = 0 to 3 do v := (!v lsl 8) lor byte () done; !v + end else if ai = 27 then begin + let v = ref 0 in + for _ = 0 to 7 do v := (!v lsl 8) lor byte () done; !v + end else raise (Cbor_error "cbor-decode: bad additional info") + in + let read_bytes n = + if !pos + n > len then raise (Cbor_error "cbor-decode: truncated"); + let r = String.sub s !pos n in pos := !pos + n; r + in + let rec item () = + let b = byte () in + let major = b lsr 5 and ai = b land 0x1f in + match major with + | 0 -> Integer (read_uint ai) + | 1 -> Integer (-1 - read_uint ai) + | 2 -> String (read_bytes (read_uint ai)) + | 3 -> String (read_bytes (read_uint ai)) + | 4 -> + let n = read_uint ai in + List (List.init n (fun _ -> item ())) + | 5 -> + let n = read_uint ai in + let d = make_dict () in + for _ = 1 to n do + let k = match item () with + | String k -> k + | _ -> raise (Cbor_error "cbor-decode: non-string map key") + in + Hashtbl.replace d k (item ()) + done; + Dict d + | 6 -> + (* Tag: tag-42 CID link → pass the inner item through. *) + ignore (read_uint ai); item () + | 7 -> + (match ai with + | 20 -> Bool false + | 21 -> Bool true + | 22 -> Nil + | 23 -> Nil + | _ -> + raise (Cbor_error + "cbor-decode: floats/simple unsupported (dag-cbor subset)")) + | _ -> raise (Cbor_error "cbor-decode: bad major type") + in + let v = item () in + v diff --git a/hosts/ocaml/lib/sx_primitives.ml b/hosts/ocaml/lib/sx_primitives.ml index 87e89b63..87f357d4 100644 --- a/hosts/ocaml/lib/sx_primitives.ml +++ b/hosts/ocaml/lib/sx_primitives.ml @@ -4174,4 +4174,18 @@ let () = register "crypto-sha3-256" (fun args -> match args with | [String s] -> String (Sx_sha3.sha3_256_hex s) - | _ -> raise (Eval_error "crypto-sha3-256: (bytes)")) + | _ -> raise (Eval_error "crypto-sha3-256: (bytes)")); + + register "cbor-encode" (fun args -> + match args with + | [v] -> + (try String (Sx_cbor.encode v) + with Sx_cbor.Cbor_error m -> raise (Eval_error m)) + | _ -> raise (Eval_error "cbor-encode: (value)")); + + register "cbor-decode" (fun args -> + match args with + | [String s] -> + (try Sx_cbor.decode s + with Sx_cbor.Cbor_error m -> raise (Eval_error m)) + | _ -> raise (Eval_error "cbor-decode: (bytes)")) diff --git a/plans/fed-sx-host-primitives.md b/plans/fed-sx-host-primitives.md index ddb0687f..ead8eb0d 100644 --- a/plans/fed-sx-host-primitives.md +++ b/plans/fed-sx-host-primitives.md @@ -82,7 +82,7 @@ check** → tests → commit → tick box → Progress-log line → push. - Tests: sha3-256("") = `a7ffc6f8…0f8434a`; sha3-256("abc") = `3a985da7…11431532`. - **Acceptance:** NIST SHA-3 vectors pass; WASM links. -### Phase C — dag-cbor encoder + decoder, pure OCaml +### Phase C — dag-cbor encoder + decoder, pure OCaml ✅ DONE - RFC 8949 deterministic subset (RFC 8742 dag-cbor): unsigned/negative ints, byte strings, text strings, arrays, maps with **keys sorted by length-then-bytewise**, bool, null, tag 42 (CID link). No floats unless a @@ -205,6 +205,12 @@ printf '(epoch 1)\n(crypto-sha256 "abc")\n' | \ _Newest first._ +- 2026-05-18 — Phase C: pure-OCaml `lib/sx_cbor.ml` (dag-cbor encode/ + decode), primitives `cbor-encode`/`cbor-decode`. RFC 8949 Appendix-A + vectors, length-then-bytewise key sort + order-invariance determinism, + decode∘encode round-trip (30 tests). Floats unsupported (raise, no + fed-sx shape needs them); tag-42 decode = inner-item passthrough. + WASM boot green with new lib module; Erlang 530/530; run_tests +30. - 2026-05-18 — Phase B: pure-OCaml `lib/sx_sha3.ml` (Keccak-f[1600] + SHA-3 pad, domain 0x06), primitive `crypto-sha3-256`. 4 NIST FIPS 202 vectors pass (empty/abc/896-bit + 1600-bit 0xa3 multi-block). WASM boot