diff --git a/hosts/ocaml/bin/run_tests.ml b/hosts/ocaml/bin/run_tests.ml index d7187f90..14ecb408 100644 --- a/hosts/ocaml/bin/run_tests.ml +++ b/hosts/ocaml/bin/run_tests.ml @@ -1399,6 +1399,29 @@ let run_foundation_tests () = roundtrip "dict" (mkdict ["k", List [Integer 7]; "name", String "z"]); + Printf.printf "\nSuite: cid\n"; + let mh_sha256 s = Sx_cid.multihash 0x12 (Sx_cid.unhex (Sx_sha2.sha256_hex s)) in + (* Authoritative vectors (independently derived; match well-known + IPFS CIDs). raw "abc" and raw "" — codec 0x55. *) + assert_eq "cid raw abc" + (String "bafkreif2pall7dybz7vecqka3zo24irdwabwdi4wc55jznaq75q7eaavvu") + (call "cid-from-bytes" [Integer 0x55; String (mh_sha256 "abc")]); + assert_eq "cid raw empty" + (String "bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku") + (call "cid-from-bytes" [Integer 0x55; String (mh_sha256 "")]); + (* dag-cbor {} — canonical empty-map CID (sha2-256, codec 0x71). *) + assert_eq "cid dag-cbor {}" + (String "bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua") + (call "cid-from-sx" [mkdict []]); + (* Determinism: dict key insertion order must not change the CID. *) + let cda = call "cid-from-sx" [mkdict ["b", Integer 2; "a", Integer 1]] in + let cdb = call "cid-from-sx" [mkdict ["a", Integer 1; "b", Integer 2]] in + assert_eq "cid det order-invariant" cda cdb; + assert_true "cid multibase 'b' prefix" + (Bool (match call "cid-from-sx" [mkdict []] with + | String s -> String.length s > 1 && s.[0] = 'b' + | _ -> false)); + Printf.printf "\nSuite: vm-extension-dispatch\n"; let make_bc op = ({ vc_arity = 0; vc_rest_arity = -1; vc_locals = 0; diff --git a/hosts/ocaml/lib/sx_cid.ml b/hosts/ocaml/lib/sx_cid.ml new file mode 100644 index 00000000..380fef01 --- /dev/null +++ b/hosts/ocaml/lib/sx_cid.ml @@ -0,0 +1,66 @@ +(** CIDv1 computation — pure OCaml, WASM-safe. + + Multihash + CIDv1 + multibase base32-lower (RFC 4648, no pad, + multibase prefix 'b'). Codecs: dag-cbor 0x71, raw 0x55. Hash + codes: sha2-256 0x12, sha3-256 0x16. Reference: the multiformats + specs (unsigned-varint, multihash, cid, multibase). No deps. *) + +open Sx_types + +(* Unsigned LEB128 (multiformats unsigned-varint). *) +let varint (n : int) : string = + let buf = Buffer.create 4 in + let n = ref n in + let cont = ref true in + while !cont do + let b = !n land 0x7f in + n := !n lsr 7; + if !n = 0 then (Buffer.add_char buf (Char.chr b); cont := false) + else Buffer.add_char buf (Char.chr (b lor 0x80)) + done; + Buffer.contents buf + +(* RFC 4648 base32 lowercase, no padding. *) +let b32_alpha = "abcdefghijklmnopqrstuvwxyz234567" + +let base32_lower (s : string) : string = + let buf = Buffer.create ((String.length s * 8 + 4) / 5) in + let acc = ref 0 and bits = ref 0 in + String.iter (fun c -> + acc := (!acc lsl 8) lor (Char.code c); + bits := !bits + 8; + while !bits >= 5 do + bits := !bits - 5; + Buffer.add_char buf b32_alpha.[(!acc lsr !bits) land 0x1f] + done) s; + if !bits > 0 then + Buffer.add_char buf b32_alpha.[(!acc lsl (5 - !bits)) land 0x1f]; + Buffer.contents buf + +(* "abef" -> the 2 raw bytes. *) +let unhex (h : string) : string = + let n = String.length h / 2 in + let b = Bytes.create n in + for i = 0 to n - 1 do + Bytes.set b i + (Char.chr (int_of_string ("0x" ^ String.sub h (2 * i) 2))) + done; + Bytes.unsafe_to_string b + +(* multihash = varint(code) || varint(len) || digest *) +let multihash (code : int) (digest : string) : string = + varint code ^ varint (String.length digest) ^ digest + +(* CIDv1 = 0x01 || varint(codec) || multihash ; multibase 'b' base32. *) +let cidv1 (codec : int) (mh : string) : string = + "b" ^ base32_lower ("\x01" ^ varint codec ^ mh) + +let codec_dag_cbor = 0x71 +let mh_sha2_256 = 0x12 + +(* Canonicalize an SX value: dag-cbor encode -> sha2-256 -> + multihash -> CIDv1 (dag-cbor codec). *) +let cid_from_sx (v : value) : string = + let cbor = Sx_cbor.encode v in + let digest = unhex (Sx_sha2.sha256_hex cbor) in + cidv1 codec_dag_cbor (multihash mh_sha2_256 digest) diff --git a/hosts/ocaml/lib/sx_primitives.ml b/hosts/ocaml/lib/sx_primitives.ml index 87f357d4..a05643d4 100644 --- a/hosts/ocaml/lib/sx_primitives.ml +++ b/hosts/ocaml/lib/sx_primitives.ml @@ -4188,4 +4188,17 @@ let () = | [String s] -> (try Sx_cbor.decode s with Sx_cbor.Cbor_error m -> raise (Eval_error m)) - | _ -> raise (Eval_error "cbor-decode: (bytes)")) + | _ -> raise (Eval_error "cbor-decode: (bytes)")); + + register "cid-from-bytes" (fun args -> + match args with + | [Integer codec; String mh] -> + String (Sx_cid.cidv1 codec mh) + | _ -> raise (Eval_error "cid-from-bytes: (codec multihash-bytes)")); + + register "cid-from-sx" (fun args -> + match args with + | [v] -> + (try String (Sx_cid.cid_from_sx v) + with Sx_cbor.Cbor_error m -> raise (Eval_error m)) + | _ -> raise (Eval_error "cid-from-sx: (value)")) diff --git a/plans/fed-sx-host-primitives.md b/plans/fed-sx-host-primitives.md index ead8eb0d..33a4c76d 100644 --- a/plans/fed-sx-host-primitives.md +++ b/plans/fed-sx-host-primitives.md @@ -93,7 +93,7 @@ check** → tests → commit → tick box → Progress-log line → push. appendix-A vectors + a "reordered dict keys → identical bytes" determinism test. - **Acceptance:** vectors + round-trip + determinism pass; WASM links. -### Phase D — CID computation, pure OCaml +### Phase D — CID computation, pure OCaml ✅ DONE - Multihash (sha2-256 = 0x12, sha3-256 = 0x16; varint code + varint len + digest). - CIDv1 = `0x01 || codec-varint || multihash`. Codecs: dag-cbor 0x71, raw 0x55. - Multibase base32 lower (`b` prefix, RFC 4648 no-pad). @@ -205,6 +205,13 @@ printf '(epoch 1)\n(crypto-sha256 "abc")\n' | \ _Newest first._ +- 2026-05-18 — Phase D: pure-OCaml `lib/sx_cid.ml` (unsigned-varint, + multihash, CIDv1, multibase base32-lower), primitives `cid-from-bytes` + / `cid-from-sx` (cbor→sha2-256→mh→cidv1, dag-cbor codec 0x71). 5 tests: + raw "abc"=bafkreif2pall7d…, raw ""=bafkreihdwdcefg…, dag-cbor {}= + bafyreigbtj4x7i… (all match canonical IPFS CIDs; no `ipfs` CLI so + vectors independently derived in Python), key-order determinism. WASM + boot green with new lib module; Erlang 530/530; run_tests +5. - 2026-05-18 — Phase C: pure-OCaml `lib/sx_cbor.ml` (dag-cbor encode/ decode), primitives `cbor-encode`/`cbor-decode`. RFC 8949 Appendix-A vectors, length-then-bytewise key sort + order-invariance determinism,