fed-prims: Phase D — CIDv1 (multihash + base32 multibase), pure OCaml, canonical IPFS vectors
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 3m2s

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-18 16:36:42 +00:00
parent bcaaa11916
commit d8b57784fe
4 changed files with 111 additions and 2 deletions

View File

@@ -1399,6 +1399,29 @@ let run_foundation_tests () =
roundtrip "dict"
(mkdict ["k", List [Integer 7]; "name", String "z"]);
Printf.printf "\nSuite: cid\n";
let mh_sha256 s = Sx_cid.multihash 0x12 (Sx_cid.unhex (Sx_sha2.sha256_hex s)) in
(* Authoritative vectors (independently derived; match well-known
IPFS CIDs). raw "abc" and raw "" — codec 0x55. *)
assert_eq "cid raw abc"
(String "bafkreif2pall7dybz7vecqka3zo24irdwabwdi4wc55jznaq75q7eaavvu")
(call "cid-from-bytes" [Integer 0x55; String (mh_sha256 "abc")]);
assert_eq "cid raw empty"
(String "bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku")
(call "cid-from-bytes" [Integer 0x55; String (mh_sha256 "")]);
(* dag-cbor {} — canonical empty-map CID (sha2-256, codec 0x71). *)
assert_eq "cid dag-cbor {}"
(String "bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua")
(call "cid-from-sx" [mkdict []]);
(* Determinism: dict key insertion order must not change the CID. *)
let cda = call "cid-from-sx" [mkdict ["b", Integer 2; "a", Integer 1]] in
let cdb = call "cid-from-sx" [mkdict ["a", Integer 1; "b", Integer 2]] in
assert_eq "cid det order-invariant" cda cdb;
assert_true "cid multibase 'b' prefix"
(Bool (match call "cid-from-sx" [mkdict []] with
| String s -> String.length s > 1 && s.[0] = 'b'
| _ -> false));
Printf.printf "\nSuite: vm-extension-dispatch\n";
let make_bc op = ({
vc_arity = 0; vc_rest_arity = -1; vc_locals = 0;

66
hosts/ocaml/lib/sx_cid.ml Normal file
View File

@@ -0,0 +1,66 @@
(** CIDv1 computation — pure OCaml, WASM-safe.
Multihash + CIDv1 + multibase base32-lower (RFC 4648, no pad,
multibase prefix 'b'). Codecs: dag-cbor 0x71, raw 0x55. Hash
codes: sha2-256 0x12, sha3-256 0x16. Reference: the multiformats
specs (unsigned-varint, multihash, cid, multibase). No deps. *)
open Sx_types
(* Unsigned LEB128 (multiformats unsigned-varint). *)
let varint (n : int) : string =
let buf = Buffer.create 4 in
let n = ref n in
let cont = ref true in
while !cont do
let b = !n land 0x7f in
n := !n lsr 7;
if !n = 0 then (Buffer.add_char buf (Char.chr b); cont := false)
else Buffer.add_char buf (Char.chr (b lor 0x80))
done;
Buffer.contents buf
(* RFC 4648 base32 lowercase, no padding. *)
let b32_alpha = "abcdefghijklmnopqrstuvwxyz234567"
let base32_lower (s : string) : string =
let buf = Buffer.create ((String.length s * 8 + 4) / 5) in
let acc = ref 0 and bits = ref 0 in
String.iter (fun c ->
acc := (!acc lsl 8) lor (Char.code c);
bits := !bits + 8;
while !bits >= 5 do
bits := !bits - 5;
Buffer.add_char buf b32_alpha.[(!acc lsr !bits) land 0x1f]
done) s;
if !bits > 0 then
Buffer.add_char buf b32_alpha.[(!acc lsl (5 - !bits)) land 0x1f];
Buffer.contents buf
(* "abef" -> the 2 raw bytes. *)
let unhex (h : string) : string =
let n = String.length h / 2 in
let b = Bytes.create n in
for i = 0 to n - 1 do
Bytes.set b i
(Char.chr (int_of_string ("0x" ^ String.sub h (2 * i) 2)))
done;
Bytes.unsafe_to_string b
(* multihash = varint(code) || varint(len) || digest *)
let multihash (code : int) (digest : string) : string =
varint code ^ varint (String.length digest) ^ digest
(* CIDv1 = 0x01 || varint(codec) || multihash ; multibase 'b' base32. *)
let cidv1 (codec : int) (mh : string) : string =
"b" ^ base32_lower ("\x01" ^ varint codec ^ mh)
let codec_dag_cbor = 0x71
let mh_sha2_256 = 0x12
(* Canonicalize an SX value: dag-cbor encode -> sha2-256 ->
multihash -> CIDv1 (dag-cbor codec). *)
let cid_from_sx (v : value) : string =
let cbor = Sx_cbor.encode v in
let digest = unhex (Sx_sha2.sha256_hex cbor) in
cidv1 codec_dag_cbor (multihash mh_sha2_256 digest)

View File

@@ -4188,4 +4188,17 @@ let () =
| [String s] ->
(try Sx_cbor.decode s
with Sx_cbor.Cbor_error m -> raise (Eval_error m))
| _ -> raise (Eval_error "cbor-decode: (bytes)"))
| _ -> raise (Eval_error "cbor-decode: (bytes)"));
register "cid-from-bytes" (fun args ->
match args with
| [Integer codec; String mh] ->
String (Sx_cid.cidv1 codec mh)
| _ -> raise (Eval_error "cid-from-bytes: (codec multihash-bytes)"));
register "cid-from-sx" (fun args ->
match args with
| [v] ->
(try String (Sx_cid.cid_from_sx v)
with Sx_cbor.Cbor_error m -> raise (Eval_error m))
| _ -> raise (Eval_error "cid-from-sx: (value)"))