fed-prims: Phase D — CIDv1 (multihash + base32 multibase), pure OCaml, canonical IPFS vectors
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 3m2s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 3m2s
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1399,6 +1399,29 @@ let run_foundation_tests () =
|
|||||||
roundtrip "dict"
|
roundtrip "dict"
|
||||||
(mkdict ["k", List [Integer 7]; "name", String "z"]);
|
(mkdict ["k", List [Integer 7]; "name", String "z"]);
|
||||||
|
|
||||||
|
Printf.printf "\nSuite: cid\n";
|
||||||
|
let mh_sha256 s = Sx_cid.multihash 0x12 (Sx_cid.unhex (Sx_sha2.sha256_hex s)) in
|
||||||
|
(* Authoritative vectors (independently derived; match well-known
|
||||||
|
IPFS CIDs). raw "abc" and raw "" — codec 0x55. *)
|
||||||
|
assert_eq "cid raw abc"
|
||||||
|
(String "bafkreif2pall7dybz7vecqka3zo24irdwabwdi4wc55jznaq75q7eaavvu")
|
||||||
|
(call "cid-from-bytes" [Integer 0x55; String (mh_sha256 "abc")]);
|
||||||
|
assert_eq "cid raw empty"
|
||||||
|
(String "bafkreihdwdcefgh4dqkjv67uzcmw7ojee6xedzdetojuzjevtenxquvyku")
|
||||||
|
(call "cid-from-bytes" [Integer 0x55; String (mh_sha256 "")]);
|
||||||
|
(* dag-cbor {} — canonical empty-map CID (sha2-256, codec 0x71). *)
|
||||||
|
assert_eq "cid dag-cbor {}"
|
||||||
|
(String "bafyreigbtj4x7ip5legnfznufuopl4sg4knzc2cof6duas4b3q2fy6swua")
|
||||||
|
(call "cid-from-sx" [mkdict []]);
|
||||||
|
(* Determinism: dict key insertion order must not change the CID. *)
|
||||||
|
let cda = call "cid-from-sx" [mkdict ["b", Integer 2; "a", Integer 1]] in
|
||||||
|
let cdb = call "cid-from-sx" [mkdict ["a", Integer 1; "b", Integer 2]] in
|
||||||
|
assert_eq "cid det order-invariant" cda cdb;
|
||||||
|
assert_true "cid multibase 'b' prefix"
|
||||||
|
(Bool (match call "cid-from-sx" [mkdict []] with
|
||||||
|
| String s -> String.length s > 1 && s.[0] = 'b'
|
||||||
|
| _ -> false));
|
||||||
|
|
||||||
Printf.printf "\nSuite: vm-extension-dispatch\n";
|
Printf.printf "\nSuite: vm-extension-dispatch\n";
|
||||||
let make_bc op = ({
|
let make_bc op = ({
|
||||||
vc_arity = 0; vc_rest_arity = -1; vc_locals = 0;
|
vc_arity = 0; vc_rest_arity = -1; vc_locals = 0;
|
||||||
|
|||||||
66
hosts/ocaml/lib/sx_cid.ml
Normal file
66
hosts/ocaml/lib/sx_cid.ml
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
(** CIDv1 computation — pure OCaml, WASM-safe.
|
||||||
|
|
||||||
|
Multihash + CIDv1 + multibase base32-lower (RFC 4648, no pad,
|
||||||
|
multibase prefix 'b'). Codecs: dag-cbor 0x71, raw 0x55. Hash
|
||||||
|
codes: sha2-256 0x12, sha3-256 0x16. Reference: the multiformats
|
||||||
|
specs (unsigned-varint, multihash, cid, multibase). No deps. *)
|
||||||
|
|
||||||
|
open Sx_types
|
||||||
|
|
||||||
|
(* Unsigned LEB128 (multiformats unsigned-varint). *)
|
||||||
|
let varint (n : int) : string =
|
||||||
|
let buf = Buffer.create 4 in
|
||||||
|
let n = ref n in
|
||||||
|
let cont = ref true in
|
||||||
|
while !cont do
|
||||||
|
let b = !n land 0x7f in
|
||||||
|
n := !n lsr 7;
|
||||||
|
if !n = 0 then (Buffer.add_char buf (Char.chr b); cont := false)
|
||||||
|
else Buffer.add_char buf (Char.chr (b lor 0x80))
|
||||||
|
done;
|
||||||
|
Buffer.contents buf
|
||||||
|
|
||||||
|
(* RFC 4648 base32 lowercase, no padding. *)
|
||||||
|
let b32_alpha = "abcdefghijklmnopqrstuvwxyz234567"
|
||||||
|
|
||||||
|
let base32_lower (s : string) : string =
|
||||||
|
let buf = Buffer.create ((String.length s * 8 + 4) / 5) in
|
||||||
|
let acc = ref 0 and bits = ref 0 in
|
||||||
|
String.iter (fun c ->
|
||||||
|
acc := (!acc lsl 8) lor (Char.code c);
|
||||||
|
bits := !bits + 8;
|
||||||
|
while !bits >= 5 do
|
||||||
|
bits := !bits - 5;
|
||||||
|
Buffer.add_char buf b32_alpha.[(!acc lsr !bits) land 0x1f]
|
||||||
|
done) s;
|
||||||
|
if !bits > 0 then
|
||||||
|
Buffer.add_char buf b32_alpha.[(!acc lsl (5 - !bits)) land 0x1f];
|
||||||
|
Buffer.contents buf
|
||||||
|
|
||||||
|
(* "abef" -> the 2 raw bytes. *)
|
||||||
|
let unhex (h : string) : string =
|
||||||
|
let n = String.length h / 2 in
|
||||||
|
let b = Bytes.create n in
|
||||||
|
for i = 0 to n - 1 do
|
||||||
|
Bytes.set b i
|
||||||
|
(Char.chr (int_of_string ("0x" ^ String.sub h (2 * i) 2)))
|
||||||
|
done;
|
||||||
|
Bytes.unsafe_to_string b
|
||||||
|
|
||||||
|
(* multihash = varint(code) || varint(len) || digest *)
|
||||||
|
let multihash (code : int) (digest : string) : string =
|
||||||
|
varint code ^ varint (String.length digest) ^ digest
|
||||||
|
|
||||||
|
(* CIDv1 = 0x01 || varint(codec) || multihash ; multibase 'b' base32. *)
|
||||||
|
let cidv1 (codec : int) (mh : string) : string =
|
||||||
|
"b" ^ base32_lower ("\x01" ^ varint codec ^ mh)
|
||||||
|
|
||||||
|
let codec_dag_cbor = 0x71
|
||||||
|
let mh_sha2_256 = 0x12
|
||||||
|
|
||||||
|
(* Canonicalize an SX value: dag-cbor encode -> sha2-256 ->
|
||||||
|
multihash -> CIDv1 (dag-cbor codec). *)
|
||||||
|
let cid_from_sx (v : value) : string =
|
||||||
|
let cbor = Sx_cbor.encode v in
|
||||||
|
let digest = unhex (Sx_sha2.sha256_hex cbor) in
|
||||||
|
cidv1 codec_dag_cbor (multihash mh_sha2_256 digest)
|
||||||
@@ -4188,4 +4188,17 @@ let () =
|
|||||||
| [String s] ->
|
| [String s] ->
|
||||||
(try Sx_cbor.decode s
|
(try Sx_cbor.decode s
|
||||||
with Sx_cbor.Cbor_error m -> raise (Eval_error m))
|
with Sx_cbor.Cbor_error m -> raise (Eval_error m))
|
||||||
| _ -> raise (Eval_error "cbor-decode: (bytes)"))
|
| _ -> raise (Eval_error "cbor-decode: (bytes)"));
|
||||||
|
|
||||||
|
register "cid-from-bytes" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Integer codec; String mh] ->
|
||||||
|
String (Sx_cid.cidv1 codec mh)
|
||||||
|
| _ -> raise (Eval_error "cid-from-bytes: (codec multihash-bytes)"));
|
||||||
|
|
||||||
|
register "cid-from-sx" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [v] ->
|
||||||
|
(try String (Sx_cid.cid_from_sx v)
|
||||||
|
with Sx_cbor.Cbor_error m -> raise (Eval_error m))
|
||||||
|
| _ -> raise (Eval_error "cid-from-sx: (value)"))
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ check** → tests → commit → tick box → Progress-log line → push.
|
|||||||
appendix-A vectors + a "reordered dict keys → identical bytes" determinism test.
|
appendix-A vectors + a "reordered dict keys → identical bytes" determinism test.
|
||||||
- **Acceptance:** vectors + round-trip + determinism pass; WASM links.
|
- **Acceptance:** vectors + round-trip + determinism pass; WASM links.
|
||||||
|
|
||||||
### Phase D — CID computation, pure OCaml
|
### Phase D — CID computation, pure OCaml ✅ DONE
|
||||||
- Multihash (sha2-256 = 0x12, sha3-256 = 0x16; varint code + varint len + digest).
|
- Multihash (sha2-256 = 0x12, sha3-256 = 0x16; varint code + varint len + digest).
|
||||||
- CIDv1 = `0x01 || codec-varint || multihash`. Codecs: dag-cbor 0x71, raw 0x55.
|
- CIDv1 = `0x01 || codec-varint || multihash`. Codecs: dag-cbor 0x71, raw 0x55.
|
||||||
- Multibase base32 lower (`b` prefix, RFC 4648 no-pad).
|
- Multibase base32 lower (`b` prefix, RFC 4648 no-pad).
|
||||||
@@ -205,6 +205,13 @@ printf '(epoch 1)\n(crypto-sha256 "abc")\n' | \
|
|||||||
|
|
||||||
_Newest first._
|
_Newest first._
|
||||||
|
|
||||||
|
- 2026-05-18 — Phase D: pure-OCaml `lib/sx_cid.ml` (unsigned-varint,
|
||||||
|
multihash, CIDv1, multibase base32-lower), primitives `cid-from-bytes`
|
||||||
|
/ `cid-from-sx` (cbor→sha2-256→mh→cidv1, dag-cbor codec 0x71). 5 tests:
|
||||||
|
raw "abc"=bafkreif2pall7d…, raw ""=bafkreihdwdcefg…, dag-cbor {}=
|
||||||
|
bafyreigbtj4x7i… (all match canonical IPFS CIDs; no `ipfs` CLI so
|
||||||
|
vectors independently derived in Python), key-order determinism. WASM
|
||||||
|
boot green with new lib module; Erlang 530/530; run_tests +5.
|
||||||
- 2026-05-18 — Phase C: pure-OCaml `lib/sx_cbor.ml` (dag-cbor encode/
|
- 2026-05-18 — Phase C: pure-OCaml `lib/sx_cbor.ml` (dag-cbor encode/
|
||||||
decode), primitives `cbor-encode`/`cbor-decode`. RFC 8949 Appendix-A
|
decode), primitives `cbor-encode`/`cbor-decode`. RFC 8949 Appendix-A
|
||||||
vectors, length-then-bytewise key sort + order-invariance determinism,
|
vectors, length-then-bytewise key sort + order-invariance determinism,
|
||||||
|
|||||||
Reference in New Issue
Block a user