The kernel's sha2/cbor/cid/ed25519 modules were labelled 'WASM-safe' but assumed 63-bit native int. On the web targets — js_of_ocaml (32-bit int) and wasm_of_ocaml (31-bit int) — they truncated, producing wrong digests/CIDs and a Char.chr crash at kernel INIT (ed25519 precomputes sqrtm1 + base_point at module load, driving the base-2^26 bignum). This is why a freshly-built browser kernel crashed on boot while the stale committed artifact (older toolchain) still ran. Fixes (all verified bit-identical to the 63-bit native build, conformance 271/271): - sx_sha2: SHA-256 round words via Int32 (were native int + land 0xFFFFFFFF, which is a no-op on 31-bit and overflows the constants); both SHA-256/512 length-encoding via Int64 shifts (native "lsr 32" is shift-mod-32 on js, which leaked the length byte into a higher word). NIST vectors pass native/js/wasm. - sx_cbor: write_head width selection + byte emission via Int64 (the 0x100000000 literal truncated to 0 on js, sending small ints to the 8-byte branch; and "v lsr (8*i)" with i>=4 was shift-mod-32). - sx_cid: base32_lower keeps acc bounded to the unconsumed low bits (it grew 8 bits/byte and overflowed). cid_from_sx now matches native<->js exactly. - sx_ed25519: bignum mul accumulates in Int64 (26x26=52-bit products overflow); div_small running remainder in Int64 (rem<<26 ~= 2^34). This was the boot gate — the browser kernel now boots (SxKernel live, crypto-sha256 correct on js). Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
71 lines
2.4 KiB
OCaml
71 lines
2.4 KiB
OCaml
(** CIDv1 computation — pure OCaml, WASM-safe.

    Multihash + CIDv1 + multibase base32-lower (RFC 4648, no pad,
    multibase prefix 'b'). Codecs: dag-cbor 0x71, raw 0x55. Hash
    codes: sha2-256 0x12, sha3-256 0x16. Reference: the multiformats
    specs (unsigned-varint, multihash, cid, multibase). No deps. *)
|
|
|
|
open Sx_types
|
|
|
|
(* Unsigned LEB128 (multiformats unsigned-varint).

   [varint n] emits [n] in groups of 7 bits, least-significant group
   first. Every byte except the last has its high bit (0x80) set as a
   continuation flag. [n] is shifted with the logical [lsr], so the loop
   always terminates; [varint 0] is the single byte 0x00. *)
let varint (n : int) : string =
  let out = Buffer.create 4 in
  let rec emit rest =
    let low = rest land 0x7f in
    let rest = rest lsr 7 in
    if rest = 0 then
      (* Final group: continuation bit clear. *)
      Buffer.add_char out (Char.chr low)
    else begin
      Buffer.add_char out (Char.chr (low lor 0x80));
      emit rest
    end
  in
  emit n;
  Buffer.contents out
|
|
|
|
(* RFC 4648 base32 alphabet, lowercase: symbol value -> character. *)
let b32_alpha = "abcdefghijklmnopqrstuvwxyz234567"

(* [base32_lower s] encodes the bytes of [s] as RFC 4648 base32 using the
   lowercase alphabet, without '=' padding.

   Bytes are pushed into a small bit accumulator 8 bits at a time and
   drained 5 bits at a time, most-significant bits first. A trailing
   group of fewer than 5 bits is left-aligned (zero-filled on the right)
   and emitted as one final symbol. *)
let base32_lower (s : string) : string =
  let len = String.length s in
  let out = Buffer.create ((len * 8 + 4) / 5) in
  let emit v = Buffer.add_char out b32_alpha.[v land 0x1f] in
  (* Emit every complete 5-bit group held in [pending], then return the
     leftover state. The result is masked down to the unconsumed low
     [nbits] bits so the accumulator stays tiny (< 2^13); without the
     mask it would grow by 8 bits per input byte and overflow native
     [int] on the 32-bit web targets, corrupting the emitted symbols. *)
  let rec drain pending nbits =
    if nbits < 5 then (pending land ((1 lsl nbits) - 1), nbits)
    else begin
      let nbits = nbits - 5 in
      emit (pending lsr nbits);
      drain pending nbits
    end
  in
  let rec feed i pending nbits =
    if i < len then begin
      let pending, nbits =
        drain ((pending lsl 8) lor Char.code s.[i]) (nbits + 8)
      in
      feed (i + 1) pending nbits
    end
    else if nbits > 0 then
      (* Flush the final partial group, left-aligned within 5 bits. *)
      emit (pending lsl (5 - nbits))
  in
  feed 0 0 0;
  Buffer.contents out
|
|
|
|
(* Decode a hex string into raw bytes: "abef" -> the 2 raw bytes.

   Each consecutive pair of characters is parsed as one byte via
   [int_of_string] with a "0x" prefix, so a malformed pair raises
   [Failure]. A trailing unpaired character (odd length) is ignored. *)
let unhex (h : string) : string =
  let byte_at i = Char.chr (int_of_string ("0x" ^ String.sub h (2 * i) 2)) in
  String.init (String.length h / 2) byte_at
|
|
|
|
(* multihash = varint(code) || varint(len) || digest

   [code] is the hash-function code and [digest] the raw digest bytes;
   the length field is the byte length of [digest]. *)
let multihash (code : int) (digest : string) : string =
  let digest_len = String.length digest in
  String.concat "" [ varint code; varint digest_len; digest ]
|
|
|
|
(* CIDv1 = 0x01 || varint(codec) || multihash ; multibase 'b' base32.

   [mh] is an already-encoded multihash. The binary CID is base32-lower
   encoded and prefixed with the multibase tag 'b'. *)
let cidv1 (codec : int) (mh : string) : string =
  let binary_cid = String.concat "" [ "\x01"; varint codec; mh ] in
  "b" ^ base32_lower binary_cid
|
|
|
|
(* Codec code for dag-cbor (0x71); passed as the [codec] of a CIDv1. *)
let codec_dag_cbor = 0x71
|
|
(* Multihash hash-function code for sha2-256 (0x12). *)
let mh_sha2_256 = 0x12
|
|
|
|
(* Canonicalize an SX value: dag-cbor encode -> sha2-256 ->
   multihash -> CIDv1 (dag-cbor codec).

   The hex digest returned by [Sx_sha2.sha256_hex] is converted back to
   raw bytes before being wrapped as a multihash. *)
let cid_from_sx (v : value) : string =
  Sx_cbor.encode v
  |> Sx_sha2.sha256_hex
  |> unhex
  |> multihash mh_sha2_256
  |> cidv1 codec_dag_cbor
|