Files
rose-ash/hosts/ocaml/lib/sx_cid.ml
giles fce9e0c617 kernel: make the crypto/content-addressing stack actually WASM-safe (32-bit ints)
The kernel's sha2/cbor/cid/ed25519 modules were labelled 'WASM-safe' but assumed
63-bit native int. On the web targets — js_of_ocaml (32-bit int) and
wasm_of_ocaml (31-bit int) — they truncated, producing wrong digests/CIDs and a
Char.chr crash at kernel INIT (ed25519 precomputes sqrtm1 + base_point at module
load, driving the base-2^26 bignum). This is why a freshly-built browser kernel
crashed on boot while the stale committed artifact (older toolchain) still ran.

Fixes (all verified bit-identical to the 63-bit native build, conformance 271/271):
- sx_sha2: SHA-256 round words via Int32 (were native int + land 0xFFFFFFFF,
  which is a no-op on 31-bit and overflows the constants); both SHA-256/512
  length-encoding via Int64 shifts (native "lsr 32" is shift-mod-32 on js, which
  leaked the length byte into a higher word). NIST vectors pass native/js/wasm.
- sx_cbor: write_head width selection + byte emission via Int64 (the 0x100000000
  literal truncated to 0 on js, sending small ints to the 8-byte branch; and
  "v lsr (8*i)" with i>=4 was shift-mod-32).
- sx_cid: base32_lower keeps acc bounded to the unconsumed low bits (it grew 8
  bits/byte and overflowed). cid_from_sx now matches native<->js exactly.
- sx_ed25519: bignum mul accumulates in Int64 (26x26=52-bit products overflow);
  div_small running remainder in Int64 (rem<<26 ~= 2^34). This was the boot gate
  — the browser kernel now boots (SxKernel live, crypto-sha256 correct on js).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 07:51:50 +00:00

71 lines
2.4 KiB
OCaml

(** CIDv1 computation — pure OCaml, WASM-safe.
Multihash + CIDv1 + multibase base32-lower (RFC 4648, no pad,
multibase prefix 'b'). Codecs: dag-cbor 0x71, raw 0x55. Hash
codes: sha2-256 0x12, sha3-256 0x16. Reference: the multiformats
specs (unsigned-varint, multihash, cid, multibase). No deps. *)
open Sx_types
(* Unsigned LEB128 (multiformats unsigned-varint). *)
let varint (n : int) : string =
let buf = Buffer.create 4 in
let n = ref n in
let cont = ref true in
while !cont do
let b = !n land 0x7f in
n := !n lsr 7;
if !n = 0 then (Buffer.add_char buf (Char.chr b); cont := false)
else Buffer.add_char buf (Char.chr (b lor 0x80))
done;
Buffer.contents buf
(* RFC 4648 base32 lowercase, no padding. *)
let b32_alpha = "abcdefghijklmnopqrstuvwxyz234567"
let base32_lower (s : string) : string =
let buf = Buffer.create ((String.length s * 8 + 4) / 5) in
let acc = ref 0 and bits = ref 0 in
String.iter (fun c ->
acc := (!acc lsl 8) lor (Char.code c);
bits := !bits + 8;
while !bits >= 5 do
bits := !bits - 5;
Buffer.add_char buf b32_alpha.[(!acc lsr !bits) land 0x1f]
done;
(* Keep only the unconsumed low [bits] bits, so [acc] stays tiny (< 2^13).
Without this it grows by 8 bits per byte and overflows native [int] on
the 32-bit web targets, corrupting the emitted symbols. *)
acc := !acc land ((1 lsl !bits) - 1)) s;
if !bits > 0 then
Buffer.add_char buf b32_alpha.[(!acc lsl (5 - !bits)) land 0x1f];
Buffer.contents buf
(* "abef" -> the 2 raw bytes. *)
let unhex (h : string) : string =
let n = String.length h / 2 in
let b = Bytes.create n in
for i = 0 to n - 1 do
Bytes.set b i
(Char.chr (int_of_string ("0x" ^ String.sub h (2 * i) 2)))
done;
Bytes.unsafe_to_string b
(* multihash = varint(code) || varint(len) || digest *)
let multihash (code : int) (digest : string) : string =
varint code ^ varint (String.length digest) ^ digest
(* CIDv1 = 0x01 || varint(codec) || multihash ; multibase 'b' base32. *)
let cidv1 (codec : int) (mh : string) : string =
"b" ^ base32_lower ("\x01" ^ varint codec ^ mh)
let codec_dag_cbor = 0x71
let mh_sha2_256 = 0x12
(* Canonicalize an SX value: dag-cbor encode -> sha2-256 ->
multihash -> CIDv1 (dag-cbor codec). *)
let cid_from_sx (v : value) : string =
let cbor = Sx_cbor.encode v in
let digest = unhex (Sx_sha2.sha256_hex cbor) in
cidv1 codec_dag_cbor (multihash mh_sha2_256 digest)