Files
rose-ash/hosts/ocaml/lib/sx_cbor.ml
giles fce9e0c617 kernel: make the crypto/content-addressing stack actually WASM-safe (32-bit ints)
The kernel's sha2/cbor/cid/ed25519 modules were labelled 'WASM-safe' but assumed
63-bit native int. On the web targets — js_of_ocaml (32-bit int) and
wasm_of_ocaml (31-bit int) — they truncated, producing wrong digests/CIDs and a
Char.chr crash at kernel INIT (ed25519 precomputes sqrtm1 + base_point at module
load, driving the base-2^26 bignum). This is why a freshly-built browser kernel
crashed on boot while the stale committed artifact (older toolchain) still ran.

Fixes (all verified bit-identical to the 63-bit native build, conformance 271/271):
- sx_sha2: SHA-256 round words via Int32 (were native int + land 0xFFFFFFFF,
  which is a no-op on 31-bit and overflows the constants); both SHA-256/512
  length-encoding via Int64 shifts (native "lsr 32" is shift-mod-32 on js, which
  leaked the length byte into a higher word). NIST vectors pass native/js/wasm.
- sx_cbor: write_head width selection + byte emission via Int64 (the 0x100000000
  literal truncated to 0 on js, sending small ints to the 8-byte branch; and
  "v lsr (8*i)" with i>=4 was shift-mod-32).
- sx_cid: base32_lower keeps acc bounded to the unconsumed low bits (it grew 8
  bits/byte and overflowed). cid_from_sx now matches native<->js exactly.
- sx_ed25519: bignum mul accumulates in Int64 (26x26=52-bit products overflow);
  div_small running remainder in Int64 (rem<<26 ~= 2^34). This was the boot gate
  — the browser kernel now boots (SxKernel live, crypto-sha256 correct on js).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-29 07:51:50 +00:00

147 lines
4.8 KiB
OCaml

(** dag-cbor encode / decode — pure OCaml, WASM-safe.
RFC 8949 deterministic subset as constrained by IPLD dag-cbor
(RFC 8742): unsigned/negative ints, text strings, arrays, maps
with keys sorted by **length-then-bytewise**, bool, null, and
tag 42 (CID link, decode-side passthrough). Floats are not
supported (no fed-sx shape needs them yet) — encoding a [Number]
or decoding a float head raises. Reference: RFC 8949 §3, §4.2. *)
open Sx_types
exception Cbor_error of string
(* ---- Encoder ---- *)
let write_head buf major v =
let m = major lsl 5 in
(* Width selection + big-endian byte emission via Int64, so the web targets
compute identically to native: on js_of_ocaml [int] is 32-bit, so the
literal 0x100000000 (2^32) truncates to 0 (sending small values to the
8-byte branch) and [v lsr (8*i)] with i>=4 is shift-mod-32. Int64 has the
full 64-bit width and well-defined shifts on every target. *)
let v64 = Int64.of_int v in
let put_be nbytes =
for i = nbytes - 1 downto 0 do
Buffer.add_char buf
(Char.chr (Int64.to_int
(Int64.logand (Int64.shift_right_logical v64 (8 * i)) 0xFFL)))
done
in
if v < 24 then
Buffer.add_char buf (Char.chr (m lor v))
else if v < 0x100 then begin
Buffer.add_char buf (Char.chr (m lor 24)); put_be 1
end else if v < 0x10000 then begin
Buffer.add_char buf (Char.chr (m lor 25)); put_be 2
end else if Int64.compare v64 0x100000000L < 0 then begin
Buffer.add_char buf (Char.chr (m lor 26)); put_be 4
end else begin
Buffer.add_char buf (Char.chr (m lor 27)); put_be 8
end
(* dag-cbor map key order: shorter key first, then bytewise. *)
let key_order a b =
let la = String.length a and lb = String.length b in
if la <> lb then compare la lb else compare a b
let rec encode_into buf (v : value) : unit =
match v with
| Integer n ->
if n >= 0 then write_head buf 0 n
else write_head buf 1 (-1 - n)
| String s ->
write_head buf 3 (String.length s);
Buffer.add_string buf s
| Symbol s | Keyword s ->
write_head buf 3 (String.length s);
Buffer.add_string buf s
| Bool false -> Buffer.add_char buf '\xf4'
| Bool true -> Buffer.add_char buf '\xf5'
| Nil -> Buffer.add_char buf '\xf6'
| List items ->
write_head buf 4 (List.length items);
List.iter (encode_into buf) items
| Dict d ->
let keys = Hashtbl.fold (fun k _ acc -> k :: acc) d [] in
let keys = List.sort_uniq key_order keys in
write_head buf 5 (List.length keys);
List.iter (fun k ->
write_head buf 3 (String.length k);
Buffer.add_string buf k;
encode_into buf (Hashtbl.find d k)) keys
| Number _ ->
raise (Cbor_error "cbor-encode: floats unsupported (dag-cbor subset)")
| _ ->
raise (Cbor_error
("cbor-encode: unencodable value " ^ type_of v))
let encode (v : value) : string =
let buf = Buffer.create 64 in
encode_into buf v;
Buffer.contents buf
(* ---- Decoder ---- *)
let decode (s : string) : value =
let pos = ref 0 in
let len = String.length s in
let byte () =
if !pos >= len then raise (Cbor_error "cbor-decode: truncated");
let c = Char.code s.[!pos] in incr pos; c
in
let read_uint ai =
if ai < 24 then ai
else if ai = 24 then byte ()
else if ai = 25 then let a = byte () in let b = byte () in (a lsl 8) lor b
else if ai = 26 then begin
let v = ref 0 in
for _ = 0 to 3 do v := (!v lsl 8) lor byte () done; !v
end else if ai = 27 then begin
let v = ref 0 in
for _ = 0 to 7 do v := (!v lsl 8) lor byte () done; !v
end else raise (Cbor_error "cbor-decode: bad additional info")
in
let read_bytes n =
if !pos + n > len then raise (Cbor_error "cbor-decode: truncated");
let r = String.sub s !pos n in pos := !pos + n; r
in
let rec item () =
let b = byte () in
let major = b lsr 5 and ai = b land 0x1f in
match major with
| 0 -> Integer (read_uint ai)
| 1 -> Integer (-1 - read_uint ai)
| 2 -> String (read_bytes (read_uint ai))
| 3 -> String (read_bytes (read_uint ai))
| 4 ->
let n = read_uint ai in
List (List.init n (fun _ -> item ()))
| 5 ->
let n = read_uint ai in
let d = make_dict () in
for _ = 1 to n do
let k = match item () with
| String k -> k
| _ -> raise (Cbor_error "cbor-decode: non-string map key")
in
Hashtbl.replace d k (item ())
done;
Dict d
| 6 ->
(* Tag: tag-42 CID link → pass the inner item through. *)
ignore (read_uint ai); item ()
| 7 ->
(match ai with
| 20 -> Bool false
| 21 -> Bool true
| 22 -> Nil
| 23 -> Nil
| _ ->
raise (Cbor_error
"cbor-decode: floats/simple unsupported (dag-cbor subset)"))
| _ -> raise (Cbor_error "cbor-decode: bad major type")
in
let v = item () in
v