diff --git a/next/kernel/envelope.erl b/next/kernel/envelope.erl
index 669a4735..f5b63efa 100644
--- a/next/kernel/envelope.erl
+++ b/next/kernel/envelope.erl
@@ -1,5 +1,5 @@
 -module(envelope).
--export([validate_shape/1, get_field/2]).
+-export([validate_shape/1, get_field/2, canonical_bytes/1]).
 
 %% Activity envelope per design §3.1.
 %%
@@ -51,3 +51,48 @@ validate_signature_shape(Env) ->
         false -> {error, {bad_signature, not_a_proplist}}
     end.
 
+
+%% canonical_bytes/1 — the byte string the signature covers.
+%%
+%% Real fed-sx will use dag-cbor over a JSON-LD-canonicalised form
+%% (design §3.2). For milestone 1 we stand in for that with the host
+%% BIF `cid:to_string/1`, which produces a CIDv1 over the deterministic
+%% textual form of the term. Two prior steps make this work:
+%%   1. The signature pair is stripped (sig covers everything except
+%%      itself).
+%%   2. The top-level property list is sorted by key so field order in
+%%      the source envelope is not load-bearing. The sort is stable:
+%%      pairs sharing a key keep their source order, so a first-match
+%%      lookup sees the same value before and after sorting.
+%%
+%% Only the top-level list is normalised; values are handed to the BIF
+%% unchanged. Env must be a list, otherwise the call fails with
+%% function_clause.
+%%
+%% The result is an Erlang binary suitable as the sig-cover input.
+
+canonical_bytes(Env) when is_list(Env) ->
+    Stripped = strip_signature(Env),
+    Sorted = sort_pairs(Stripped),
+    cid:to_string(Sorted).
+
+%% Drop every top-level {signature, _} pair; everything else is kept in
+%% its original order.
+strip_signature([]) -> [];
+strip_signature([{signature, _} | Rest]) -> strip_signature(Rest);
+strip_signature([P | Rest]) -> [P | strip_signature(Rest)].
+
+%% Insertion sort of {Key, Value} pairs by Key (Erlang term order).
+sort_pairs([]) -> [];
+sort_pairs([H | T]) -> insert_pair(H, sort_pairs(T)).
+
+%% Insert one pair into an already sorted list. The new pair only moves
+%% past entries whose key is strictly smaller, so it lands in front of
+%% any equal key; sort_pairs/1 inserts tail-first, which makes that the
+%% rule that keeps duplicate keys in source order.
+insert_pair(P, []) -> [P];
+insert_pair({K1, V1}, [{K2, V2} | Rest]) ->
+    case K2 < K1 of
+        true -> [{K2, V2} | insert_pair({K1, V1}, Rest)];
+        false -> [{K1, V1}, {K2, V2} | Rest]
+    end.
diff --git a/next/tests/envelope_canonical.sh b/next/tests/envelope_canonical.sh
new file mode 100755
index 00000000..ea3053db
--- /dev/null
+++ b/next/tests/envelope_canonical.sh
@@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+# next/tests/envelope_canonical.sh — Step 2b acceptance test.
+#
+# Loads next/kernel/envelope.erl and checks canonical_bytes/1 contract:
+# returns a binary, deterministic across runs, invariant under
+# field-order permutation, invariant under signature changes, and
+# different for different covered content. 8 checks (load + 7 cases).
+
+set -uo pipefail
+cd "$(git rev-parse --show-toplevel)" || exit 1  # relative paths below assume repo root
+
+SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
+if [ ! -x "$SX_SERVER" ]; then
+  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
+fi
+if [ ! -x "$SX_SERVER" ]; then
+  echo "ERROR: sx_server.exe not found." >&2
+  exit 1
+fi
+
+VERBOSE="${1:-}"
+PASS=0; FAIL=0; ERRORS=""
+TMPFILE=$(mktemp); trap 'rm -f "$TMPFILE"' EXIT  # expand at exit time, with the path quoted
+
+cat > "$TMPFILE" <<'EPOCHS'
+(epoch 1)
+(load "lib/erlang/tokenizer.sx")
+(load "lib/erlang/parser.sx")
+(load "lib/erlang/parser-core.sx")
+(load "lib/erlang/parser-expr.sx")
+(load "lib/erlang/parser-module.sx")
+(load "lib/erlang/transpile.sx")
+(load "lib/erlang/runtime.sx")
+(load "lib/erlang/vm/dispatcher.sx")
+
+(epoch 2)
+(eval "(get (erlang-load-module (file-read \"next/kernel/envelope.erl\")) :name)")
+
+;; canonical_bytes returns a binary
+(epoch 10)
+(eval "(get (erlang-eval-ast \"is_binary(envelope:canonical_bytes([{id,1},{type,create},{actor,alice},{published,1000},{signature,whatever}]))\") :name)")
+
+;; Determinism: same envelope twice -> same bytes
+(epoch 11)
+(eval "(get (erlang-eval-ast \"envelope:canonical_bytes([{id,1},{type,create},{actor,alice}]) =:= envelope:canonical_bytes([{id,1},{type,create},{actor,alice}])\") :name)")
+
+;; Signature stripping: different signatures -> same canonical bytes
+(epoch 12)
+(eval "(get (erlang-eval-ast \"envelope:canonical_bytes([{id,1},{type,create},{actor,alice},{signature,sig_one}]) =:= envelope:canonical_bytes([{id,1},{type,create},{actor,alice},{signature,sig_two}])\") :name)")
+
+;; No signature vs some signature -> same canonical bytes
+(epoch 13)
+(eval "(get (erlang-eval-ast \"envelope:canonical_bytes([{id,1},{type,create},{actor,alice}]) =:= envelope:canonical_bytes([{id,1},{type,create},{actor,alice},{signature,whatever}])\") :name)")
+
+;; Key-order invariance: reordering top-level fields -> same bytes
+(epoch 14)
+(eval "(get (erlang-eval-ast \"envelope:canonical_bytes([{id,1},{type,create},{actor,alice}]) =:= envelope:canonical_bytes([{actor,alice},{type,create},{id,1}])\") :name)")
+
+;; Changing a covered field changes the bytes
+(epoch 15)
+(eval "(get (erlang-eval-ast \"envelope:canonical_bytes([{id,1},{type,create},{actor,alice}]) =/= envelope:canonical_bytes([{id,2},{type,create},{actor,alice}])\") :name)")
+
+;; Distinct envelopes -> distinct bytes
+(epoch 16)
+(eval "(get (erlang-eval-ast \"envelope:canonical_bytes([{id,1},{type,create},{actor,alice}]) =/= envelope:canonical_bytes([{id,1},{type,update},{actor,bob}])\") :name)")
+EPOCHS
+
+OUTPUT=$(timeout 120 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
+
+check() {  # check EPOCH DESC EXPECTED: pass when the epoch's reply line contains EXPECTED
+  local epoch="$1" desc="$2" expected="$3"
+  local actual
+  actual=$(echo "$OUTPUT" | awk -v e="$epoch" '
+    $0 ~ "^\\(ok-len " e " " { getline; print; exit }
+    $0 ~ "^\\(ok " e " " { print; exit }
+    $0 ~ "^\\(error " e " " { print; exit }
+  ')
+  [ -z "$actual" ] && actual="<no output>"  # make a missing reply visible in the failure report
+  if echo "$actual" | grep -qF -- "$expected"; then
+    PASS=$((PASS+1))
+    [ "$VERBOSE" = "-v" ] && echo " ok $desc"
+  else
+    FAIL=$((FAIL+1))
+    ERRORS+=" FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual
+"
+  fi
+}
+
+check 2 "module load name" "envelope"
+check 10 "canonical_bytes returns binary" "true"
+check 11 "deterministic" "true"
+check 12 "signature stripped (changes)" "true"
+check 13 "signature stripped (absent)" "true"
+check 14 "key-order invariant" "true"
+check 15 "covered field change visible" "true"
+check 16 "distinct envelopes distinct" "true"
+
+TOTAL=$((PASS+FAIL))
+if [ $FAIL -eq 0 ]; then
+  echo "ok $PASS/$TOTAL next/tests/envelope_canonical.sh passed"
+else
+  echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
+
echo "$ERRORS" +fi +[ $FAIL -eq 0 ] diff --git a/plans/fed-sx-milestone-1.md b/plans/fed-sx-milestone-1.md index 312b3bf8..9742da1d 100644 --- a/plans/fed-sx-milestone-1.md +++ b/plans/fed-sx-milestone-1.md @@ -152,7 +152,7 @@ canonicalize_sx(V) -> ... % sorts dict keys, normalizes strings **Sub-deliverables:** - [x] **2a** — `next/kernel/envelope.erl` `validate_shape/1` + `get_field/2` (property-list envelope; Erlang maps `#{}` not supported in this port) + `next/tests/envelope_shape.sh` (15 cases) -- [ ] **2b** — `canonical_bytes/1` over sig-stripped envelope (deterministic textual form via `cid:to_string` substrate) + tests +- [x] **2b** — `canonical_bytes/1` over sig-stripped, key-sorted envelope (deterministic textual form via `cid:to_string` substrate; dag-cbor stand-in for v1) + `next/tests/envelope_canonical.sh` (8 cases) - [ ] **2c** — `verify_signature/2` against actor key set, time-aware key validity per design §9.6 + tests **Deliverables:** @@ -937,6 +937,7 @@ A few things still under-specified; resolve as work begins. Newest first. One line per sub-deliverable commit. Erlang conformance gate (`bash lib/erlang/conformance.sh`) must remain 729/729 on every entry. +- **2026-05-26** — Step 2b: `envelope:canonical_bytes/1` — strip signature, insertion-sort property list by key, return host-CID-string as deterministic byte form (dag-cbor stand-in). `next/tests/envelope_canonical.sh` 8/8 pass. Erlang conformance 729/729 preserved. - **2026-05-26** — Step 2a: `next/kernel/envelope.erl` `validate_shape/1` + `get_field/2` over property-list envelopes (Erlang `#{}` maps not supported in this port). `next/tests/envelope_shape.sh` 15/15 pass. Erlang conformance 729/729 preserved. - **2026-05-26** — Step 1b: `next/kernel/nx_cid.erl` (from_sx/to_string/from_string/equals) — thin Erlang wrapper around the `cid:to_string/1` BIF. `next/tests/cid.sh` 13/13 pass. 
Module named `nx_cid` to avoid shadowing the `cid` BIF (user-module dispatch takes precedence over BIFs by module name). Erlang conformance 729/729 preserved. - **2026-05-26** — Step 1a: `next/` skeleton created (kernel/, genesis/, tests/, data/), README, `.gitignore data/`. Erlang conformance 729/729 preserved.