From 203a3a3c6748dd70d0af4a5ae963f363d53c8d7f Mon Sep 17 00:00:00 2001 From: giles Date: Thu, 28 May 2026 00:19:11 +0000 Subject: [PATCH] =?UTF-8?q?fed-sx-m1:=20Step=204d=20=E2=80=94=20bootstrap:?= =?UTF-8?q?build=5Fgenesis/verify=5Fgenesis=20+=20cidhash=20helpers=20+=20?= =?UTF-8?q?12=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- next/kernel/bootstrap.erl | 48 ++++++++++++- next/tests/bootstrap_build.sh | 127 ++++++++++++++++++++++++++++++++++ plans/fed-sx-milestone-1.md | 3 +- 3 files changed, 176 insertions(+), 2 deletions(-) create mode 100755 next/tests/bootstrap_build.sh diff --git a/next/kernel/bootstrap.erl b/next/kernel/bootstrap.erl index 1de176b1..19023c0b 100644 --- a/next/kernel/bootstrap.erl +++ b/next/kernel/bootstrap.erl @@ -1,7 +1,9 @@ -module(bootstrap). -export([read_genesis/0, read_genesis/1, read_section/2, sections/0, section_subdir/1, - default_base/0, ends_with_sx/1]). + default_base/0, ends_with_sx/1, + build_genesis/1, verify_genesis/2, + cidhash_path/1, write_cidhash/2, read_cidhash/1]). %% Genesis bundle reader per design §12.2. %% @@ -94,3 +96,47 @@ read_one(DirPath, Name) -> {ok, Bytes} -> {Name, Bytes}; {error, R} -> {Name, {error, R}} end. + +%% ── Step 4d: bundle CID compute + verify ──────────────────────── +%% +%% The bundle CID is the canonical content-address of everything in +%% read_genesis/0's result. We delegate to the host `cid:to_string/1` +%% BIF (Step 1b substrate): it walks the term via `er-format-value`, +%% feeds the deterministic textual form into `cid-from-sx`, returns +%% a CIDv1 (raw codec, sha2-256 multihash) as a binary. +%% +%% Design §12.3: at startup the kernel computes this CID and +%% compares against a hardcoded value (here: a sibling `.cidhash` +%% file). A mismatch is a hard refuse-to-start. 
+ +build_genesis(ReadResult) -> + case ReadResult of + {ok, Sections} -> + Cid = cid:to_string({genesis_bundle, Sections}), + {ok, [{cid, Cid}, {sections, Sections}]}; + Other -> + {error, {bad_read_result, Other}} + end. + +verify_genesis(ReadResult, ExpectedCid) -> + case build_genesis(ReadResult) of + {ok, [{cid, Cid}, _]} -> + case Cid =:= ExpectedCid of + true -> ok; + false -> {error, {cid_mismatch, Cid, ExpectedCid}} + end; + Err -> Err + end. + +%% Sibling-file CID storage. "/.cidhash" appended to BasePath as +%% an integer-segment binary (string-literal segments are broken). + +%% "/.cidhash" — 47='/' 46='.' c i d h a s h +cidhash_path(BasePath) -> + <<BasePath/binary, 47,46,99,105,100,104,97,115,104>>. + +write_cidhash(BasePath, Cid) -> + file:write_file(cidhash_path(BasePath), Cid). + +read_cidhash(BasePath) -> + file:read_file(cidhash_path(BasePath)). diff --git a/next/tests/bootstrap_build.sh b/next/tests/bootstrap_build.sh new file mode 100755 index 00000000..bfb5433e --- /dev/null +++ b/next/tests/bootstrap_build.sh @@ -0,0 +1,127 @@ +#!/usr/bin/env bash +# next/tests/bootstrap_build.sh — Step 4d acceptance test. +# +# Exercises bootstrap:build_genesis/1, verify_genesis/2, +# cidhash_path/1, write_cidhash/2, read_cidhash/1. The bundle CID +# is computed by delegating to the host cid:to_string BIF (Step 1b +# substrate) over the read_genesis result. 12 cases. + +set -uo pipefail +cd "$(git rev-parse --show-toplevel)" + +SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}" +if [ ! -x "$SX_SERVER" ]; then + SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe" +fi +if [ ! -x "$SX_SERVER" ]; then + echo "ERROR: sx_server.exe not found." >&2 + exit 1 +fi + +# Clean any stale .cidhash from previous runs before tests touch +# the filesystem. 
+rm -f next/genesis/.cidhash + +VERBOSE="${1:-}" +PASS=0; FAIL=0; ERRORS="" +TMPFILE=$(mktemp); trap "rm -f $TMPFILE; rm -f next/genesis/.cidhash" EXIT + +cat > "$TMPFILE" <<'EPOCHS' +(epoch 1) +(load "lib/erlang/tokenizer.sx") +(load "lib/erlang/parser.sx") +(load "lib/erlang/parser-core.sx") +(load "lib/erlang/parser-expr.sx") +(load "lib/erlang/parser-module.sx") +(load "lib/erlang/transpile.sx") +(load "lib/erlang/runtime.sx") +(load "lib/erlang/vm/dispatcher.sx") + +(epoch 2) +(eval "(get (erlang-load-module (file-read \"next/kernel/bootstrap.erl\")) :name)") + +;; build_genesis returns {ok, [{cid, _}, {sections, _}]} +(epoch 10) +(eval "(erlang-eval-ast \"{ok, B} = bootstrap:build_genesis(bootstrap:read_genesis()), {Tag, _} = hd(B), Tag\")") + +;; The CID is a non-empty binary +(epoch 11) +(eval "(get (erlang-eval-ast \"{ok, [{cid, C}, _]} = bootstrap:build_genesis(bootstrap:read_genesis()), is_binary(C)\") :name)") +(epoch 12) +(eval "(get (erlang-eval-ast \"{ok, [{cid, C}, _]} = bootstrap:build_genesis(bootstrap:read_genesis()), byte_size(C) > 50\") :name)") + +;; build_genesis is deterministic across calls +(epoch 13) +(eval "(get (erlang-eval-ast \"{ok, [{cid, C1}, _]} = bootstrap:build_genesis(bootstrap:read_genesis()), {ok, [{cid, C2}, _]} = bootstrap:build_genesis(bootstrap:read_genesis()), C1 =:= C2\") :name)") + +;; build_genesis preserves the sections list +(epoch 14) +(eval "(erlang-eval-ast \"{ok, [_, {sections, S}]} = bootstrap:build_genesis(bootstrap:read_genesis()), length(S)\")") + +;; build_genesis rejects bad input shapes +(epoch 15) +(eval "(get (erlang-eval-ast \"case bootstrap:build_genesis({error, broken}) of {error, {bad_read_result, _}} -> ok; _ -> bad end\") :name)") + +;; verify_genesis returns ok when CID matches +(epoch 20) +(eval "(get (erlang-eval-ast \"{ok, [{cid, C}, _]} = bootstrap:build_genesis(bootstrap:read_genesis()), bootstrap:verify_genesis(bootstrap:read_genesis(), C) =:= ok\") :name)") + +;; verify_genesis returns 
{error, {cid_mismatch, _, _}} when CID doesn't match +(epoch 21) +(eval "(get (erlang-eval-ast \"case bootstrap:verify_genesis(bootstrap:read_genesis(), <<99,99,99>>) of {error, {cid_mismatch, _, _}} -> ok; _ -> bad end\") :name)") + +;; cidhash_path concatenation +(epoch 22) +(eval "(get (erlang-eval-ast \"bootstrap:cidhash_path(<<110,101,120,116>>) =:= <<110,101,120,116,47,46,99,105,100,104,97,115,104>>\") :name)") + +;; write_cidhash + read_cidhash round-trip the bundle CID +(epoch 23) +(eval "(get (erlang-eval-ast \"{ok, [{cid, C}, _]} = bootstrap:build_genesis(bootstrap:read_genesis()), Base = bootstrap:default_base(), ok = bootstrap:write_cidhash(Base, C), {ok, Stored} = bootstrap:read_cidhash(Base), Stored =:= C\") :name)") + +;; Full verify path against the persisted .cidhash +(epoch 24) +(eval "(get (erlang-eval-ast \"Base = bootstrap:default_base(), {ok, [{cid, C}, _]} = bootstrap:build_genesis(bootstrap:read_genesis()), ok = bootstrap:write_cidhash(Base, C), {ok, Stored} = bootstrap:read_cidhash(Base), bootstrap:verify_genesis(bootstrap:read_genesis(), Stored) =:= ok\") :name)") +EPOCHS + +OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) + +check() { + local epoch="$1" desc="$2" expected="$3" + local actual + actual=$(echo "$OUTPUT" | awk -v e="$epoch" ' + $0 ~ "^\\(ok-len " e " " { getline; print; exit } + $0 ~ "^\\(ok " e " " { print; exit } + $0 ~ "^\\(error " e " " { print; exit } + ') + [ -z "$actual" ] && actual="<no output>" + if echo "$actual" | grep -qF -- "$expected"; then + PASS=$((PASS+1)) + [ "$VERBOSE" = "-v" ] && echo " ok $desc" + else + FAIL=$((FAIL+1)) + ERRORS+=" FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual +" + fi +} + +check 2 "module load name" "bootstrap" +check 10 "build_genesis head tag" "cid" +check 11 "CID is a binary" "true" +check 12 "CID length > 50" "true" +check 13 "build_genesis deterministic" "true" +check 14 "sections preserved (7 entries)" "7" +check 15 "build_genesis rejects bad shape" "ok" +check 
20 "verify_genesis ok when match" "true" +check 21 "verify_genesis errs on mismatch" "ok" +check 22 "cidhash_path concatenation" "true" +check 23 "write/read_cidhash round-trip" "true" +check 24 "verify against persisted hash" "true" + +TOTAL=$((PASS+FAIL)) +if [ $FAIL -eq 0 ]; then + echo "ok $PASS/$TOTAL next/tests/bootstrap_build.sh passed" +else + echo "FAIL $PASS/$TOTAL passed, $FAIL failed:" + echo "$ERRORS" +fi +[ $FAIL -eq 0 ] diff --git a/plans/fed-sx-milestone-1.md b/plans/fed-sx-milestone-1.md index f3f4e13e..1d439578 100644 --- a/plans/fed-sx-milestone-1.md +++ b/plans/fed-sx-milestone-1.md @@ -251,7 +251,7 @@ replay(LogState, InitAcc, Fun) -> ... - [x] **4b-vld** — Validators: envelope-shape, signature, type-schema — 3 `DefineValidator` files + manifest updated + 5 new parse tests - [x] **4b-cod** — Codecs (dag-cbor, raw, dag-json) + sig-suites (rsa-sha256-2018, ed25519-2020) + audience predicates (Public, Followers, Direct) — 8 SX files + manifest fully populated + 14 new parse tests - [x] **4c** — `bootstrap:read_genesis/0,1` + `read_section/2` + `sections/0` + `section_subdir/1` + `ends_with_sx/1` in Erlang: walk seven hardcoded section subdirs, filter `.sx` files via byte-pattern suffix match, read each into a binary. Returns `{ok, [{Section, [{Name, Bytes}, ...]}, ...]}`. Skips SX parsing — the substrate has no in-Erlang binary→SX-term path (same gap as Step 3b); bundle CID over raw bytes is enough for Step 4d. `next/tests/bootstrap_read.sh` (15 cases). -- [ ] **4d** — `bootstrap:build_genesis/1` + `bootstrap:verify_genesis/1`: compute bundle CID over the read forms via the host `cid:to_string` substrate; verify against a stored `bundle.cidhash` +- [x] **4d** — `bootstrap:build_genesis/1` + `verify_genesis/2` + `cidhash_path/1` + `write_cidhash/2` + `read_cidhash/1`: bundle CID via host `cid:to_string` over `{genesis_bundle, Sections}`; mismatch returns `{error, {cid_mismatch, Got, Expected}}`; `.cidhash` sibling file persists between runs. 
`next/tests/bootstrap_build.sh` (12 cases). - [ ] **4e** — `bootstrap:load_genesis/1`: register parsed definitions into the in-memory registry (depends on Step 5) **Deliverables:** @@ -955,6 +955,7 @@ A few things still under-specified; resolve as work begins. Newest first. One line per sub-deliverable commit. Erlang conformance gate (`bash lib/erlang/conformance.sh`) must remain 729/729 on every entry. +- **2026-05-28** — Step 4d: `bootstrap:build_genesis/1` + `verify_genesis/2` + `.cidhash` helpers in `next/kernel/bootstrap.erl`. Bundle CID delegated to host `cid:to_string` over `{genesis_bundle, Sections}` — deterministic, ~59 byte CIDv1 binary. `verify_genesis/2` returns `ok` on match, `{error, {cid_mismatch, Got, Expected}}` on drift. `write_cidhash`/`read_cidhash` persist the CID to a `.cidhash` sibling file (path hand-spelled `<<...,47,46,99,...>>` per the string-literal-in-binary substrate quirk). `next/tests/bootstrap_build.sh` 12/12. Erlang conformance 729/729. - **2026-05-27** — Step 4c: `next/kernel/bootstrap.erl` — Erlang module that enumerates the genesis bundle by walking seven hardcoded section subdirs via `file:list_dir/1`, filters `.sx` files via byte-pattern suffix match (`ends_with_sx/1`), reads each into a binary via `file:read_file/1`. Returns `{ok, [{Section, [{Name, Bytes}, ...]}]}`. Hits the same SX-parser substrate gap as Step 3b — kept the surface byte-only; parsing happens via SX-side helpers in later steps. Port gotchas: `fun name/arity` references unsupported (use anonymous fun wrappers); `<<"...">>` string-literal segments truncate to one byte (paths hand-spelled as integer-segment binaries). `next/tests/bootstrap_read.sh` 15/15. Erlang conformance 729/729. - **2026-05-27** — Step 4b-cod: bootstrap codecs + sig-suites + audience predicates complete. 
3 `DefineCodec` files (dag-cbor + raw + dag-json, dag-cbor + dag-json deferring to host-codec primitive when wired), 2 `DefineSigSuite` files (rsa-sha256-2018 PEM-keyed, ed25519-2020 multibase-keyed, both :verify returning false as m2-deferred stand-in), 3 `DefineAudience` files (Public/Followers/Direct member-of predicates per design §16). Manifest now lists 26 bootstrap files across all eight sections; `next/tests/genesis_parse.sh` 50/50. Step 4b complete; remaining Step 4 is bundler code (4c–4e). Erlang conformance 729/729. - **2026-05-27** — Step 4b-vld: bootstrap validators complete — 3 `DefineValidator` SX files (envelope-shape mirroring Step 2a, signature stub delegating to envelope:verify_signature/2 per design §9.6, type-schema looking up the object-type schema from define-registry). Manifest `:validators` populated; `next/tests/genesis_parse.sh` 36/36. Erlang conformance 729/729.