diff --git a/next/kernel/bootstrap.erl b/next/kernel/bootstrap.erl
index 19023c0b..468d807c 100644
--- a/next/kernel/bootstrap.erl
+++ b/next/kernel/bootstrap.erl
@@ -3,7 +3,8 @@
          read_section/2, sections/0, section_subdir/1,
          default_base/0, ends_with_sx/1,
          build_genesis/1, verify_genesis/2,
-         cidhash_path/1, write_cidhash/2, read_cidhash/1]).
+         cidhash_path/1, write_cidhash/2, read_cidhash/1,
+         load_genesis/1, strip_sx_suffix/1]).
 
 %% Genesis bundle reader per design §12.2.
 %%
@@ -140,3 +141,54 @@ write_cidhash(BasePath, Cid) ->
 
 read_cidhash(BasePath) ->
     file:read_file(cidhash_path(BasePath)).
+
+%% ── Step 4e: load_genesis → registry ────────────────────────────
+%%
+%% Walks the read_genesis result and registers each file as a
+%% registry entry. The section atom is the registry kind directly
+%% (both name spaces are identical — see Step 4c sections/0 and
+%% Step 5a registry:kinds/0). The entry Name is the filename minus
+%% the `.sx` suffix, kept as a binary; the entry value is the
+%% file's raw bytes.
+%%
+%% Returns `{ok, RegistryState}` on success. Later steps (4f / the
+%% SX-parser bridge) will replace the raw bytes with parsed forms;
+%% the binary stand-in is enough to prove the bridge works.
+%%
+%% Errors: `{error, {bad_read_result, Other}}` when the argument is
+%% not `{ok, Sections}`; `{error, {register_failed, Kind, Name, Reason}}`
+%% when registry:register/4 rejects an entry (Name is the original
+%% filename, suffix included, so the offending file is easy to find).
+
+load_genesis({ok, Sections}) ->
+    load_sections(Sections, registry:new());
+load_genesis(Other) ->
+    {error, {bad_read_result, Other}}.
+
+%% Folds the `[{Kind, Entries}]` list into the registry state, stopping
+%% at the first section that fails to load.
+load_sections([], State) -> {ok, State};
+load_sections([{Kind, Entries} | Rest], State) ->
+    case load_entries(Kind, Entries, State) of
+        {ok, NewState} -> load_sections(Rest, NewState);
+        {error, Reason} -> {error, Reason}
+    end.
+
+%% Registers every `{Name, Bytes}` of one section under Kind, stopping
+%% at the first entry the registry rejects.
+load_entries(_Kind, [], State) -> {ok, State};
+load_entries(Kind, [{Name, Bytes} | Rest], State) ->
+    case registry:register(Kind, strip_sx_suffix(Name), Bytes, State) of
+        {ok, NewState} -> load_entries(Kind, Rest, NewState);
+        {error, Reason} -> {error, {register_failed, Kind, Name, Reason}}
+    end.
+
+%% strip_sx_suffix(Binary) — drops the trailing ".sx" if present.
+%% 46='.' 115='s' 120='x'.
+strip_sx_suffix(B) when is_binary(B) ->
+    case ends_with_sx(B) of
+        false -> B;
+        true -> take_prefix(B, byte_size(B) - 3)  %% 3 = byte length of ".sx"
+    end.
+
+take_prefix(_, 0) -> <<>>;  %% take_prefix(Bin, N): the first N bytes of Bin
+take_prefix(<<Byte, Rest/binary>>, N) when N > 0 ->
+    Tail = take_prefix(Rest, N - 1),
+    <<Byte, Tail/binary>>.
diff --git a/next/tests/bootstrap_load.sh b/next/tests/bootstrap_load.sh
new file mode 100755
index 00000000..aa2ed87b
--- /dev/null
+++ b/next/tests/bootstrap_load.sh
@@ -0,0 +1,130 @@
+#!/usr/bin/env bash
+# next/tests/bootstrap_load.sh — Step 4e acceptance test.
+#
+# Exercises bootstrap:load_genesis/1 + strip_sx_suffix/1.
+# Walks bootstrap:read_genesis output, strips .sx from each
+# filename, registers raw bytes as entries under the matching
+# kind. 15 cases.
+
+set -uo pipefail
+cd "$(git rev-parse --show-toplevel)"
+
+SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
+if [ ! -x "$SX_SERVER" ]; then
+  SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
+fi
+if [ ! -x "$SX_SERVER" ]; then
+  echo "ERROR: sx_server.exe not found." >&2
+  exit 1
+fi
+
+VERBOSE="${1:-}"
+PASS=0; FAIL=0; ERRORS=""
+TMPFILE=$(mktemp); trap 'rm -f "$TMPFILE"' EXIT
+
+cat > "$TMPFILE" <<'EPOCHS'
+(epoch 1)
+(load "lib/erlang/tokenizer.sx")
+(load "lib/erlang/parser.sx")
+(load "lib/erlang/parser-core.sx")
+(load "lib/erlang/parser-expr.sx")
+(load "lib/erlang/parser-module.sx")
+(load "lib/erlang/transpile.sx")
+(load "lib/erlang/runtime.sx")
+(load "lib/erlang/vm/dispatcher.sx")
+
+(epoch 2)
+(eval "(get (erlang-load-module (file-read \"next/kernel/registry.erl\")) :name)")
+(epoch 3)
+(eval "(get (erlang-load-module (file-read \"next/kernel/bootstrap.erl\")) :name)")
+
+;; strip_sx_suffix on "create.sx" -> "create"
+(epoch 10)
+(eval "(get (erlang-eval-ast \"bootstrap:strip_sx_suffix(<<99,114,101,97,116,101,46,115,120>>) =:= <<99,114,101,97,116,101>>\") :name)")
+
+;; strip_sx_suffix unchanged on names without .sx
+(epoch 11)
+(eval "(get (erlang-eval-ast \"bootstrap:strip_sx_suffix(<<104,101,108,108,111>>) =:= <<104,101,108,108,111>>\") :name)")
+
+;; strip_sx_suffix on exactly ".sx" -> empty binary
+(epoch 12)
+(eval "(get (erlang-eval-ast \"bootstrap:strip_sx_suffix(<<46,115,120>>) =:= <<>>\") :name)")
+
+;; load_genesis on bad input rejects with proper tag
+(epoch 13)
+(eval "(get (erlang-eval-ast \"case bootstrap:load_genesis({error, broken}) of {error, {bad_read_result, _}} -> ok; _ -> bad end\") :name)")
+
+;; Per-kind counts after load match the section file counts
+(epoch 20)
+(eval "(erlang-eval-ast \"{ok, S} = bootstrap:load_genesis(bootstrap:read_genesis()), length(registry:list(activity_types, S))\")")
+(epoch 21)
+(eval "(erlang-eval-ast \"{ok, S} = bootstrap:load_genesis(bootstrap:read_genesis()), length(registry:list(object_types, S))\")")
+(epoch 22)
+(eval "(erlang-eval-ast \"{ok, S} = bootstrap:load_genesis(bootstrap:read_genesis()), length(registry:list(projections, S))\")")
+(epoch 23)
+(eval "(erlang-eval-ast \"{ok, S} = bootstrap:load_genesis(bootstrap:read_genesis()), length(registry:list(validators, S))\")")
+(epoch 24)
+(eval "(erlang-eval-ast \"{ok, S} = bootstrap:load_genesis(bootstrap:read_genesis()), length(registry:list(codecs, S))\")")
+(epoch 25)
+(eval "(erlang-eval-ast \"{ok, S} = bootstrap:load_genesis(bootstrap:read_genesis()), length(registry:list(sig_suites, S))\")")
+(epoch 26)
+(eval "(erlang-eval-ast \"{ok, S} = bootstrap:load_genesis(bootstrap:read_genesis()), length(registry:list(audience, S))\")")
+
+;; registry:lookup retrieves a known entry's bytes
+(epoch 30)
+(eval "(get (erlang-eval-ast \"{ok, S} = bootstrap:load_genesis(bootstrap:read_genesis()), case registry:lookup(activity_types, <<99,114,101,97,116,101>>, S) of {ok, B} -> is_binary(B) and (byte_size(B) > 100); _ -> false end\") :name)")
+
+;; load_genesis is deterministic — compare via cid:to_string of state
+(epoch 31)
+(eval "(get (erlang-eval-ast \"R = bootstrap:read_genesis(), {ok, S1} = bootstrap:load_genesis(R), {ok, S2} = bootstrap:load_genesis(R), cid:to_string(S1) =:= cid:to_string(S2)\") :name)")
+EPOCHS
+
+OUTPUT=$(timeout 300 "$SX_SERVER" < "$TMPFILE" 2>/dev/null)
+
+# check EPOCH DESC EXPECTED — finds the server's reply for EPOCH in $OUTPUT
+# and passes when it contains EXPECTED as a fixed substring. The
+# "(ok N " / "(error N " prefix is stripped first so the epoch number (or
+# the word "ok") in the reply envelope can never satisfy an expectation.
+check() {
+  local epoch="$1" desc="$2" expected="$3"
+  local actual
+  actual=$(echo "$OUTPUT" | awk -v e="$epoch" '
+    $0 ~ "^\\(ok-len " e " " { getline; print; exit }
+    $0 ~ "^\\(ok " e " " { sub("^\\(ok " e " ", ""); print; exit }
+    $0 ~ "^\\(error " e " " { sub("^\\(error " e " ", "error: "); print; exit }
+  ')
+  [ -z "$actual" ] && actual="<no output>"
+  if echo "$actual" | grep -qF -- "$expected"; then
+    PASS=$((PASS+1))
+    [ "$VERBOSE" = "-v" ] && echo " ok $desc"
+  else
+    FAIL=$((FAIL+1))
+    ERRORS+=" FAIL [$desc] (epoch $epoch) expected: $expected | actual: $actual
+"
+  fi
+}
+
+check 2 "registry module loaded" "registry"
+check 3 "bootstrap module loaded" "bootstrap"
+check 10 "strip suffix create.sx -> create" "true"
+check 11 "strip suffix hello unchanged" "true"
+check 12 "strip suffix .sx -> empty" "true"
+check 13 "load_genesis rejects bad shape" "ok"
+check 20 "loaded activity_types count = 3" "3"
+check 21 "loaded object_types count = 10" "10"
+check 22 "loaded projections count = 7" "7"
+check 23 "loaded validators count = 3" "3"
+check 24 "loaded codecs count = 3" "3"
+check 25 "loaded sig_suites count = 2" "2"
+check 26 "loaded audience count = 3" "3"
+check 30 "registry:lookup activity_types/create" "true"
+check 31 "load_genesis deterministic" "true"
+
+TOTAL=$((PASS+FAIL))
+if [ $FAIL -eq 0 ]; then
+  echo "ok $PASS/$TOTAL next/tests/bootstrap_load.sh passed"
+else
+  echo "FAIL $PASS/$TOTAL passed, $FAIL failed:"
+  echo "$ERRORS"
+fi
+[ $FAIL -eq 0 ]
diff --git a/plans/fed-sx-milestone-1.md b/plans/fed-sx-milestone-1.md
index 2688c6c1..d40ee7dc 100644
--- a/plans/fed-sx-milestone-1.md
+++ b/plans/fed-sx-milestone-1.md
@@ -252,7 +252,7 @@ replay(LogState, InitAcc, Fun) -> ...
 - [x] **4b-cod** — Codecs (dag-cbor, raw, dag-json) + sig-suites (rsa-sha256-2018, ed25519-2020) + audience predicates (Public, Followers, Direct) — 8 SX files + manifest fully populated + 14 new parse tests
 - [x] **4c** — `bootstrap:read_genesis/0,1` + `read_section/2` + `sections/0` + `section_subdir/1` + `ends_with_sx/1` in Erlang: walk seven hardcoded section subdirs, filter `.sx` files via byte-pattern suffix match, read each into a binary. Returns `{ok, [{Section, [{Name, Bytes}, ...]}, ...]}`. Skips SX parsing — the substrate has no in-Erlang binary→SX-term path (same gap as Step 3b); bundle CID over raw bytes is enough for Step 4d. `next/tests/bootstrap_read.sh` (15 cases).
 - [x] **4d** — `bootstrap:build_genesis/1` + `verify_genesis/2` + `cidhash_path/1` + `write_cidhash/2` + `read_cidhash/1`: bundle CID via host `cid:to_string` over `{genesis_bundle, Sections}`; mismatch returns `{error, {cid_mismatch, Got, Expected}}`; `.cidhash` sibling file persists between runs. `next/tests/bootstrap_build.sh` (12 cases).
-- [ ] **4e** — `bootstrap:load_genesis/1`: register parsed definitions into the in-memory registry (depends on Step 5) +- [x] **4e** — `bootstrap:load_genesis/1` + `strip_sx_suffix/1`: bridges `read_genesis` output into `registry` entries. Section atom = registry kind; entry name = filename minus `.sx` (binary); entry value = raw file bytes (parsed forms replace these once an SX-parser bridge exists). `next/tests/bootstrap_load.sh` (15 cases). **Deliverables:** @@ -961,6 +961,7 @@ A few things still under-specified; resolve as work begins. Newest first. One line per sub-deliverable commit. Erlang conformance gate (`bash lib/erlang/conformance.sh`) must remain 729/729 on every entry. +- **2026-05-28** — Step 4e: `bootstrap:load_genesis/1` + `strip_sx_suffix/1` in `next/kernel/bootstrap.erl`. Walks `read_genesis` output and threads each entry through `registry:register/4`, using the section atom as the kind and the filename-minus-`.sx` as the entry name. Per-kind counts match the seven bootstrap sections exactly (3/10/7/3/3/2/3 = 31 entries total). `next/tests/bootstrap_load.sh` 15/15. Determinism verified by comparing `cid:to_string` of the loaded state across calls (faster than deep-equality on the nested-binary state). Step 4 is now complete end-to-end except for SX-source parsing of the loaded entries. Erlang conformance 729/729. - **2026-05-28** — Step 5a: `next/kernel/registry.erl` — pure-functional registry. State is `[{Kind, [{Name, Entry}, ...]}, ...]` keyed by the same seven section atoms as Step 4c (activity_types, object_types, projections, validators, codecs, sig_suites, audience). API: `new/0`, `kinds/0`, `register/4`, `lookup/3`, `list/2`. Unknown kinds rejected with `{error, unknown_kind}`; missing names return `not_found`; re-registering the same name overrides without growing the list. `next/tests/registry_pure.sh` 14/14. Step 5 broken into 5a–5d on the plan. Erlang conformance 729/729. 
- **2026-05-28** — Step 4d: `bootstrap:build_genesis/1` + `verify_genesis/2` + `.cidhash` helpers in `next/kernel/bootstrap.erl`. Bundle CID delegated to host `cid:to_string` over `{genesis_bundle, Sections}` — deterministic, ~59 byte CIDv1 binary. `verify_genesis/2` returns `ok` on match, `{error, {cid_mismatch, Got, Expected}}` on drift. `write_cidhash`/`read_cidhash` persist the CID to a `.cidhash` sibling file (path hand-spelled `<<...,47,46,99,...>>` per the string-literal-in-binary substrate quirk). `next/tests/bootstrap_build.sh` 12/12. Erlang conformance 729/729. - **2026-05-27** — Step 4c: `next/kernel/bootstrap.erl` — Erlang module that enumerates the genesis bundle by walking seven hardcoded section subdirs via `file:list_dir/1`, filters `.sx` files via byte-pattern suffix match (`ends_with_sx/1`), reads each into a binary via `file:read_file/1`. Returns `{ok, [{Section, [{Name, Bytes}, ...]}]}`. Hits the same SX-parser substrate gap as Step 3b — kept the surface byte-only; parsing happens via SX-side helpers in later steps. Port gotchas: `fun name/arity` references unsupported (use anonymous fun wrappers); `<<"...">>` string-literal segments truncate to one byte (paths hand-spelled as integer-segment binaries). `next/tests/bootstrap_read.sh` 15/15. Erlang conformance 729/729.