From 5e0abced32fa70bd1e316ed379d68acafbb1b518 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 4 Jul 2026 05:03:03 +0000 Subject: [PATCH] =?UTF-8?q?W14:=20F8=20cross-host=20differential=20battery?= =?UTF-8?q?=20(test-only)=20=E2=80=94=20CHECKLIST=20COMPLETE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Committed replacement for the review's ephemeral 130-probe corpus: spec/tests/differential-probes.txt (49 probes: F-1 int/float display, K18 overflow, F-3 apply + dict order, S-4 float printing, strings, collections, special forms, error normalization) evaluated on the native server (epoch protocol printer) and the SHIPPED WASM kernel (eval_wasm_probes.js via guest sx-serialize), diffed by scripts/test-differential.sh with a KNOWN_DIVERGENT heal-detecting ledger. Result: 46/49 agree. All 3 divergences share one root cause, verified live: bare sx_server's `apply` does not spread its argument list — (apply + (list 1 2 3)) errors "Expected number, got list", (apply str l) returns the serialized list; the WASM kernel spreads correctly and the test runner masks the bug with its own apply binding (F-7 class). Finding refinement: F-1's float-display divergence (0.3 vs 0.30000000000000004) is a K.eval JS-boundary artifact — guest-serialized output agrees across hosts; the battery therefore compares guest serialization. This completes the W14 checklist: 7 pin suites, 6 gate scripts/runners, 2 harness capabilities, C9 label cleanup, adapter-dom render coverage. Test-only: no semantics edits, no push. 
Co-Authored-By: Claude Fable 5 --- hosts/ocaml/browser/eval_wasm_probes.js | 68 ++++++++++++++++++ plans/agent-briefings/sx-gate-loop.md | 32 ++++++++- scripts/test-differential.sh | 94 +++++++++++++++++++++++++ spec/tests/differential-probes.txt | 65 +++++++++++++++++ 4 files changed, 258 insertions(+), 1 deletion(-) create mode 100755 hosts/ocaml/browser/eval_wasm_probes.js create mode 100755 scripts/test-differential.sh create mode 100644 spec/tests/differential-probes.txt diff --git a/hosts/ocaml/browser/eval_wasm_probes.js b/hosts/ocaml/browser/eval_wasm_probes.js new file mode 100755 index 00000000..91034a49 --- /dev/null +++ b/hosts/ocaml/browser/eval_wasm_probes.js @@ -0,0 +1,68 @@ +#!/usr/bin/env node +// eval_wasm_probes.js — W14/F8: evaluate a file of probe expressions (one +// per line, '#'-comments allowed) on the SHIPPED browser kernel and print +// PROBE <n> <result> +// per line, for diffing against the native server (scripts/test-differential.sh). +// Boot stubs mirror test_wasm_native.js / run_wasm_corpus.js. 
+ +const fs = require('fs'); +const path = require('path'); + +const PROJECT_ROOT = path.resolve(__dirname, '../../..'); +const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm'); + +const probeFile = process.argv[2]; +if (!probeFile) { console.error('usage: eval_wasm_probes.js '); process.exit(2); } + +global.window = global; +global.document = { + createElement: () => ({ style: {}, setAttribute() {}, appendChild() {}, children: [] }), + createDocumentFragment: () => ({ appendChild() {}, children: [], childNodes: [] }), + head: { appendChild() {} }, body: { appendChild() {} }, + querySelector: () => null, querySelectorAll: () => [], + createTextNode: s => ({ textContent: s }), addEventListener() {}, + createComment: s => ({ textContent: s || '' }), + getElementsByTagName: () => [], +}; +global.localStorage = { getItem: () => null, setItem() {}, removeItem() {} }; +global.CustomEvent = class { constructor(n, o) { this.type = n; this.detail = (o || {}).detail || {}; } }; +global.MutationObserver = class { observe() {} disconnect() {} }; +global.requestIdleCallback = fn => setTimeout(fn, 0); +global.matchMedia = () => ({ matches: false }); +global.navigator = { serviceWorker: { register: () => Promise.resolve() } }; +global.location = { href: '', pathname: '/', hostname: 'localhost' }; +global.history = { pushState() {}, replaceState() {} }; +global.fetch = () => Promise.resolve({ ok: true, text: () => Promise.resolve('') }); + +async function main() { + require(path.join(WASM_DIR, 'sx_browser.bc.wasm.js')); + const K = await new Promise((resolve, reject) => { + let tries = 0; + const poll = setInterval(() => { + if (globalThis.SxKernel) { clearInterval(poll); resolve(globalThis.SxKernel); } + else if (++tries > 200) { clearInterval(poll); reject(new Error('SxKernel not found')); } + }, 50); + }); + + const lines = fs.readFileSync(probeFile, 'utf8').split('\n'); + let n = 0; + for (const raw of lines) { + const line = raw.trim(); + if (!line || 
line.startsWith('#')) continue; + n++; + let out; + try { + // Serialize through the kernel's own printer so both hosts emit SX + // text (K.eval returns raw JS values otherwise — [object Object]). + const r = K.eval(`(sx-serialize ${line})`); + out = (typeof r === 'string') ? r : String(r); + } catch (e) { + out = 'ERROR'; + } + // errors normalized: kernel returns "Error: ..." strings for eval errors + if (typeof out === 'string' && out.startsWith('Error')) out = 'ERROR'; + console.log(`PROBE ${n} ${out.replace(/\n/g, '\\n')}`); + } +} + +main().catch(e => { console.error('FATAL:', e.message); process.exit(1); }); diff --git a/plans/agent-briefings/sx-gate-loop.md b/plans/agent-briefings/sx-gate-loop.md index 06efb551..68335aa1 100644 --- a/plans/agent-briefings/sx-gate-loop.md +++ b/plans/agent-briefings/sx-gate-loop.md @@ -104,10 +104,40 @@ Pin each confirmed-and-fixed finding with a minimal repro. Add suites to baseline identities updated in the same commit ### F. Differential battery -- [ ] F8 — cross-host differential battery (same source, all hosts agree) +- [x] F8 — cross-host differential battery: `spec/tests/differential-probes.txt` + (49 probes) × native server vs shipped WASM kernel via + `scripts/test-differential.sh` + `eval_wasm_probes.js`. 46 agree, + 3 ledgered KNOWN_DIVERGENT (F-3: bare-server `apply` does not spread — + runner masks it, F-7 class). Refinement: the F-1 float-display + divergence is a K.eval JS-boundary artifact — guest `sx-serialize` + output AGREES across hosts + +**CHECKLIST COMPLETE 2026-07-04** — all W14 items delivered. Open handoffs: +sx_render.ml regen drift (Blocked, hosts lane), adapter-dom depth tests, +3 WASM load-error bisects (hash-table/r7rs/sets), CI wiring of the four +gate scripts (D3 maintainer decision). ## Progress log (newest first) +- 2026-07-04 — **F8 differential battery — CHECKLIST COMPLETE**. 
Committed + replacement for the review's ephemeral 130-probe corpus: + `spec/tests/differential-probes.txt` (49 probes across F-1 int/float + display, K18 overflow, F-3 apply + dict order, S-4 float printing, + strings/collections/special forms/error cases) evaluated on the native + server (epoch protocol) and the shipped WASM kernel + (`eval_wasm_probes.js`, guest `sx-serialize`), diffed by + `scripts/test-differential.sh` with a KNOWN_DIVERGENT ledger (heal → + red → delete entry). Result: 46/49 agree; 3 divergences, all one root + cause — **bare sx_server's `apply` does not spread its arg list** + ((apply + (list 1 2 3)) → "Expected number, got list"; WASM spreads + correctly; the test runner masks it with its own apply — F-7 class). + Finding refinement: F-1's float-display divergence (0.3 vs 0.3000…4) is + purely a K.eval JS-boundary artifact — guest-serialized output agrees. + W14 delivered: 7 pin suites (spec/tests/test-gate-pins.sx, 29 tests), + 6 gate scripts/runners (protocol-gate 11, env-parity 7, harness-parity 12, + wasm-corpus 83-file, suite-baseline 273-pin, differential 49-probe), + 2 harness capabilities (C22 log-first, C21 perform-mode), C9 label + cleanup, adapter-dom render coverage. Test-only throughout. - 2026-07-04 — **C9 empty-suite labels (item E.3) — section E COMPLETE**. The sweep found the defect much wider than the finding: SIX files carried suite-less top-level deftests (test-chars 43, test-import-bind 14, diff --git a/scripts/test-differential.sh b/scripts/test-differential.sh new file mode 100755 index 00000000..6fe366b3 --- /dev/null +++ b/scripts/test-differential.sh @@ -0,0 +1,94 @@ +#!/bin/bash +# test-differential.sh — W14/F8: cross-host differential battery. +# +# Evaluates every expression in spec/tests/differential-probes.txt on: +# A) the native server (sx_server.exe, epoch protocol) — its printer +# B) the SHIPPED browser kernel (eval_wasm_probes.js, guest sx-serialize) +# and diffs the outputs. 
The review's original 130-probe corpus was +# ephemeral (F-8); this is the committed replacement. +# +# KNOWN_DIVERGENT is the ledger of confirmed, still-open divergences — +# keyed by the probe EXPRESSION. Red on a NEW divergence (host drift) and +# red on a HEALED one (fix landed: delete the entry, locking in parity). +# +# Method note (finding refinement, 2026-07-04): comparing raw K.eval +# JS-boundary values shows float-display divergences (0.3 vs +# 0.30000000000000004) that DISAPPEAR under guest-level (sx-serialize …) — +# the F-1 float-display class is a JS-boundary artifact, not a kernel +# serialization divergence. This battery compares guest serialization. +set -uo pipefail +cd "$(dirname "$0")/.." + +SERVER=hosts/ocaml/_build/default/bin/sx_server.exe +WASM=shared/static/wasm/sx_browser.bc.wasm.js +PROBES=spec/tests/differential-probes.txt +[[ -x "$SERVER" ]] || { echo "SKIP: $SERVER not built" >&2; exit 2; } +[[ -f "$WASM" ]] || { echo "SKIP: $WASM missing" >&2; exit 2; } + +# --- KNOWN_DIVERGENT ledger (verified live 2026-07-04) ------------------- +# F-3/K53: bare sx_server's `apply` does NOT spread its argument list — +# (apply + (list 1 2 3)) errors "Expected number, got list"; (apply str l) +# returns the serialized list as one string. The WASM kernel spreads +# correctly. The test runner masks this with its own apply (F-7 class). 
+declare -A KNOWN_DIVERGENT +KNOWN_DIVERGENT['(apply + (list 1 2 3))']="F-3: native apply does not spread" +KNOWN_DIVERGENT['(apply max (list 1 5 2))']="F-3: native apply does not spread" +KNOWN_DIVERGENT['(apply str (list "a" "b"))']="F-3: native apply does not spread" + +native=$(mktemp); wasm=$(mktemp) + +python3 - "$SERVER" "$PROBES" > "$native" <<'PY' +import json, subprocess, sys +server, probefile = sys.argv[1], sys.argv[2] +probes = [l.strip() for l in open(probefile) if l.strip() and not l.startswith('#')] +inp = [] +for i, p in enumerate(probes): + inp.append(f"(epoch {i+1})") + inp.append(f"(eval {json.dumps(p)})") +out = subprocess.run(["timeout", "120", server], input="\n".join(inp) + "\n", + capture_output=True, text=True).stdout +res, cur = {}, None +for l in out.splitlines(): + if l.startswith("(ok-len "): + cur = int(l.split()[1]); res[cur] = None + elif l.startswith("(error "): + idx = int(l.split()[1]); res[idx] = "ERROR"; cur = None + elif cur is not None and res.get(cur) is None: + res[cur] = l; cur = None +for i, p in enumerate(probes): + print(f"PROBE {i+1} {res.get(i+1, '')}") +PY + +timeout 300 node hosts/ocaml/browser/eval_wasm_probes.js "$PROBES" > "$wasm" 2>/dev/null + +pass=0; fail=0; i=0 +while IFS= read -r expr; do + [[ -z "$expr" || "$expr" == \#* ]] && continue + i=$((i+1)) + a=$(sed -n "s/^PROBE $i //p" "$native") + b=$(sed -n "s/^PROBE $i //p" "$wasm") + known="${KNOWN_DIVERGENT[$expr]:-}" + if [[ "$a" == "$b" ]]; then + if [[ -n "$known" ]]; then + echo "RED: $expr — KNOWN_DIVERGENT now AGREES ($known); delete from ledger" + fail=$((fail+1)) + else + pass=$((pass+1)) + fi + else + if [[ -n "$known" ]]; then + echo "KNOWN-DIVERGENT: $expr ($known)" + pass=$((pass+1)) + else + echo "RED: $expr" + echo " native: $a" + echo " wasm: $b" + fail=$((fail+1)) + fi + fi +done < <(grep -v '^\s*#' "$PROBES" | grep -v '^\s*$') + +rm -f "$native" "$wasm" +echo +echo "differential: $i probes, $pass in agreement/ledgered, $fail red" +[[ $fail 
-eq 0 ]] diff --git a/spec/tests/differential-probes.txt b/spec/tests/differential-probes.txt new file mode 100644 index 00000000..45a90c9a --- /dev/null +++ b/spec/tests/differential-probes.txt @@ -0,0 +1,65 @@ +# W14/F8 differential probe corpus — one expression per line. +# Same expression evaluated on the native server (epoch protocol) and the +# shipped WASM kernel (K.eval); scripts/test-differential.sh diffs results. +# Classes drawn from review findings F-1 (integer arithmetic), F-3 (apply, +# dict key order), F-8 itemization, S-4 (float printing), K18/K53. +# integers & display (F-1) +(+ 1 2) +(- 10 3) +(* 6 7) +(/ 4 2) +(/ 1 2) +(/ 10 4) +(quotient 13 4) +(mod 10 3) +# float printing (S-4) +(+ 0.1 0.2) +(* 3 0.1) +(/ 1 3) +(str 0.3) +(str 1.5) +(str 2.0) +# overflow / expt (K18) +(expt 2 10) +(expt 2 62) +(expt 2 100) +(+ 9223372036854775807 1) +# apply (F-3) +(apply + (list 1 2 3)) +(apply max (list 1 5 2)) +(apply str (list "a" "b")) +# dict key order (F-3) +(keys {:b 2 :a 1 :c 3}) +(str {:b 2 :a 1}) +(vals {:b 2 :a 1}) +# strings +(split "a,b,c" ",") +(split "a--b" "--") +(len "héllo") +(upcase "abc") +(str (char-code "A")) +(substring "hello" 1 3) +(join "-" (list "x" "y")) +# equality & comparison +(= 1 1.0) +(= (list 1 2) (list 1 2)) +(equal? (list 1) (list 1)) +(< 1 2 3) +# collections +(sort (list 3 1 2)) +(range 3) +(reverse (list 1 2 3)) +(nth (list 10 20 30) 1) +(contains? {:a 1} :a) +(get {:a 1} :zz 99) +# quasiquote / quote +(quasiquote (1 (unquote (+ 1 1)) 3)) +(str (quote sym)) +# conditionals & special forms +(if true 1 2) +(and 1 2 3) +(or nil false 7) +(do ((fn (x) x) 5) 99) +# error normalization (both sides should error) +(undefined-symbol-xyz) +(/ 1 0)