W14: F8 cross-host differential battery (test-only) — CHECKLIST COMPLETE
Committed replacement for the review's ephemeral 130-probe corpus: spec/tests/differential-probes.txt (49 probes: F-1 int/float display, K18 overflow, F-3 apply + dict order, S-4 float printing, strings, collections, special forms, error normalization) evaluated on the native server (epoch protocol printer) and the SHIPPED WASM kernel (eval_wasm_probes.js via guest sx-serialize), diffed by scripts/test-differential.sh with a KNOWN_DIVERGENT heal-detecting ledger. Result: 46/49 agree. All 3 divergences share one root cause, verified live: bare sx_server's `apply` does not spread its argument list — (apply + (list 1 2 3)) errors "Expected number, got list", (apply str l) returns the serialized list; the WASM kernel spreads correctly and the test runner masks the bug with its own apply binding (F-7 class). Finding refinement: F-1's float-display divergence (0.3 vs 0.30000000000000004) is a K.eval JS-boundary artifact — guest-serialized output agrees across hosts; the battery therefore compares guest serialization. This completes the W14 checklist: 7 pin suites, 6 gate scripts/runners, 2 harness capabilities, C9 label cleanup, adapter-dom render coverage. Test-only: no semantics edits, no push. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
68
hosts/ocaml/browser/eval_wasm_probes.js
Executable file
68
hosts/ocaml/browser/eval_wasm_probes.js
Executable file
@@ -0,0 +1,68 @@
|
||||
#!/usr/bin/env node
|
||||
// eval_wasm_probes.js — W14/F8: evaluate a file of probe expressions (one
|
||||
// per line, '#'-comments allowed) on the SHIPPED browser kernel and print
|
||||
// PROBE <n> <result-or-ERROR>
|
||||
// per line, for diffing against the native server (scripts/test-differential.sh).
|
||||
// Boot stubs mirror test_wasm_native.js / run_wasm_corpus.js.
|
||||
|
||||
const fs = require('fs');
|
||||
const path = require('path');
|
||||
|
||||
const PROJECT_ROOT = path.resolve(__dirname, '../../..');
|
||||
const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
|
||||
|
||||
const probeFile = process.argv[2];
|
||||
if (!probeFile) { console.error('usage: eval_wasm_probes.js <probes.txt>'); process.exit(2); }
|
||||
|
||||
// --- Minimal browser-environment stubs ------------------------------------
// The shipped kernel bundle expects a browser-like global scope. These are
// inert stand-ins: no DOM is built, nothing is persisted, and network calls
// resolve to an empty body.

/**
 * Install `value` as the global `name`, replacing any existing definition.
 *
 * Plain assignment (`global.x = ...`) is not reliable for names that newer
 * Node releases already define as accessor properties: Node >= 21 ships a
 * built-in `navigator` exposed through a getter with no setter, so in a
 * sloppy-mode script the assignment is silently ignored (and throws in
 * strict mode). Redefining the property always takes effect. The descriptor
 * matches what a plain assignment would have created.
 *
 * @param {string} name - Global identifier to define.
 * @param {*} value - Stub to expose under that identifier.
 */
function installGlobal(name, value) {
  Object.defineProperty(globalThis, name, {
    value,
    configurable: true,
    enumerable: true,
    writable: true,
  });
}

installGlobal('window', globalThis);
installGlobal('document', {
  createElement: () => ({ style: {}, setAttribute() {}, appendChild() {}, children: [] }),
  createDocumentFragment: () => ({ appendChild() {}, children: [], childNodes: [] }),
  head: { appendChild() {} },
  body: { appendChild() {} },
  querySelector: () => null,
  querySelectorAll: () => [],
  createTextNode: (s) => ({ textContent: s }),
  addEventListener() {},
  createComment: (s) => ({ textContent: s || '' }),
  getElementsByTagName: () => [],
});
installGlobal('localStorage', { getItem: () => null, setItem() {}, removeItem() {} });
installGlobal('CustomEvent', class {
  constructor(n, o) {
    this.type = n;
    // A missing or falsy detail is exposed as an empty object.
    this.detail = (o || {}).detail || {};
  }
});
installGlobal('MutationObserver', class { observe() {} disconnect() {} });
installGlobal('requestIdleCallback', (fn) => setTimeout(fn, 0));
installGlobal('matchMedia', () => ({ matches: false }));
installGlobal('navigator', { serviceWorker: { register: () => Promise.resolve() } });
installGlobal('location', { href: '', pathname: '/', hostname: 'localhost' });
installGlobal('history', { pushState() {}, replaceState() {} });
installGlobal('fetch', () => Promise.resolve({ ok: true, text: () => Promise.resolve('') }));
|
||||
|
||||
/**
 * Boot the shipped WASM kernel, evaluate each probe from `probeFile`, and
 * print one `PROBE <n> <result-or-ERROR>` line per probe to stdout.
 *
 * Probes are numbered from 1 in file order; blank lines and lines whose first
 * non-space character is `#` are skipped and do not consume a number.
 *
 * @returns {Promise<void>} Resolves once every probe has been printed;
 *   rejects if the kernel global does not appear within the polling budget.
 */
async function main() {
  // Loading the bundle is what eventually publishes globalThis.SxKernel.
  require(path.join(WASM_DIR, 'sx_browser.bc.wasm.js'));

  // Poll every 50 ms; give up once more than 200 polls have come back empty.
  const kernel = await new Promise((resolve, reject) => {
    let attempts = 0;
    const timer = setInterval(() => {
      const found = globalThis.SxKernel;
      if (found) {
        clearInterval(timer);
        resolve(found);
        return;
      }
      attempts += 1;
      if (attempts > 200) {
        clearInterval(timer);
        reject(new Error('SxKernel not found'));
      }
    }, 50);
  });

  const probes = fs.readFileSync(probeFile, 'utf8')
    .split('\n')
    .map((raw) => raw.trim())
    .filter((text) => text !== '' && !text.startsWith('#'));

  probes.forEach((probe, index) => {
    let rendered;
    try {
      // Wrap in the guest printer so the result is SX text rather than a raw
      // JS value crossing the K.eval boundary.
      const value = kernel.eval(`(sx-serialize ${probe})`);
      rendered = typeof value === 'string' ? value : String(value);
    } catch {
      // A thrown evaluation failure is reported as the ERROR sentinel.
      rendered = 'ERROR';
    }
    // The kernel may also report failures as "Error: ..." strings; collapse
    // those to the same sentinel.
    if (rendered.startsWith('Error')) {
      rendered = 'ERROR';
    }
    // Keep each probe on a single output line.
    console.log(`PROBE ${index + 1} ${rendered.replace(/\n/g, '\\n')}`);
  });
}

main().catch((err) => {
  console.error('FATAL:', err.message);
  process.exit(1);
});
|
||||
@@ -104,10 +104,40 @@ Pin each confirmed-and-fixed finding with a minimal repro. Add suites to
|
||||
baseline identities updated in the same commit
|
||||
|
||||
### F. Differential battery
|
||||
- [ ] F8 — cross-host differential battery (same source, all hosts agree)
|
||||
- [x] F8 — cross-host differential battery: `spec/tests/differential-probes.txt`
|
||||
(49 probes) × native server vs shipped WASM kernel via
|
||||
`scripts/test-differential.sh` + `eval_wasm_probes.js`. 46 agree,
|
||||
3 ledgered KNOWN_DIVERGENT (F-3: bare-server `apply` does not spread —
|
||||
runner masks it, F-7 class). Refinement: the F-1 float-display
|
||||
divergence is a K.eval JS-boundary artifact — guest `sx-serialize`
|
||||
output AGREES across hosts
|
||||
|
||||
**CHECKLIST COMPLETE 2026-07-04** — all W14 items delivered. Open handoffs:
|
||||
sx_render.ml regen drift (Blocked, hosts lane), adapter-dom depth tests,
|
||||
3 WASM load-error bisects (hash-table/r7rs/sets), CI wiring of the four
|
||||
gate scripts (D3 maintainer decision).
|
||||
|
||||
## Progress log (newest first)
|
||||
|
||||
- 2026-07-04 — **F8 differential battery — CHECKLIST COMPLETE**. Committed
|
||||
replacement for the review's ephemeral 130-probe corpus:
|
||||
`spec/tests/differential-probes.txt` (49 probes across F-1 int/float
|
||||
display, K18 overflow, F-3 apply + dict order, S-4 float printing,
|
||||
strings/collections/special forms/error cases) evaluated on the native
|
||||
server (epoch protocol) and the shipped WASM kernel
|
||||
(`eval_wasm_probes.js`, guest `sx-serialize`), diffed by
|
||||
`scripts/test-differential.sh` with a KNOWN_DIVERGENT ledger (heal →
|
||||
red → delete entry). Result: 46/49 agree; 3 divergences, all one root
|
||||
cause — **bare sx_server's `apply` does not spread its arg list**
|
||||
((apply + (list 1 2 3)) → "Expected number, got list"; WASM spreads
|
||||
correctly; the test runner masks it with its own apply — F-7 class).
|
||||
Finding refinement: F-1's float-display divergence (0.3 vs 0.3000…4) is
|
||||
purely a K.eval JS-boundary artifact — guest-serialized output agrees.
|
||||
W14 delivered: 7 pin suites (spec/tests/test-gate-pins.sx, 29 tests),
|
||||
6 gate scripts/runners (protocol-gate 11, env-parity 7, harness-parity 12,
|
||||
wasm-corpus 83-file, suite-baseline 273-pin, differential 49-probe),
|
||||
2 harness capabilities (C22 log-first, C21 perform-mode), C9 label
|
||||
cleanup, adapter-dom render coverage. Test-only throughout.
|
||||
- 2026-07-04 — **C9 empty-suite labels (item E.3) — section E COMPLETE**.
|
||||
The sweep found the defect much wider than the finding: SIX files carried
|
||||
suite-less top-level deftests (test-chars 43, test-import-bind 14,
|
||||
|
||||
94
scripts/test-differential.sh
Executable file
94
scripts/test-differential.sh
Executable file
@@ -0,0 +1,94 @@
|
||||
#!/bin/bash
# test-differential.sh — W14/F8: cross-host differential battery.
#
# Evaluates every expression in spec/tests/differential-probes.txt on:
#   A) the native server (sx_server.exe, epoch protocol) — its printer
#   B) the SHIPPED browser kernel (eval_wasm_probes.js, guest sx-serialize)
# and diffs the outputs. The review's original 130-probe corpus was
# ephemeral (F-8); this is the committed replacement.
#
# KNOWN_DIVERGENT is the ledger of confirmed, still-open divergences —
# keyed by the probe EXPRESSION. Red on a NEW divergence (host drift) and
# red on a HEALED one (fix landed: delete the entry, locking in parity).
#
# Method note (finding refinement, 2026-07-04): comparing raw K.eval
# JS-boundary values shows float-display divergences (0.3 vs
# 0.30000000000000004) that DISAPPEAR under guest-level (sx-serialize …) —
# the F-1 float-display class is a JS-boundary artifact, not a kernel
# serialization divergence. This battery compares guest serialization.
#
# Probe numbering contract: all three readers of the corpus (the Python
# driver below, eval_wasm_probes.js, and the comparison loop) strip each
# line, then skip blanks and lines starting with '#'. They must agree or
# PROBE <n> indices go out of step between hosts.
#
# Exit status: 0 all probes agree or are ledgered; 1 at least one RED (or no
# probes evaluated); 2 a prerequisite (server, WASM bundle, corpus) is missing.
set -uo pipefail
cd "$(dirname "$0")/.."

SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
WASM=shared/static/wasm/sx_browser.bc.wasm.js
PROBES=spec/tests/differential-probes.txt
[[ -x "$SERVER" ]] || { echo "SKIP: $SERVER not built" >&2; exit 2; }
[[ -f "$WASM" ]] || { echo "SKIP: $WASM missing" >&2; exit 2; }
# Without this guard a missing corpus produced a green "0 probes" run.
[[ -f "$PROBES" ]] || { echo "SKIP: $PROBES missing" >&2; exit 2; }

# --- KNOWN_DIVERGENT ledger (verified live 2026-07-04) -------------------
# F-3/K53: bare sx_server's `apply` does NOT spread its argument list —
# (apply + (list 1 2 3)) errors "Expected number, got list"; (apply str l)
# returns the serialized list as one string. The WASM kernel spreads
# correctly. The test runner masks this with its own apply (F-7 class).
declare -A KNOWN_DIVERGENT
KNOWN_DIVERGENT['(apply + (list 1 2 3))']="F-3: native apply does not spread"
KNOWN_DIVERGENT['(apply max (list 1 5 2))']="F-3: native apply does not spread"
KNOWN_DIVERGENT['(apply str (list "a" "b"))']="F-3: native apply does not spread"

native=$(mktemp); wasm=$(mktemp); wasm_err=$(mktemp)
# Clean up on every exit path, including early failure and interrupts.
trap 'rm -f "$native" "$wasm" "$wasm_err"' EXIT

# --- A) native server ------------------------------------------------------
# Feeds "(epoch N)" / "(eval "<probe>")" pairs and reads back either
# "(ok-len N ...)" followed by one payload line, or "(error N ...)".
python3 - "$SERVER" "$PROBES" > "$native" <<'PY'
import json, subprocess, sys
server, probefile = sys.argv[1], sys.argv[2]
# Strip BEFORE testing for '#', so an indented comment is skipped here exactly
# as it is by the WASM runner and the comparison loop.
with open(probefile, encoding="utf-8") as fh:
    stripped = (l.strip() for l in fh)
    probes = [l for l in stripped if l and not l.startswith('#')]
inp = []
for i, p in enumerate(probes):
    inp.append(f"(epoch {i+1})")
    inp.append(f"(eval {json.dumps(p)})")
out = subprocess.run(["timeout", "120", server], input="\n".join(inp) + "\n",
                     capture_output=True, text=True, encoding="utf-8").stdout
res, cur = {}, None
for l in out.splitlines():
    if l.startswith("(ok-len "):
        cur = int(l.split()[1]); res[cur] = None
    elif l.startswith("(error "):
        idx = int(l.split()[1]); res[idx] = "ERROR"; cur = None
    elif cur is not None and res.get(cur) is None:
        res[cur] = l; cur = None
for i, p in enumerate(probes):
    val = res.get(i + 1)
    # No response, or an ok-len header with no payload line, both report the
    # same sentinel rather than Python's "None".
    print(f"PROBE {i+1} {'<none>' if val is None else val}")
PY

# --- B) shipped WASM kernel -------------------------------------------------
# Keep stderr so a boot failure is diagnosable instead of showing up as a
# wall of unexplained REDs. A non-zero exit is reported but not fatal by
# itself: whatever output was produced is still compared below.
if ! timeout 300 node hosts/ocaml/browser/eval_wasm_probes.js "$PROBES" > "$wasm" 2> "$wasm_err"; then
  echo "WARN: eval_wasm_probes.js exited non-zero; its stderr follows:" >&2
  sed 's/^/  /' "$wasm_err" >&2
fi

# --- Compare ------------------------------------------------------------------
pass=0; fail=0; i=0
while IFS= read -r expr; do
  # Trim surrounding whitespace (including a trailing CR) so the text matches
  # what both evaluators saw and can be used as a KNOWN_DIVERGENT key.
  expr="${expr#"${expr%%[![:space:]]*}"}"
  expr="${expr%"${expr##*[![:space:]]}"}"
  [[ -z "$expr" || "$expr" == \#* ]] && continue
  i=$((i+1))
  a=$(sed -n "s/^PROBE $i //p" "$native")
  b=$(sed -n "s/^PROBE $i //p" "$wasm")
  known="${KNOWN_DIVERGENT[$expr]:-}"
  if [[ "$a" == "$b" ]]; then
    if [[ -n "$known" ]]; then
      echo "RED: $expr — KNOWN_DIVERGENT now AGREES ($known); delete from ledger"
      fail=$((fail+1))
    else
      pass=$((pass+1))
    fi
  else
    if [[ -n "$known" ]]; then
      echo "KNOWN-DIVERGENT: $expr ($known)"
      pass=$((pass+1))
    else
      echo "RED: $expr"
      echo "  native: $a"
      echo "  wasm:   $b"
      fail=$((fail+1))
    fi
  fi
done < <(grep -v '^[[:space:]]*#' "$PROBES" | grep -v '^[[:space:]]*$')

echo
echo "differential: $i probes, $pass in agreement/ledgered, $fail red"
# An empty corpus proves nothing; do not report it as a pass.
if [[ $i -eq 0 ]]; then
  echo "RED: no probes were evaluated" >&2
  exit 1
fi
[[ $fail -eq 0 ]]
|
||||
65
spec/tests/differential-probes.txt
Normal file
65
spec/tests/differential-probes.txt
Normal file
@@ -0,0 +1,65 @@
|
||||
# W14/F8 differential probe corpus — one expression per line.
|
||||
# Same expression evaluated on the native server (epoch protocol) and the
|
||||
# shipped WASM kernel (guest sx-serialize via K.eval); scripts/test-differential.sh diffs results.
|
||||
# Classes drawn from review findings F-1 (integer arithmetic), F-3 (apply,
|
||||
# dict key order), F-8 itemization, S-4 (float printing), K18/K53.
|
||||
# integers & display (F-1)
|
||||
(+ 1 2)
|
||||
(- 10 3)
|
||||
(* 6 7)
|
||||
(/ 4 2)
|
||||
(/ 1 2)
|
||||
(/ 10 4)
|
||||
(quotient 13 4)
|
||||
(mod 10 3)
|
||||
# float printing (S-4)
|
||||
(+ 0.1 0.2)
|
||||
(* 3 0.1)
|
||||
(/ 1 3)
|
||||
(str 0.3)
|
||||
(str 1.5)
|
||||
(str 2.0)
|
||||
# overflow / expt (K18)
|
||||
(expt 2 10)
|
||||
(expt 2 62)
|
||||
(expt 2 100)
|
||||
(+ 9223372036854775807 1)
|
||||
# apply (F-3)
|
||||
(apply + (list 1 2 3))
|
||||
(apply max (list 1 5 2))
|
||||
(apply str (list "a" "b"))
|
||||
# dict key order (F-3)
|
||||
(keys {:b 2 :a 1 :c 3})
|
||||
(str {:b 2 :a 1})
|
||||
(vals {:b 2 :a 1})
|
||||
# strings
|
||||
(split "a,b,c" ",")
|
||||
(split "a--b" "--")
|
||||
(len "héllo")
|
||||
(upcase "abc")
|
||||
(str (char-code "A"))
|
||||
(substring "hello" 1 3)
|
||||
(join "-" (list "x" "y"))
|
||||
# equality & comparison
|
||||
(= 1 1.0)
|
||||
(= (list 1 2) (list 1 2))
|
||||
(equal? (list 1) (list 1))
|
||||
(< 1 2 3)
|
||||
# collections
|
||||
(sort (list 3 1 2))
|
||||
(range 3)
|
||||
(reverse (list 1 2 3))
|
||||
(nth (list 10 20 30) 1)
|
||||
(contains? {:a 1} :a)
|
||||
(get {:a 1} :zz 99)
|
||||
# quasiquote / quote
|
||||
(quasiquote (1 (unquote (+ 1 1)) 3))
|
||||
(str (quote sym))
|
||||
# conditionals & special forms
|
||||
(if true 1 2)
|
||||
(and 1 2 3)
|
||||
(or nil false 7)
|
||||
(do ((fn (x) x) 5) 99)
|
||||
# error normalization (both sides should error)
|
||||
(undefined-symbol-xyz)
|
||||
(/ 1 0)
|
||||
Reference in New Issue
Block a user