W14: F2 WASM corpus runner — spec tests on the SHIPPED browser kernel
conformance.md F-2: no runner fed spec/tests through the shipped sx_browser.bc.wasm.js — the F-1/F-3 native/WASM divergences existed undetected because of exactly this gap. Add hosts/ocaml/browser/run_wasm_corpus.js: boots the shipped kernel headless in Node (stub block + module preload mirroring test_wasm_native.js, the blessed boot path), registers the test-framework hooks, runs ONE test file per process and emits a parseable CORPUS-RESULT line — process isolation means a hanging file is killed by the driver's per-file timeout without ending the sweep. Add scripts/test-wasm-corpus.sh: sweeps spec/tests, applies a SKIP / KNOWN_FAIL ledger (green-flip on a KNOWN_FAIL fails the run so the ledger cannot rot), gates on everything else. Empirical baseline (2026-07-04): 83 files, 80 fully green, 5192 passes, zero test failures on the shipped kernel — including test-gate-pins (29/29). KNOWN_FAIL: test-hash-table/test-r7rs/test-sets hit an opaque jsoo load-error mid-file (22/87/30 tests pass first). Full sweep ~13 min; sx-build-all.sh wiring deferred to the D3 gate-definition decision. Test-only: no semantics edits, no push. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
82
scripts/test-wasm-corpus.sh
Executable file
82
scripts/test-wasm-corpus.sh
Executable file
@@ -0,0 +1,82 @@
|
||||
#!/bin/bash
# test-wasm-corpus.sh — W14/F2: sweep the spec test corpus through the
# SHIPPED browser kernel (sx_browser.bc.wasm.js) headless in Node.
#
# The review (conformance.md F-2) found the shipped browser artifact never
# runs the corpus — F-1/F-3 native/WASM divergences existed undetected.
# Each file runs in its OWN node process via run_wasm_corpus.js (a hang is
# killed by per-file timeout without ending the sweep).
#
# The SKIP list documents files that structurally cannot run on the browser
# kernel (runner-only bindings, native-only machinery) — the F-5/F-6/F-10
# "one-host-gated" theme, recorded honestly per file with the reason.
# KNOWN_FAIL documents files that RUN but currently have failing tests on
# the shipped kernel (host divergence, F-1/F-3 class): they execute and
# report, but don't gate. Everything else must be GREEN — exit 1 otherwise;
# a KNOWN_FAIL going green also fails (ledger must be updated).
#
# Usage: bash scripts/test-wasm-corpus.sh [file.sx ...]
#
# Exit status 2 means a precondition is missing (repo root, kernel, runner,
# or a required tool) and no test file was run.

# -e is not set. NOTE(review): presumably deliberate, since the sweep tallies
# per-file failures instead of aborting on the first one — confirm before
# adding it.
set -uo pipefail

# All paths below are relative to the repository root (parent of scripts/).
cd "$(dirname "$0")/.." || { echo "ERROR: cannot cd to repository root" >&2; exit 2; }

RUNNER=hosts/ocaml/browser/run_wasm_corpus.js
KERNEL=shared/static/wasm/sx_browser.bc.wasm.js
[[ -f "$KERNEL" ]] || { echo "SKIP: $KERNEL missing (run sx-build-all first)" >&2; exit 2; }
[[ -f "$RUNNER" ]] || { echo "ERROR: $RUNNER missing" >&2; exit 2; }

# Without these tools every file would be reported as a crash; fail fast with
# a message that names the real problem instead.
for tool in node timeout; do
  command -v "$tool" >/dev/null \
    || { echo "ERROR: required tool '$tool' not found on PATH" >&2; exit 2; }
done

# --- classification (empirical sweep 2026-07-04; see sx-gate-loop.md) ---
# Sweep baseline: 83 files, 80 fully green, 5192 passes, 0 test failures.
# The shipped kernel even provides the CEK driver bindings (make-env,
# cek-step-loop, ...) — broader than a bare sx_server.
#
# Both ledgers map a test file's basename to the reason it is listed. A file
# counts as listed only when its reason is non-empty, so the helpers below
# refuse an empty or missing reason instead of recording an entry that would
# be silently ignored.
declare -A SKIP=() KNOWN_FAIL=()

# skip FILE REASON — record FILE in the SKIP ledger.
skip() { SKIP["${1:?skip: file name required}"]="${2:?skip: non-empty reason required}"; }

# known FILE REASON — record FILE in the KNOWN_FAIL ledger.
known() { KNOWN_FAIL["${1:?known: file name required}"]="${2:?known: non-empty reason required}"; }

# Partial load-errors: the kernel throws mid-file (opaque jsoo exception,
# message "undefined"); tests before the failing form pass and report.
known test-hash-table.sx "partial: 22 pass then load-error mid-file"
known test-r7rs.sx "partial: 87 pass then load-error mid-file"
known test-sets.sx "partial: 30 pass then load-error mid-file"

# Sweep tallies: tests passed / failed across all files, number of gating
# ("red") problems, and number of files actually executed.
pass_total=0
fail_total=0
red=0
files=0

# Files to sweep: the command-line arguments when given, otherwise every
# spec/tests/test-*.sx except test-framework.sx.
targets=()
if (( $# > 0 )); then
  targets=("$@")
else
  for f in spec/tests/test-*.sx; do
    # An unmatched glob stays as the literal pattern; do not treat it as a file.
    [[ -e "$f" ]] || continue
    [[ "${f##*/}" == "test-framework.sx" ]] && continue
    targets+=("$f")
  done
fi

# A sweep over zero files must not pass as green.
if (( ${#targets[@]} == 0 )); then
  echo "ERROR: no test files found under spec/tests (nothing to sweep)" >&2
  exit 2
fi

# corpus_field NAME CHARSET LINE — print the value of the last NAME=<value>
# pair in LINE, where <value> is the longest run of characters from the
# bracket-expression body CHARSET. Prints nothing when NAME= is absent.
corpus_field() {
  sed -n "s/.*$1=\([$2]*\).*/\1/p" <<<"$3"
}

# Per-file wall-clock limit in seconds; override with WASM_CORPUS_TIMEOUT.
per_file_timeout=${WASM_CORPUS_TIMEOUT:-120}
num_re='^[0-9]+$'

# Run every target in its own node process and classify the outcome:
#   SKIP        listed in SKIP — not run, not counted
#   RED         no result line, malformed result, failing tests, or a
#               KNOWN_FAIL file that is now fully green (ledger is stale)
#   KNOWN-FAIL  listed in KNOWN_FAIL and still failing — reported, not gating
#   OK          status=ok with zero failures
for f in "${targets[@]}"; do
  base=$(basename "$f")
  if [[ -n "${SKIP[$base]:-}" ]]; then
    echo "SKIP: $base — ${SKIP[$base]}"
    continue
  fi
  files=$((files+1))

  # Capture the runner's stdout and exit status separately so a timeout
  # (status 124 from timeout(1)) can be told apart from a crash.
  out=$(timeout "$per_file_timeout" node "$RUNNER" "$f" 2>/dev/null)
  rc=$?
  line=$(grep '^CORPUS-RESULT' <<<"$out" || true)
  if [[ -z "$line" ]]; then
    if (( rc == 124 )); then
      echo "RED: $base — timeout after ${per_file_timeout}s (no CORPUS-RESULT)"
    else
      echo "RED: $base — crash, exit $rc (no CORPUS-RESULT)"
    fi
    red=$((red+1)); continue
  fi

  p=$(corpus_field pass '0-9' "$line")
  fl=$(corpus_field fail '0-9' "$line")
  st=$(corpus_field status 'a-z-' "$line")

  # A result without numeric pass=/fail= counts (or with several result
  # lines) cannot be tallied; treating an empty count as 0 would let the
  # file pass as green.
  if [[ ! "$p" =~ $num_re || ! "$fl" =~ $num_re ]]; then
    echo "RED: $base — malformed CORPUS-RESULT: $line"
    red=$((red+1)); continue
  fi
  # Force base 10 so a count with leading zeros is not read as octal.
  p=$((10#$p)); fl=$((10#$fl))
  pass_total=$((pass_total+p)); fail_total=$((fail_total+fl))

  if [[ -n "${KNOWN_FAIL[$base]:-}" ]]; then
    if [[ "$fl" -eq 0 && "$st" == "ok" ]]; then
      echo "RED: $base — KNOWN_FAIL is now GREEN (${KNOWN_FAIL[$base]}); update the ledger"
      red=$((red+1))
    else
      echo "KNOWN-FAIL: $base pass=$p fail=$fl (${KNOWN_FAIL[$base]})"
    fi
    continue
  fi

  if [[ "$st" != "ok" || "$fl" -ne 0 ]]; then
    echo "RED: $base pass=$p fail=$fl status=$st"
    red=$((red+1))
  else
    echo "OK: $base pass=$p"
  fi
done

# print_summary — print a blank line followed by the one-line sweep tally,
# and return success only when no file was counted red. Called last, so its
# status becomes the script's exit status.
print_summary() {
  printf '\nwasm-corpus: %s files run, %s passed, %s failed, %s red\n' \
    "$files" "$pass_total" "$fail_total" "$red"
  [[ $red -eq 0 ]]
}

print_summary
|
||||
Reference in New Issue
Block a user