diff --git a/hosts/ocaml/browser/run_wasm_corpus.js b/hosts/ocaml/browser/run_wasm_corpus.js new file mode 100755 index 00000000..92c6141f --- /dev/null +++ b/hosts/ocaml/browser/run_wasm_corpus.js @@ -0,0 +1,170 @@ +#!/usr/bin/env node +// run_wasm_corpus.js — W14/F2: run spec-test files through the SHIPPED +// browser kernel (sx_browser.bc.wasm.js), headless in Node. +// +// The review (conformance.md F-2) found no runner feeds spec/tests through +// the shipped browser artifact — F-1/F-3 host divergences existed +// undetected precisely because of that. This runs ONE test file per +// invocation (process isolation: a hanging file is killed by the driver's +// timeout without taking down the sweep) and prints a parseable summary: +// CORPUS-RESULT <file> pass=<n> fail=<n> status=ok|load-error +// +// Usage: node hosts/ocaml/browser/run_wasm_corpus.js spec/tests/test-eval.sx +// Driver: scripts/test-wasm-corpus.sh (sweeps the corpus, applies skips). +// +// Boot stubs and module preload mirror test_wasm_native.js (the blessed +// boot path for the shipped kernel). 
/**
 * Build a minimal in-memory stand-in for a DOM element, sufficient for the
 * kernel to boot and render headless in Node.
 *
 * Only the members listed below exist. Children are tracked in three parallel
 * arrays (`_children`, `childNodes`, `children`) that are always appended to
 * together. `removeChild`, `replaceChild`, `insertBefore` and the `innerHTML`
 * setter are deliberately shallow stubs: they do not model real DOM tree
 * mutation (e.g. `insertBefore` appends, `removeChild` removes nothing).
 *
 * @param {string} tag - Tag name; used verbatim when serialising.
 * @returns {object} The stub element.
 */
function makeElement(tag) {
  // Tags serialised without children or a closing tag.
  const VOID_TAGS = ['br', 'hr', 'img', 'input', 'meta', 'link'];
  // camelCase dataset key -> "data-kebab-case" attribute name.
  const dataAttrName = key => 'data-' + key.replace(/[A-Z]/g, c => '-' + c.toLowerCase());

  const el = {
    tagName: tag, _attrs: {}, _children: [], style: {},
    childNodes: [], children: [], textContent: '',
    nodeType: 1,
    setAttribute(k, v) { el._attrs[k] = String(v); },
    getAttribute(k) {
      // Own-property check: an attribute set to '' must come back as '' (not
      // null), and inherited Object.prototype members must not leak through.
      return Object.prototype.hasOwnProperty.call(el._attrs, k) ? el._attrs[k] : null;
    },
    removeAttribute(k) { delete el._attrs[k]; },
    appendChild(c) { el._children.push(c); el.childNodes.push(c); el.children.push(c); return c; },
    // Stub: ignores the reference node and appends.
    insertBefore(c) { el._children.push(c); el.childNodes.push(c); el.children.push(c); return c; },
    removeChild(c) { return c; },
    replaceChild(n) { return n; },
    cloneNode() { return makeElement(tag); },
    addEventListener() {}, removeEventListener() {}, dispatchEvent() {},
    get innerHTML() {
      return el._children.map(c => {
        if (c._isText) return c.textContent || '';
        // NOTE(review): comment nodes serialise to nothing here; confirm
        // against test_wasm_native.js whether '<!--…-->' was intended.
        if (c._isComment) return '';
        return c.outerHTML || '';
      }).join('');
    },
    // Stub: the assigned markup is not parsed; children are simply dropped.
    set innerHTML(v) { el._children = []; el.childNodes = []; el.children = []; },
    get outerHTML() {
      let s = '<' + tag;
      // Sorted attribute order keeps the serialisation deterministic.
      for (const k of Object.keys(el._attrs).sort()) s += ` ${k}="${el._attrs[k]}"`;
      s += '>';
      if (VOID_TAGS.includes(tag)) return s;
      return s + el.innerHTML + '</' + tag + '>';
    },
    dataset: new Proxy({}, {
      get(store, k) {
        // Symbol keys (inspection, coercion) have no attribute mapping.
        if (typeof k !== 'string') return Reflect.get(store, k);
        return el._attrs[dataAttrName(k)];
      },
      set(store, k, v) {
        if (typeof k !== 'string') return Reflect.set(store, k, v);
        // Stored as a string, consistent with setAttribute.
        el._attrs[dataAttrName(k)] = String(v);
        return true;
      }
    }),
    querySelectorAll() { return []; },
    querySelector() { return null; },
  };
  return el;
}
const fs = require('fs');
const path = require('path');
const util = require('util');

const PROJECT_ROOT = path.resolve(__dirname, '../../..');
const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');

const target = process.argv[2];
if (!target) {
  console.error('usage: run_wasm_corpus.js <file.sx>');
  process.exit(2);
}

/**
 * Render a thrown value as a printable string.
 *
 * Values thrown out of the kernel are not necessarily Error instances; for
 * those, `e.message` is undefined and the log line carries no information.
 * Non-Error throwables are therefore inspected structurally instead.
 *
 * @param {*} e - Whatever was thrown or rejected.
 * @returns {string} The Error message, or an inspected dump of the value.
 */
function describeError(e) {
  if (e instanceof Error) return e.message;
  if (e != null && typeof e.message === 'string') return e.message;
  return util.inspect(e, { depth: 4 });
}

// --- Browser global stubs (as test_wasm_native.js) ---
// Installed before the kernel script is required, so that it finds a
// browser-like global environment.
global.window = global;
global.document = {
  createElement: makeElement,
  createDocumentFragment() { return makeElement('fragment'); },
  head: makeElement('head'), body: makeElement('body'),
  querySelector() { return null; }, querySelectorAll() { return []; },
  createTextNode(s) { return { _isText: true, textContent: String(s), nodeType: 3 }; },
  addEventListener() {},
  createComment(s) { return { _isComment: true, textContent: s || '', nodeType: 8 }; },
  getElementsByTagName() { return []; },
};
global.localStorage = { getItem() { return null; }, setItem() {}, removeItem() {} };
global.CustomEvent = class { constructor(n, o) { this.type = n; this.detail = (o || {}).detail || {}; } };
global.MutationObserver = class { observe() {} disconnect() {} };
global.requestIdleCallback = fn => setTimeout(fn, 0);
global.matchMedia = () => ({ matches: false });
// NOTE(review): some Node releases ship a built-in `navigator` global exposed
// as an accessor; a plain assignment can then be silently ignored in sloppy
// mode. defineProperty installs the stub either way — confirm against the
// Node versions used in CI.
Object.defineProperty(globalThis, 'navigator', {
  value: { serviceWorker: { register() { return Promise.resolve(); } } },
  configurable: true, writable: true, enumerable: true,
});
global.location = { href: '', pathname: '/', hostname: 'localhost' };
global.history = { pushState() {}, replaceState() {} };
global.fetch = () => Promise.resolve({ ok: true, text() { return Promise.resolve(''); } });
global.XMLHttpRequest = class { open() {} send() {} };

/**
 * Boot the shipped kernel, load the web-stack modules and the test framework,
 * run the single target test file, print the CORPUS-RESULT summary line and
 * exit.
 *
 * Exit status: 0 when the file loaded and no test failed, 1 otherwise.
 * Rejects if `globalThis.SxKernel` does not appear within the polling window.
 */
async function main() {
  require(path.join(WASM_DIR, 'sx_browser.bc.wasm.js'));

  // The kernel publishes itself on globalThis.SxKernel; poll every 50 ms,
  // giving up after 200 tries (~10 s).
  const K = await new Promise((resolve, reject) => {
    let tries = 0;
    const poll = setInterval(() => {
      if (globalThis.SxKernel) { clearInterval(poll); resolve(globalThis.SxKernel); }
      else if (++tries > 200) { clearInterval(poll); reject(new Error('SxKernel not found after 10s')); }
    }, 50);
  });

  // --- 8 FFI host primitives (as test_wasm_native.js) ---
  // `undefined` results are normalised to null before being handed back.
  K.registerNative('host-global', args => (args[0] in globalThis) ? globalThis[args[0]] : null);
  K.registerNative('host-get', args => {
    const [obj, prop] = args;
    if (obj == null) return null;
    const v = obj[prop];
    return v === undefined ? null : v;
  });
  K.registerNative('host-set!', args => { if (args[0] != null) args[0][args[1]] = args[2]; return args[2]; });
  K.registerNative('host-call', args => {
    const [obj, method, ...rest] = args;
    if (obj == null || typeof obj[method] !== 'function') return null;
    const r = obj[method].apply(obj, rest);
    return r === undefined ? null : r;
  });
  K.registerNative('host-new', args => {
    const [Ctor, ...ctorArgs] = args;
    return new Ctor(...ctorArgs);
  });
  K.registerNative('host-callback', args => function (...cbArgs) { return K.callFn(args[0], cbArgs); });
  K.registerNative('host-typeof', args => typeof args[0]);
  K.registerNative('host-await', args => args[0]);

  K.eval('(define SX_VERSION "wasm-corpus-1.0")');
  K.eval('(define SX_ENGINE "ocaml-vm-wasm-corpus")');
  K.eval('(define parse sx-parse)');
  K.eval('(define serialize sx-serialize)');

  // --- Web stack modules (source form; bytecode covered elsewhere) ---
  const sxDir = path.join(WASM_DIR, 'sx');
  const modules = [
    'render', 'core-signals', 'signals', 'deps', 'router', 'page-helpers', 'freeze',
    'bytecode', 'compiler', 'vm', 'dom', 'browser',
    'adapter-html', 'adapter-sx', 'adapter-dom',
    'boot-helpers', 'hypersx',
    'harness', 'harness-reactive', 'harness-web',
    'engine', 'orchestration', 'boot',
  ];
  // begin/endModuleLoad are optional on the kernel object; call when present.
  if (K.beginModuleLoad) K.beginModuleLoad();
  for (const mod of modules) {
    K.load(fs.readFileSync(path.join(sxDir, mod + '.sx'), 'utf8'));
  }
  if (K.endModuleLoad) K.endModuleLoad();

  // --- Test framework hooks ---
  let pass = 0, fail = 0;
  const suiteStack = [];
  K.registerNative('report-pass', () => { pass++; return null; });
  K.registerNative('report-fail', args => {
    fail++;
    const suitePath = suiteStack.join(' > ');
    console.error(`FAIL: ${suitePath ? suitePath + ' > ' : ''}${args[0]}\n ${args[1]}`);
    return null;
  });
  K.registerNative('push-suite', args => { suiteStack.push(args[0]); return null; });
  K.registerNative('pop-suite', () => { suiteStack.pop(); return null; });
  K.eval('(define test-allowed? (fn (name) true))');
  K.eval('(define try-call (fn (thunk) (let ((result (cek-try thunk (fn (err) err)))) (if (and (= (type-of result) "string") (starts-with? result "Error")) {"ok" false "error" result} {"ok" true "error" nil}))))');

  K.load(fs.readFileSync(path.join(PROJECT_ROOT, 'spec/tests/test-framework.sx'), 'utf8'));

  // --- Run the target file ---
  const rel = path.relative(PROJECT_ROOT, path.resolve(target));
  let status = 'ok';
  try {
    K.load(fs.readFileSync(path.resolve(target), 'utf8'));
  } catch (e) {
    // Tests that reported before the throw keep their counts; the file as a
    // whole is flagged as load-error.
    status = 'load-error';
    console.error(`LOAD-ERROR: ${rel}: ${describeError(e)}`);
  }
  console.log(`CORPUS-RESULT ${rel} pass=${pass} fail=${fail} status=${status}`);
  process.exit(status !== 'ok' || fail > 0 ? 1 : 0);
}

main().catch(e => { console.error('FATAL:', describeError(e)); process.exit(1); });
Epoch-loop protocol fuzz + skip-list - [ ] C3/C4/C5/C6/C7 — epoch protocol fuzz suite @@ -94,6 +100,21 @@ Pin each confirmed-and-fixed finding with a minimal repro. Add suites to ## Progress log (newest first) +- 2026-07-04 — **F2 WASM corpus runner (section D COMPLETE)**. The review's + headline conformance gap: no runner ever fed spec/tests through the + SHIPPED browser artifact (F-1/F-3 divergences existed undetected). Built + `run_wasm_corpus.js` (boots sx_browser.bc.wasm.js headless in Node with + the test_wasm_native.js stub block, loads the 23 web-stack modules, + registers framework hooks, runs ONE file per process → parseable + `CORPUS-RESULT` line; process isolation means a hung file can't kill the + sweep) + `scripts/test-wasm-corpus.sh` (sweep driver, SKIP/KNOWN_FAIL + ledger with green-flip detection). **Empirical baseline: 83 files, 80 + fully green, 5192 passes, ZERO test failures on the shipped kernel** — + including test-gate-pins (29/29) and test-letrec-resume (the kernel + provides cek-* driver bindings, broader than bare sx_server). 3 partial + load-errors (test-hash-table 22p, test-r7rs 87p, test-sets 30p — opaque + jsoo exception mid-file, diagnosing which form = follow-up). Full sweep + ~13 min; CI wiring deferred to the D3 gate-definition decision. Test-only. - 2026-07-04 — **C23 adapter-dom render-output tests (item C.4) — section C COMPLETE**. Key discovery: the "browser-only" exclusion of adapter-dom testing is FALSE for render output — `(import (web adapter-dom))` diff --git a/scripts/test-wasm-corpus.sh b/scripts/test-wasm-corpus.sh new file mode 100755 index 00000000..dd36ae47 --- /dev/null +++ b/scripts/test-wasm-corpus.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# test-wasm-corpus.sh — W14/F2: sweep the spec test corpus through the +# SHIPPED browser kernel (sx_browser.bc.wasm.js) headless in Node. 
#
# The review (conformance.md F-2) found the shipped browser artifact never
# runs the corpus — F-1/F-3 native/WASM divergences existed undetected.
# Each file runs in its OWN node process via run_wasm_corpus.js (a hang is
# killed by per-file timeout without ending the sweep).
#
# The SKIP list documents files that structurally cannot run on the browser
# kernel (runner-only bindings, native-only machinery) — the F-5/F-6/F-10
# "one-host-gated" theme, recorded honestly per file with the reason.
# KNOWN_FAIL documents files that RUN but currently have failing tests on
# the shipped kernel (host divergence, F-1/F-3 class): they execute and
# report, but don't gate. Everything else must be GREEN — exit 1 otherwise;
# a KNOWN_FAIL going green also fails (ledger must be updated).
#
# Usage: bash scripts/test-wasm-corpus.sh [file.sx ...]
# Env:   PER_FILE_TIMEOUT — seconds allowed per test file (default 120).
set -uo pipefail
cd "$(dirname "$0")/.."

RUNNER=hosts/ocaml/browser/run_wasm_corpus.js
KERNEL=shared/static/wasm/sx_browser.bc.wasm.js
PER_FILE_TIMEOUT=${PER_FILE_TIMEOUT:-120}
[[ -f "$KERNEL" ]] || { echo "SKIP: $KERNEL missing (run sx-build-all first)" >&2; exit 2; }

# --- classification (empirical sweep 2026-07-04; see sx-gate-loop.md) ---
# Sweep baseline: 83 files, 80 fully green, 5192 passes, 0 test failures.
# The shipped kernel even provides the CEK driver bindings (make-env,
# cek-step-loop, ...) — broader than a bare sx_server.
declare -A SKIP KNOWN_FAIL
skip()  { SKIP[$1]=$2; }
known() { KNOWN_FAIL[$1]=$2; }
# Partial load-errors: the kernel throws mid-file (opaque jsoo exception,
# message "undefined"); tests before the failing form pass and report.
known test-hash-table.sx "partial: 22 pass then load-error mid-file"
known test-r7rs.sx       "partial: 87 pass then load-error mid-file"
known test-sets.sx       "partial: 30 pass then load-error mid-file"

pass_total=0; fail_total=0; red=0; files=0
declare -a targets
if [[ $# -gt 0 ]]; then
  targets=("$@")
else
  for f in spec/tests/test-*.sx; do
    # The framework itself is loaded by the runner, not run as a test file.
    [[ "$(basename "$f")" == "test-framework.sx" ]] && continue
    targets+=("$f")
  done
fi

for f in "${targets[@]}"; do
  base=$(basename "$f")
  if [[ -n "${SKIP[$base]:-}" ]]; then
    echo "SKIP: $base — ${SKIP[$base]}"
    continue
  fi
  files=$((files+1))

  # Capture stdout and the exit status separately so that a timeout
  # (status 124 from timeout(1)) can be told apart from a crash.
  out=$(timeout "$PER_FILE_TIMEOUT" node "$RUNNER" "$f" 2>/dev/null)
  rc=$?
  line=$(grep '^CORPUS-RESULT' <<<"$out" | tail -n 1)
  if [[ -z "$line" ]]; then
    if [[ $rc -eq 124 ]]; then
      why="timeout after ${PER_FILE_TIMEOUT}s"
    else
      why="crash (node exit $rc)"
    fi
    echo "RED: $base — $why (no CORPUS-RESULT)"
    red=$((red+1)); continue
  fi

  p=$(sed -n 's/.*pass=\([0-9]*\).*/\1/p' <<<"$line")
  fl=$(sed -n 's/.*fail=\([0-9]*\).*/\1/p' <<<"$line")
  st=$(sed -n 's/.*status=\([a-z-]*\).*/\1/p' <<<"$line")
  # A malformed summary line must not break the arithmetic below.
  p=${p:-0}; fl=${fl:-0}
  pass_total=$((pass_total+p)); fail_total=$((fail_total+fl))

  if [[ -n "${KNOWN_FAIL[$base]:-}" ]]; then
    if [[ "$fl" -eq 0 && "$st" == "ok" ]]; then
      echo "RED: $base — KNOWN_FAIL is now GREEN (${KNOWN_FAIL[$base]}); update the ledger"
      red=$((red+1))
    else
      echo "KNOWN-FAIL: $base pass=$p fail=$fl (${KNOWN_FAIL[$base]})"
    fi
    continue
  fi

  if [[ "$st" != "ok" || "$fl" -ne 0 ]]; then
    echo "RED: $base pass=$p fail=$fl status=$st"
    red=$((red+1))
  else
    echo "OK: $base pass=$p"
  fi
done

echo
echo "wasm-corpus: $files files run, $pass_total passed, $fail_total failed, $red red"
# Exit status of the script: 0 only when nothing went red.
[[ $red -eq 0 ]]