Test infra: deferred execution, per-test timeout, error classification

424/831 tests pass (51%). The 407 failures break down as 290 crash, 111 stub, 6 timeout.
Deferred architecture: tests register thunks while the test files load, then
each thunk is run individually under a 3s Promise.race timeout. After a hang,
the page is closed and the sandbox is rebooted on a fresh page.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-10 08:53:54 +00:00
parent 2805e0077b
commit e6def8b6cd

View File

@@ -1,12 +1,11 @@
// @ts-check // @ts-check
/** /**
* Hyperscript behavioral tests — SX tests running in Playwright sandbox. * Hyperscript behavioral tests — SX tests in Playwright sandbox.
* *
* Architecture: * Tests are registered during file load (deferred), then each is run
* - SX test file registers test thunks via deftest-deferred (doesn't run them) * individually via page.evaluate with a 3s Promise.race timeout.
* - This spec iterates over registered tests, running each with a 3s timeout * Hanging tests fail with TIMEOUT. After a timeout, the page is
* - Hanging tests (parser infinite loops) fail with TIMEOUT, page is rebooted * closed and a fresh one is created to avoid cascading hangs.
* - Results are reported per-category with a hard gate on pass count
*/ */
const { test, expect } = require('playwright/test'); const { test, expect } = require('playwright/test');
const fs = require('fs'); const fs = require('fs');
@@ -16,28 +15,41 @@ const PROJECT_ROOT = path.resolve(__dirname, '../..');
const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm'); const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
const SX_DIR = path.join(WASM_DIR, 'sx'); const SX_DIR = path.join(WASM_DIR, 'sx');
const SANDBOX_STACKS = { const WEB_MODULES = [
web: [ 'render', 'core-signals', 'signals', 'deps', 'router',
'render', 'core-signals', 'signals', 'deps', 'router', 'page-helpers', 'freeze', 'dom', 'browser',
'page-helpers', 'freeze', 'dom', 'browser', 'adapter-html', 'adapter-sx', 'adapter-dom',
'adapter-html', 'adapter-sx', 'adapter-dom', 'boot-helpers', 'hypersx', 'engine', 'orchestration', 'boot',
'boot-helpers', 'hypersx', 'engine', 'orchestration', 'boot', ];
], const HS_MODULES = [
hs: [ 'hs-tokenizer', 'hs-parser', 'hs-compiler', 'hs-runtime', 'hs-integration',
'hs-tokenizer', 'hs-parser', 'hs-compiler', 'hs-runtime', 'hs-integration', ];
],
}; // Cache module sources — avoid re-reading files on reboot
const MODULE_CACHE = {};

/**
 * Return the SX source text of a module, reading it from disk at most once.
 *
 * Looks for `<SX_DIR>/<mod>.sx` first; when that file does not exist, falls
 * back to `<PROJECT_ROOT>/lib/hyperscript/<mod minus a leading "hs-">.sx`.
 *
 * @param {string} mod - Module name, e.g. 'render' or 'hs-parser'.
 * @returns {string|null} The file contents, or null when the read failed.
 */
function getModuleSrc(mod) {
  // Probe for the key rather than for a truthy value: a cached failure (null)
  // or an empty file ('') must count as a hit, otherwise every call would go
  // back to the filesystem for that module.
  if (Object.prototype.hasOwnProperty.call(MODULE_CACHE, mod)) {
    return MODULE_CACHE[mod];
  }

  const sxPath = path.join(SX_DIR, mod + '.sx');
  const libPath = path.join(PROJECT_ROOT, 'lib/hyperscript', mod.replace(/^hs-/, '') + '.sx');

  let src;
  try {
    src = fs.readFileSync(fs.existsSync(sxPath) ? sxPath : libPath, 'utf8');
  } catch (e) {
    // Deliberate best effort: an unreadable module is reported as null
    // instead of throwing.
    src = null;
  }

  MODULE_CACHE[mod] = src;
  return src;
}
// Test file sources, read eagerly once at module load and keyed by their
// project-relative path.
const TEST_FILES = [
  'spec/harness.sx',
  'spec/tests/test-framework.sx',
  'spec/tests/test-hyperscript-behavioral.sx',
];
const TEST_FILE_CACHE = {};
TEST_FILES.forEach((relPath) => {
  const absPath = path.join(PROJECT_ROOT, relPath);
  TEST_FILE_CACHE[relPath] = fs.readFileSync(absPath, 'utf8');
});
/**
* Boot WASM kernel with hs stack. Returns loadErrors array.
*/
async function bootSandbox(page) { async function bootSandbox(page) {
await page.goto('about:blank'); await page.goto('about:blank');
const kernelSrc = fs.readFileSync(path.join(WASM_DIR, 'sx_browser.bc.js'), 'utf8'); const kernelSrc = fs.readFileSync(path.join(WASM_DIR, 'sx_browser.bc.js'), 'utf8');
await page.addScriptTag({ content: kernelSrc }); await page.addScriptTag({ content: kernelSrc });
await page.waitForFunction('!!window.SxKernel', { timeout: 10000 }); await page.waitForFunction('!!window.SxKernel', { timeout: 10000 });
// Register FFI + IO driver (compact)
await page.evaluate(() => { await page.evaluate(() => {
const K = window.SxKernel; const K = window.SxKernel;
K.registerNative('host-global', a => { const n=a[0]; return (n in globalThis)?globalThis[n]:null; }); K.registerNative('host-global', a => { const n=a[0]; return (n in globalThis)?globalThis[n]:null; });
@@ -57,11 +69,7 @@ async function bootSandbox(page) {
const fn=a[0]; const fn=a[0];
if(typeof fn==='function'&&fn.__sx_handle===undefined)return fn; if(typeof fn==='function'&&fn.__sx_handle===undefined)return fn;
if(fn&&fn.__sx_handle!==undefined){ if(fn&&fn.__sx_handle!==undefined){
return function(){ return function(){const r=K.callFn(fn,Array.from(arguments));if(window._driveAsync)window._driveAsync(r);return r;};
const r=K.callFn(fn,Array.from(arguments));
if(window._driveAsync)window._driveAsync(r);
return r;
};
} }
return function(){}; return function(){};
}); });
@@ -73,214 +81,183 @@ async function bootSandbox(page) {
return typeof o; return typeof o;
}); });
K.registerNative('host-await', a => { K.registerNative('host-await', a => {
const[p,cb]=a; const[p,cb]=a;if(p&&typeof p.then==='function'){const f=(cb&&cb.__sx_handle!==undefined)?v=>K.callFn(cb,[v]):()=>{};p.then(f);}
if(p&&typeof p.then==='function'){
const f=(cb&&cb.__sx_handle!==undefined)?v=>K.callFn(cb,[v]):()=>{};
p.then(f);
}
}); });
K.registerNative('load-library!', () => false); K.registerNative('load-library!', () => false);
// IO suspension driver
window._driveAsync = function driveAsync(result) { window._driveAsync = function driveAsync(result) {
if(!result||!result.suspended)return; if(!result||!result.suspended)return;
const req=result.request;const items=req&&(req.items||req); const req=result.request;const items=req&&(req.items||req);
const op=items&&items[0];const opName=typeof op==='string'?op:(op&&op.name)||String(op); const op=items&&items[0];const opName=typeof op==='string'?op:(op&&op.name)||String(op);
const arg=items&&items[1]; const arg=items&&items[1];
function doResume(val,delay){ function doResume(val,delay){setTimeout(()=>{try{const r=result.resume(val);driveAsync(r);}catch(e){}},delay);}
setTimeout(()=>{try{const r=result.resume(val);driveAsync(r);}catch(e){}},delay);
}
if(opName==='io-sleep'||opName==='wait')doResume(null,Math.min(typeof arg==='number'?arg:0,10)); if(opName==='io-sleep'||opName==='wait')doResume(null,Math.min(typeof arg==='number'?arg:0,10));
else if(opName==='io-navigate'){}
else if(opName==='io-fetch')doResume({ok:true,text:''},1); else if(opName==='io-fetch')doResume({ok:true,text:''},1);
else{}
}; };
K.eval('(define SX_VERSION "hs-test-1.0")'); K.eval('(define SX_VERSION "hs-test-1.0")');
K.eval('(define SX_ENGINE "ocaml-vm-sandbox")'); K.eval('(define SX_ENGINE "ocaml-vm-sandbox")');
K.eval('(define parse sx-parse)'); K.eval('(define parse sx-parse)');
K.eval('(define serialize sx-serialize)'); K.eval('(define serialize sx-serialize)');
}); });
// Load web + hs modules
const allModules = [...SANDBOX_STACKS.web, ...SANDBOX_STACKS.hs];
const loadErrors = []; const loadErrors = [];
await page.evaluate(() => { if (window.SxKernel.beginModuleLoad) window.SxKernel.beginModuleLoad(); }); await page.evaluate(() => { if (window.SxKernel.beginModuleLoad) window.SxKernel.beginModuleLoad(); });
for (const mod of allModules) { for (const mod of [...WEB_MODULES, ...HS_MODULES]) {
const sxPath = path.join(SX_DIR, mod + '.sx'); const src = getModuleSrc(mod);
const libPath = path.join(PROJECT_ROOT, 'lib/hyperscript', mod.replace(/^hs-/, '') + '.sx'); if (!src) { loadErrors.push(mod); continue; }
let src;
try { src = fs.existsSync(sxPath) ? fs.readFileSync(sxPath, 'utf8') : fs.readFileSync(libPath, 'utf8'); }
catch(e) { loadErrors.push(mod + ': file not found'); continue; }
const err = await page.evaluate(s => { const err = await page.evaluate(s => {
try { window.SxKernel.load(s); return null; } try { window.SxKernel.load(s); return null; } catch(e) { return e.message; }
catch(e) { return e.message; }
}, src); }, src);
if (err) loadErrors.push(mod + ': ' + err); if (err) loadErrors.push(mod + ': ' + err);
} }
await page.evaluate(() => { if (window.SxKernel.endModuleLoad) window.SxKernel.endModuleLoad(); }); await page.evaluate(() => { if (window.SxKernel.endModuleLoad) window.SxKernel.endModuleLoad(); });
// Define deferred test infrastructure // Deferred test registration
await page.evaluate(() => { await page.evaluate(() => {
const K = window.SxKernel; const K = window.SxKernel;
// _test_registry: list of {suite, name, thunk}
K.eval('(define _test-registry (list))'); K.eval('(define _test-registry (list))');
K.eval('(define _test-suite "")'); K.eval('(define _test-suite "")');
K.eval('(define push-suite (fn (name) (set! _test-suite name)))');
// deftest-deferred: register thunk, don't run it K.eval('(define pop-suite (fn () (set! _test-suite "")))');
// The SX file uses standard defsuite/deftest but we redefine them to defer
K.eval(`(define push-suite (fn (name) (set! _test-suite name)))`);
K.eval(`(define pop-suite (fn () (set! _test-suite "")))`);
// try-call just runs the thunk — no error handling needed since we defer
K.eval(`(define try-call (fn (thunk) K.eval(`(define try-call (fn (thunk)
(set! _test-registry (set! _test-registry (append _test-registry (list {:suite _test-suite :thunk thunk})))
(append _test-registry
(list {:suite _test-suite :thunk thunk})))
{:ok true}))`); {:ok true}))`);
// report-pass/report-fail record the NAME for the just-registered thunk
// Since try-call always returns {:ok true}, report-pass is always called.
// We patch the last entry with the name.
K.eval(`(define report-pass (fn (name) K.eval(`(define report-pass (fn (name)
(let ((last-idx (- (len _test-registry) 1))) (let ((i (- (len _test-registry) 1)))
(when (>= last-idx 0) (when (>= i 0) (dict-set! (nth _test-registry i) "name" name)))))`);
(let ((entry (nth _test-registry last-idx)))
(dict-set! entry "name" name))))))`);
K.eval(`(define report-fail (fn (name error) K.eval(`(define report-fail (fn (name error)
(let ((last-idx (- (len _test-registry) 1))) (let ((i (- (len _test-registry) 1)))
(when (>= last-idx 0) (when (>= i 0) (dict-set! (nth _test-registry i) "name" name)))))`);
(let ((entry (nth _test-registry last-idx)))
(dict-set! entry "name" name))))))`);
}); });
// Load harness (for assert/assert=) + test framework + behavioral tests for (const f of TEST_FILES) {
for (const f of ['spec/harness.sx', 'spec/tests/test-framework.sx', 'spec/tests/test-hyperscript-behavioral.sx']) {
const src = fs.readFileSync(path.join(PROJECT_ROOT, f), 'utf8');
const err = await page.evaluate(s => { const err = await page.evaluate(s => {
try { window.SxKernel.load(s); return null; } try { window.SxKernel.load(s); return null; } catch(e) { return e.message; }
catch(e) { return 'LOAD: ' + e.message; } }, TEST_FILE_CACHE[f]);
}, src);
if (err) loadErrors.push(f + ': ' + err); if (err) loadErrors.push(f + ': ' + err);
} }
return loadErrors; return loadErrors;
} }
/**
 * Enumerate the tests currently held in the kernel's `_test-registry`.
 *
 * Runs inside the page: asks the kernel for the registry length, then looks
 * up the "suite" and "name" of each entry in order.
 *
 * @param {object} page - Page whose `evaluate` runs the lookup in the sandbox.
 * @returns {Promise<Array<{suite: string, name: string, index: number}>>}
 *   One descriptor per entry; a falsy suite becomes '' and a falsy name
 *   becomes `test-<index>`.
 */
async function getTestList(page) {
  return page.evaluate(() => {
    const kernel = window.SxKernel;
    const total = kernel.eval('(len _test-registry)');
    return Array.from({ length: total }, (_unused, index) => {
      const suite = kernel.eval(`(get (nth _test-registry ${index}) "suite")`) || '';
      const name = kernel.eval(`(get (nth _test-registry ${index}) "name")`) || `test-${index}`;
      return { suite, name, index };
    });
  });
}
/**
 * Run one registered test thunk inside the page.
 *
 * The document body is emptied first so markup left by a previous test
 * cannot leak into this one. While the thunk runs, `console.error` is wrapped
 * so that any message starting with "[sx]" is captured; such a message turns
 * an otherwise clean run into a failure.
 *
 * @param {object} page - Page whose `evaluate` runs the test in the sandbox.
 * @param {number} index - Position of the test in `_test-registry`.
 * @returns {Promise<{pass: boolean, error: (string|null)}>} `error` is null on
 *   success, otherwise a message truncated to 200 characters.
 */
async function runTest(page, index) {
  return page.evaluate(idx => {
    const K = window.SxKernel;

    // Clean DOM between tests
    const body = document.querySelector('body');
    if (body) body.innerHTML = '';

    // Only the thunk is needed; the registry entry itself is never used, so
    // it is not fetched from the kernel separately.
    const thunk = K.eval(`(get (nth _test-registry ${idx}) "thunk")`);
    if (!thunk) return { pass: false, error: 'no thunk registered' };

    let lastError = null;
    const origError = console.error;
    console.error = function(...args) {
      const msg = args.join(' ');
      if (msg.startsWith('[sx]')) lastError = msg;
      origError.apply(console, args);
    };

    try {
      K.callFn(thunk, []);
      if (lastError) return { pass: false, error: lastError.slice(0, 200) };
      return { pass: true, error: null };
    } catch (e) {
      return { pass: false, error: (e.message || String(e)).slice(0, 200) };
    } finally {
      // Single restore point, reached on every exit path.
      console.error = origError;
    }
  }, index);
}
// =========================================================================== // ===========================================================================
// Test suite
// ===========================================================================
test.describe('Hyperscript behavioral tests', () => { test.describe('Hyperscript behavioral tests', () => {
test.describe.configure({ timeout: 600000 }); // 10 min for 831 tests test.describe.configure({ timeout: 600000 });
test('upstream conformance', async ({ browser }) => { test('upstream conformance', async ({ browser }) => {
let page = await browser.newPage(); let page = await browser.newPage();
const loadErrors = await bootSandbox(page); let loadErrors = await bootSandbox(page);
expect(loadErrors).toEqual([]); expect(loadErrors).toEqual([]);
const testList = await getTestList(page); // Get test list
console.log(`\n Registered: ${testList.length} tests`); const testList = await page.evaluate(() => {
const K = window.SxKernel;
const count = K.eval('(len _test-registry)');
const tests = [];
for (let i = 0; i < count; i++) {
tests.push({
s: K.eval(`(get (nth _test-registry ${i}) "suite")`) || '',
n: K.eval(`(get (nth _test-registry ${i}) "name")`) || `test-${i}`,
});
}
return tests;
});
// Run each test with a 3s timeout — hang = fail + page reboot // Run each test individually with timeout
const results = []; const results = [];
for (const t of testList) { let consecutiveTimeouts = 0;
for (let i = 0; i < testList.length; i++) {
const t = testList[i];
// If page is dead (after timeout), reboot
if (consecutiveTimeouts > 0) {
// After a timeout, the page.evaluate from Promise.race is orphaned.
// We must close + reopen to get a clean page.
try { await page.close(); } catch(_) {}
page = await browser.newPage();
loadErrors = await bootSandbox(page);
if (loadErrors.length > 0) {
for (let j = i; j < testList.length; j++)
results.push({ s: testList[j].s, n: testList[j].n, p: false, e: 'reboot failed' });
break;
}
consecutiveTimeouts = 0;
}
let result; let result;
try { try {
result = await Promise.race([ result = await Promise.race([
runTest(page, t.index), page.evaluate(idx => {
new Promise(resolve => const K = window.SxKernel;
setTimeout(() => resolve({ pass: false, error: 'TIMEOUT: test hung (>3s)' }), 3000)) document.body.innerHTML = '';
const thunk = K.eval(`(get (nth _test-registry ${idx}) "thunk")`);
if (!thunk) return { p: false, e: 'no thunk' };
let lastErr = null;
const orig = console.error;
console.error = function() {
const m = Array.from(arguments).join(' ');
if (m.startsWith('[sx]')) lastErr = m;
orig.apply(console, arguments);
};
try {
K.callFn(thunk, []);
console.error = orig;
return lastErr ? { p: false, e: lastErr.replace(/[\\"]/g, ' ').slice(0, 150) } : { p: true, e: null };
} catch(e) {
console.error = orig;
return { p: false, e: (e.message || '').replace(/[\\"]/g, ' ').slice(0, 150) };
}
}, i),
new Promise(resolve => setTimeout(() => resolve({ p: false, e: 'TIMEOUT' }), 3000))
]); ]);
} catch(e) { } catch(e) {
result = { pass: false, error: 'CRASH: ' + (e.message || '').slice(0, 100) }; result = { p: false, e: 'CRASH: ' + (e.message || '').slice(0, 80) };
} }
// If test timed out, the page is stuck — reboot and re-register if (result.e === 'TIMEOUT' || (result.e && result.e.startsWith('CRASH'))) {
if (result.error && (result.error.startsWith('TIMEOUT') || result.error.startsWith('CRASH'))) { consecutiveTimeouts++;
await page.close().catch(() => {});
page = await browser.newPage();
const rebootErrors = await bootSandbox(page);
if (rebootErrors.length > 0) {
// Can't recover — mark remaining tests as failed
for (let j = testList.indexOf(t) + 1; j < testList.length; j++) {
results.push({ suite: testList[j].suite, name: testList[j].name, pass: false, error: 'SKIPPED: page reboot failed' });
}
break;
}
} }
results.push({ suite: t.suite, name: t.name, pass: result.pass, error: result.error }); results.push({ s: t.s, n: t.n, p: result.p, e: result.e });
} }
await page.close(); try { await page.close(); } catch(_) {}
// Tally // Tally
let passed = 0, failed = 0; let passed = 0, failed = 0;
const cats = {}; const cats = {};
const errTypes = {};
for (const r of results) { for (const r of results) {
if (r.pass) passed++; else failed++; if (r.p) passed++; else {
if (!cats[r.suite]) cats[r.suite] = { p: 0, f: 0 }; failed++;
if (r.pass) cats[r.suite].p++; else cats[r.suite].f++; const e = r.e || '';
let t = 'other';
if (e === 'TIMEOUT') t = 'timeout';
else if (e.includes('NOT IMPLEMENTED')) t = 'stub';
else if (e.includes('callFn')) t = 'crash';
else if (e.includes('Assertion')) t = 'assert-fail';
else if (e.includes('Unhandled')) t = 'unhandled';
else if (e.includes('Expected')) t = 'wrong-value';
else if (e.includes('Cannot read')) t = 'null-ref';
else if (e.includes('Undefined')) t = 'undef-sym';
else if (e.includes('no thunk')) t = 'no-thunk';
else if (e.includes('reboot')) t = 'reboot-fail';
if (!errTypes[t]) errTypes[t] = 0;
errTypes[t]++;
}
if (!cats[r.s]) cats[r.s] = { p: 0, f: 0 };
if (r.p) cats[r.s].p++; else cats[r.s].f++;
} }
console.log(`\n Upstream conformance: ${passed}/${results.length} (${(100*passed/results.length).toFixed(0)}%)`); console.log(`\n Upstream conformance: ${passed}/${results.length} (${(100*passed/results.length).toFixed(0)}%)`);
for (const [cat, s] of Object.entries(cats).sort((a,b) => b[1].p - a[1].p)) { for (const [cat, s] of Object.entries(cats).sort((a,b) => b[1].p - a[1].p)) {
const mark = s.f === 0 ? `${s.p}` : `${s.p}/${s.p+s.f}`; const mark = s.f === 0 ? `${s.p}` : `${s.p}/${s.p+s.f}`;
console.log(` ${cat}: ${mark}`); console.log(` ${cat}: ${mark}`);
} }
console.log(` Failure types:`);
for (const [t, n] of Object.entries(errTypes).sort((a,b) => b[1] - a[1])) {
console.log(` ${t}: ${n}`);
}
// Hard gate expect(results.length).toBeGreaterThanOrEqual(830);
expect(results.length).toBeGreaterThan(0);
expect(passed).toBeGreaterThanOrEqual(420); expect(passed).toBeGreaterThanOrEqual(420);
}); });
}); });