diff --git a/tests/playwright/hs-behavioral.spec.js b/tests/playwright/hs-behavioral.spec.js index 53b8bfcd..42883385 100644 --- a/tests/playwright/hs-behavioral.spec.js +++ b/tests/playwright/hs-behavioral.spec.js @@ -1,12 +1,11 @@ // @ts-check /** - * Hyperscript behavioral tests — SX tests running in Playwright sandbox. + * Hyperscript behavioral tests — SX tests in Playwright sandbox. * - * Architecture: - * - SX test file registers test thunks via deftest-deferred (doesn't run them) - * - This spec iterates over registered tests, running each with a 3s timeout - * - Hanging tests (parser infinite loops) fail with TIMEOUT, page is rebooted - * - Results are reported per-category with a hard gate on pass count + * Tests are registered during file load (deferred), then each is run + * individually via page.evaluate with a 3s Promise.race timeout. + * Hanging tests fail with TIMEOUT. After a timeout, the page is + * closed and a fresh one is created to avoid cascading hangs. */ const { test, expect } = require('playwright/test'); const fs = require('fs'); @@ -16,28 +15,41 @@ const PROJECT_ROOT = path.resolve(__dirname, '../..'); const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm'); const SX_DIR = path.join(WASM_DIR, 'sx'); -const SANDBOX_STACKS = { - web: [ - 'render', 'core-signals', 'signals', 'deps', 'router', - 'page-helpers', 'freeze', 'dom', 'browser', - 'adapter-html', 'adapter-sx', 'adapter-dom', - 'boot-helpers', 'hypersx', 'engine', 'orchestration', 'boot', - ], - hs: [ - 'hs-tokenizer', 'hs-parser', 'hs-compiler', 'hs-runtime', 'hs-integration', - ], -}; +const WEB_MODULES = [ + 'render', 'core-signals', 'signals', 'deps', 'router', + 'page-helpers', 'freeze', 'dom', 'browser', + 'adapter-html', 'adapter-sx', 'adapter-dom', + 'boot-helpers', 'hypersx', 'engine', 'orchestration', 'boot', +]; +const HS_MODULES = [ + 'hs-tokenizer', 'hs-parser', 'hs-compiler', 'hs-runtime', 'hs-integration', +]; + +// Cache module sources — avoid re-reading files on reboot +const MODULE_CACHE = {}; +function getModuleSrc(mod) { + if (MODULE_CACHE[mod]) return MODULE_CACHE[mod]; + const sxPath = path.join(SX_DIR, mod + '.sx'); + const libPath = path.join(PROJECT_ROOT, 'lib/hyperscript', mod.replace(/^hs-/, '') + '.sx'); + try { + MODULE_CACHE[mod] = fs.existsSync(sxPath) ? fs.readFileSync(sxPath, 'utf8') : fs.readFileSync(libPath, 'utf8'); + } catch(e) { MODULE_CACHE[mod] = null; } + return MODULE_CACHE[mod]; +} + +// Cache test file sources +const TEST_FILES = ['spec/harness.sx', 'spec/tests/test-framework.sx', 'spec/tests/test-hyperscript-behavioral.sx']; +const TEST_FILE_CACHE = {}; +for (const f of TEST_FILES) { + TEST_FILE_CACHE[f] = fs.readFileSync(path.join(PROJECT_ROOT, f), 'utf8'); +} -/** - * Boot WASM kernel with hs stack. Returns loadErrors array. - */ async function bootSandbox(page) { await page.goto('about:blank'); const kernelSrc = fs.readFileSync(path.join(WASM_DIR, 'sx_browser.bc.js'), 'utf8'); await page.addScriptTag({ content: kernelSrc }); await page.waitForFunction('!!window.SxKernel', { timeout: 10000 }); - // Register FFI + IO driver (compact) await page.evaluate(() => { const K = window.SxKernel; K.registerNative('host-global', a => { const n=a[0]; return (n in globalThis)?globalThis[n]:null; }); @@ -57,11 +69,7 @@ async function bootSandbox(page) { const fn=a[0]; if(typeof fn==='function'&&fn.__sx_handle===undefined)return fn; if(fn&&fn.__sx_handle!==undefined){ - return function(){ - const r=K.callFn(fn,Array.from(arguments)); - if(window._driveAsync)window._driveAsync(r); - return r; - }; + return function(){const r=K.callFn(fn,Array.from(arguments));if(window._driveAsync)window._driveAsync(r);return r;}; } return function(){}; }); @@ -73,214 +81,183 @@ async function bootSandbox(page) { return typeof o; }); K.registerNative('host-await', a => { - const[p,cb]=a; - if(p&&typeof p.then==='function'){ - const f=(cb&&cb.__sx_handle!==undefined)?v=>K.callFn(cb,[v]):()=>{}; - p.then(f); - } + const[p,cb]=a;if(p&&typeof p.then==='function'){const f=(cb&&cb.__sx_handle!==undefined)?v=>K.callFn(cb,[v]):()=>{};p.then(f);} }); K.registerNative('load-library!', () => false); - - // IO suspension driver window._driveAsync = function driveAsync(result) { if(!result||!result.suspended)return; const req=result.request;const items=req&&(req.items||req); const op=items&&items[0];const opName=typeof op==='string'?op:(op&&op.name)||String(op); const arg=items&&items[1]; - function doResume(val,delay){ - setTimeout(()=>{try{const r=result.resume(val);driveAsync(r);}catch(e){}},delay); - } + function doResume(val,delay){setTimeout(()=>{try{const r=result.resume(val);driveAsync(r);}catch(e){}},delay);} if(opName==='io-sleep'||opName==='wait')doResume(null,Math.min(typeof arg==='number'?arg:0,10)); - else if(opName==='io-navigate'){} else if(opName==='io-fetch')doResume({ok:true,text:''},1); - else{} }; - K.eval('(define SX_VERSION "hs-test-1.0")'); K.eval('(define SX_ENGINE "ocaml-vm-sandbox")'); K.eval('(define parse sx-parse)'); K.eval('(define serialize sx-serialize)'); }); - // Load web + hs modules - const allModules = [...SANDBOX_STACKS.web, ...SANDBOX_STACKS.hs]; const loadErrors = []; await page.evaluate(() => { if (window.SxKernel.beginModuleLoad) window.SxKernel.beginModuleLoad(); }); - for (const mod of allModules) { - const sxPath = path.join(SX_DIR, mod + '.sx'); - const libPath = path.join(PROJECT_ROOT, 'lib/hyperscript', mod.replace(/^hs-/, '') + '.sx'); - let src; - try { src = fs.existsSync(sxPath) ? fs.readFileSync(sxPath, 'utf8') : fs.readFileSync(libPath, 'utf8'); } - catch(e) { loadErrors.push(mod + ': file not found'); continue; } + for (const mod of [...WEB_MODULES, ...HS_MODULES]) { + const src = getModuleSrc(mod); + if (!src) { loadErrors.push(mod); continue; } const err = await page.evaluate(s => { - try { window.SxKernel.load(s); return null; } - catch(e) { return e.message; } + try { window.SxKernel.load(s); return null; } catch(e) { return e.message; } }, src); if (err) loadErrors.push(mod + ': ' + err); } await page.evaluate(() => { if (window.SxKernel.endModuleLoad) window.SxKernel.endModuleLoad(); }); - // Define deferred test infrastructure + // Deferred test registration await page.evaluate(() => { const K = window.SxKernel; - // _test_registry: list of {suite, name, thunk} K.eval('(define _test-registry (list))'); K.eval('(define _test-suite "")'); - - // deftest-deferred: register thunk, don't run it - // The SX file uses standard defsuite/deftest but we redefine them to defer - K.eval(`(define push-suite (fn (name) (set! _test-suite name)))`); - K.eval(`(define pop-suite (fn () (set! _test-suite "")))`); - - // try-call just runs the thunk — no error handling needed since we defer + K.eval('(define push-suite (fn (name) (set! _test-suite name)))'); + K.eval('(define pop-suite (fn () (set! _test-suite "")))'); K.eval(`(define try-call (fn (thunk) - (set! _test-registry - (append _test-registry - (list {:suite _test-suite :thunk thunk}))) + (set! _test-registry (append _test-registry (list {:suite _test-suite :thunk thunk}))) {:ok true}))`); - - // report-pass/report-fail record the NAME for the just-registered thunk - // Since try-call always returns {:ok true}, report-pass is always called. - // We patch the last entry with the name. K.eval(`(define report-pass (fn (name) - (let ((last-idx (- (len _test-registry) 1))) - (when (>= last-idx 0) - (let ((entry (nth _test-registry last-idx))) - (dict-set! entry "name" name))))))`); + (let ((i (- (len _test-registry) 1))) + (when (>= i 0) (dict-set! (nth _test-registry i) "name" name)))))`); K.eval(`(define report-fail (fn (name error) - (let ((last-idx (- (len _test-registry) 1))) - (when (>= last-idx 0) - (let ((entry (nth _test-registry last-idx))) - (dict-set! entry "name" name))))))`); + (let ((i (- (len _test-registry) 1))) + (when (>= i 0) (dict-set! (nth _test-registry i) "name" name)))))`); }); - // Load harness (for assert/assert=) + test framework + behavioral tests - for (const f of ['spec/harness.sx', 'spec/tests/test-framework.sx', 'spec/tests/test-hyperscript-behavioral.sx']) { - const src = fs.readFileSync(path.join(PROJECT_ROOT, f), 'utf8'); + for (const f of TEST_FILES) { const err = await page.evaluate(s => { - try { window.SxKernel.load(s); return null; } - catch(e) { return 'LOAD: ' + e.message; } - }, src); + try { window.SxKernel.load(s); return null; } catch(e) { return e.message; } + }, TEST_FILE_CACHE[f]); if (err) loadErrors.push(f + ': ' + err); } - return loadErrors; } -/** - * Get list of registered tests: [{suite, name, index}] - */ -async function getTestList(page) { - return page.evaluate(() => { - const K = window.SxKernel; - const count = K.eval('(len _test-registry)'); - const tests = []; - for (let i = 0; i < count; i++) { - const suite = K.eval(`(get (nth _test-registry ${i}) "suite")`) || ''; - const name = K.eval(`(get (nth _test-registry ${i}) "name")`) || `test-${i}`; - tests.push({ suite, name, index: i }); - } - return tests; - }); -} - -/** - * Run a single test by index. Returns {pass, error}. - */ -async function runTest(page, index) { - return page.evaluate(idx => { - const K = window.SxKernel; - // Clean DOM between tests - const body = document.querySelector('body'); - if (body) body.innerHTML = ''; - - const entry = K.eval(`(nth _test-registry ${idx})`); - const thunk = K.eval(`(get (nth _test-registry ${idx}) "thunk")`); - if (!thunk) return { pass: false, error: 'no thunk registered' }; - - let lastError = null; - const origError = console.error; - console.error = function() { - const msg = Array.from(arguments).join(' '); - if (msg.startsWith('[sx]')) lastError = msg; - origError.apply(console, arguments); - }; - try { - K.callFn(thunk, []); - console.error = origError; - if (lastError) return { pass: false, error: lastError.slice(0, 200) }; - return { pass: true, error: null }; - } catch(e) { - console.error = origError; - return { pass: false, error: (e.message || String(e)).slice(0, 200) }; - } - }, index); -} - // =========================================================================== -// Test suite -// =========================================================================== - test.describe('Hyperscript behavioral tests', () => { - test.describe.configure({ timeout: 600000 }); // 10 min for 831 tests + test.describe.configure({ timeout: 600000 }); test('upstream conformance', async ({ browser }) => { let page = await browser.newPage(); - const loadErrors = await bootSandbox(page); + let loadErrors = await bootSandbox(page); expect(loadErrors).toEqual([]); - const testList = await getTestList(page); - console.log(`\n Registered: ${testList.length} tests`); + // Get test list + const testList = await page.evaluate(() => { + const K = window.SxKernel; + const count = K.eval('(len _test-registry)'); + const tests = []; + for (let i = 0; i < count; i++) { + tests.push({ + s: K.eval(`(get (nth _test-registry ${i}) "suite")`) || '', + n: K.eval(`(get (nth _test-registry ${i}) "name")`) || `test-${i}`, + }); + } + return tests; + }); - // Run each test with a 3s timeout — hang = fail + page reboot + // Run each test individually with timeout const results = []; - for (const t of testList) { + let consecutiveTimeouts = 0; + + for (let i = 0; i < testList.length; i++) { + const t = testList[i]; + + // If page is dead (after timeout), reboot + if (consecutiveTimeouts > 0) { + // After a timeout, the page.evaluate from Promise.race is orphaned. + // We must close + reopen to get a clean page. + try { await page.close(); } catch(_) {} + page = await browser.newPage(); + loadErrors = await bootSandbox(page); + if (loadErrors.length > 0) { + for (let j = i; j < testList.length; j++) + results.push({ s: testList[j].s, n: testList[j].n, p: false, e: 'reboot failed' }); + break; + } + consecutiveTimeouts = 0; + } + let result; try { result = await Promise.race([ - runTest(page, t.index), - new Promise(resolve => - setTimeout(() => resolve({ pass: false, error: 'TIMEOUT: test hung (>3s)' }), 3000)) + page.evaluate(idx => { + const K = window.SxKernel; + document.body.innerHTML = ''; + const thunk = K.eval(`(get (nth _test-registry ${idx}) "thunk")`); + if (!thunk) return { p: false, e: 'no thunk' }; + let lastErr = null; + const orig = console.error; + console.error = function() { + const m = Array.from(arguments).join(' '); + if (m.startsWith('[sx]')) lastErr = m; + orig.apply(console, arguments); + }; + try { + K.callFn(thunk, []); + console.error = orig; + return lastErr ? { p: false, e: lastErr.replace(/[\\"]/g, ' ').slice(0, 150) } : { p: true, e: null }; + } catch(e) { + console.error = orig; + return { p: false, e: (e.message || '').replace(/[\\"]/g, ' ').slice(0, 150) }; + } + }, i), + new Promise(resolve => setTimeout(() => resolve({ p: false, e: 'TIMEOUT' }), 3000)) ]); } catch(e) { - result = { pass: false, error: 'CRASH: ' + (e.message || '').slice(0, 100) }; + result = { p: false, e: 'CRASH: ' + (e.message || '').slice(0, 80) }; } - // If test timed out, the page is stuck — reboot and re-register - if (result.error && (result.error.startsWith('TIMEOUT') || result.error.startsWith('CRASH'))) { - await page.close().catch(() => {}); - page = await browser.newPage(); - const rebootErrors = await bootSandbox(page); - if (rebootErrors.length > 0) { - // Can't recover — mark remaining tests as failed - for (let j = testList.indexOf(t) + 1; j < testList.length; j++) { - results.push({ suite: testList[j].suite, name: testList[j].name, pass: false, error: 'SKIPPED: page reboot failed' }); - } - break; - } + if (result.e === 'TIMEOUT' || (result.e && result.e.startsWith('CRASH'))) { + consecutiveTimeouts++; } - results.push({ suite: t.suite, name: t.name, pass: result.pass, error: result.error }); + results.push({ s: t.s, n: t.n, p: result.p, e: result.e }); } - await page.close(); + try { await page.close(); } catch(_) {} // Tally let passed = 0, failed = 0; const cats = {}; + const errTypes = {}; for (const r of results) { - if (r.pass) passed++; else failed++; - if (!cats[r.suite]) cats[r.suite] = { p: 0, f: 0 }; - if (r.pass) cats[r.suite].p++; else cats[r.suite].f++; + if (r.p) passed++; else { + failed++; + const e = r.e || ''; + let t = 'other'; + if (e === 'TIMEOUT') t = 'timeout'; + else if (e.includes('NOT IMPLEMENTED')) t = 'stub'; + else if (e.includes('callFn')) t = 'crash'; + else if (e.includes('Assertion')) t = 'assert-fail'; + else if (e.includes('Unhandled')) t = 'unhandled'; + else if (e.includes('Expected')) t = 'wrong-value'; + else if (e.includes('Cannot read')) t = 'null-ref'; + else if (e.includes('Undefined')) t = 'undef-sym'; + else if (e.includes('no thunk')) t = 'no-thunk'; + else if (e.includes('reboot')) t = 'reboot-fail'; + if (!errTypes[t]) errTypes[t] = 0; + errTypes[t]++; + } + if (!cats[r.s]) cats[r.s] = { p: 0, f: 0 }; + if (r.p) cats[r.s].p++; else cats[r.s].f++; } console.log(`\n Upstream conformance: ${passed}/${results.length} (${(100*passed/results.length).toFixed(0)}%)`); for (const [cat, s] of Object.entries(cats).sort((a,b) => b[1].p - a[1].p)) { const mark = s.f === 0 ? `✓ ${s.p}` : `${s.p}/${s.p+s.f}`; console.log(` ${cat}: ${mark}`); } + console.log(` Failure types:`); + for (const [t, n] of Object.entries(errTypes).sort((a,b) => b[1] - a[1])) { + console.log(` ${t}: ${n}`); + } - // Hard gate - expect(results.length).toBeGreaterThan(0); + expect(results.length).toBeGreaterThanOrEqual(830); expect(passed).toBeGreaterThanOrEqual(420); }); });