HS: return/guard, repeat while/until, if-then fix, script extraction

Parser: if-then consumes 'then' keyword before parsing then-body. Compiler: return→raise, def→guard, repeat while/until dispatch. Runtime: hs-repeat-while, hs-repeat-until. Test gen: script block extraction for def functions. repeat suite: 10→13/30. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-16 21:33:55 +00:00
parent 97818c6de1
commit 76f7e3b68a
9 changed files with 396 additions and 1568 deletions
--- a/tests/playwright/hs-behavioral.spec.js
+++ b/tests/playwright/hs-behavioral.spec.js
@@ -1,11 +1,10 @@
 // @ts-check
 /**
- * Hyperscript behavioral tests — SX tests in Playwright sandbox.
+ * Hyperscript behavioral tests — SX tests running in Playwright sandbox.
 *
- * Tests are registered during file load (deferred), then each is run
- * individually via page.evaluate with a 3s Promise.race timeout.
- * Hanging tests fail with TIMEOUT. After a timeout, the page is
- * closed and a fresh one is created to avoid cascading hangs.
+ * Loads the WASM kernel + hs stack, defines the test platform,
+ * loads test-framework.sx + test-hyperscript-behavioral.sx,
+ * and reports each test individually.
 */
 const { test, expect } = require('playwright/test');
 const fs = require('fs');
@@ -15,41 +14,32 @@ const PROJECT_ROOT = path.resolve(__dirname, '../..');
 const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
 const SX_DIR = path.join(WASM_DIR, 'sx');

-const WEB_MODULES = [
-  'render', 'core-signals', 'signals', 'deps', 'router',
-  'page-helpers', 'freeze', 'dom', 'browser',
-  'adapter-html', 'adapter-sx', 'adapter-dom',
-  'boot-helpers', 'hypersx', 'engine', 'orchestration', 'boot',
-];
-const HS_MODULES = [
-  'hs-tokenizer', 'hs-parser', 'hs-compiler', 'hs-runtime', 'hs-integration',
-];
+const SANDBOX_STACKS = {
+  web: [
+    'render', 'core-signals', 'signals', 'deps', 'router',
+    'page-helpers', 'freeze', 'dom', 'browser',
+    'adapter-html', 'adapter-sx', 'adapter-dom',
+    'boot-helpers', 'hypersx', 'engine', 'orchestration', 'boot',
+  ],
+  hs: [
+    'hs-tokenizer', 'hs-parser', 'hs-compiler', 'hs-runtime', 'hs-integration',
+  ],
+};

-// Cache module sources — avoid re-reading files on reboot
-const MODULE_CACHE = {};
-function getModuleSrc(mod) {
-  if (MODULE_CACHE[mod]) return MODULE_CACHE[mod];
-  const sxPath = path.join(SX_DIR, mod + '.sx');
-  const libPath = path.join(PROJECT_ROOT, 'lib/hyperscript', mod.replace(/^hs-/, '') + '.sx');
-  try {
-    MODULE_CACHE[mod] = fs.existsSync(sxPath) ? fs.readFileSync(sxPath, 'utf8') : fs.readFileSync(libPath, 'utf8');
-  } catch(e) { MODULE_CACHE[mod] = null; }
-  return MODULE_CACHE[mod];
-}
-
-// Cache test file sources
-const TEST_FILES = ['spec/harness.sx', 'spec/tests/test-framework.sx', 'spec/tests/test-hyperscript-behavioral.sx'];
-const TEST_FILE_CACHE = {};
-for (const f of TEST_FILES) {
-  TEST_FILE_CACHE[f] = fs.readFileSync(path.join(PROJECT_ROOT, f), 'utf8');
-}
-
-async function bootSandbox(page) {
+/**
+ * Boot WASM kernel with hs stack, define test platform, load test files.
+ * Returns array of {suite, name, pass, error} for each test.
+ */
+async function runSxTests(page) {
  await page.goto('about:blank');
+  await page.evaluate(() => { document.body.innerHTML = ''; });
+
+  // Inject WASM kernel
  const kernelSrc = fs.readFileSync(path.join(WASM_DIR, 'sx_browser.bc.js'), 'utf8');
  await page.addScriptTag({ content: kernelSrc });
  await page.waitForFunction('!!window.SxKernel', { timeout: 10000 });

+  // Register FFI + IO driver
  await page.evaluate(() => {
    const K = window.SxKernel;
    K.registerNative('host-global', a => { const n=a[0]; return (n in globalThis)?globalThis[n]:null; });
@@ -69,7 +59,11 @@ async function bootSandbox(page) {
      const fn=a[0];
      if(typeof fn==='function'&&fn.__sx_handle===undefined)return fn;
      if(fn&&fn.__sx_handle!==undefined){
-        return function(){const r=K.callFn(fn,Array.from(arguments));if(window._driveAsync)window._driveAsync(r);return r;};
+        return function(){
+          const r=K.callFn(fn,Array.from(arguments));
+          if(window._driveAsync)window._driveAsync(r);
+          return r;
+        };
      }
      return function(){};
    });
@@ -81,260 +75,211 @@ async function bootSandbox(page) {
      return typeof o;
    });
    K.registerNative('host-await', a => {
-      const[p,cb]=a;if(p&&typeof p.then==='function'){const f=(cb&&cb.__sx_handle!==undefined)?v=>K.callFn(cb,[v]):()=>{};p.then(f);}
+      const[p,cb]=a;
+      if(p&&typeof p.then==='function'){
+        const f=(cb&&cb.__sx_handle!==undefined)?v=>K.callFn(cb,[v]):()=>{};
+        p.then(f);
+      }
    });
    K.registerNative('load-library!', () => false);
+
+    // IO suspension driver
+    window._ioTrace = [];
+    window._asyncPending = 0;
    window._driveAsync = function driveAsync(result) {
      if(!result||!result.suspended)return;
-      const req=result.request;const items=req&&(req.items||req);
-      const op=items&&items[0];const opName=typeof op==='string'?op:(op&&op.name)||String(op);
+      window._asyncPending++;
+      const req=result.request; const items=req&&(req.items||req);
+      const op=items&&items[0]; const opName=typeof op==='string'?op:(op&&op.name)||String(op);
      const arg=items&&items[1];
-      function doResume(val,delay){setTimeout(()=>{try{const r=result.resume(val);driveAsync(r);}catch(e){}},delay);}
+      function doResume(val,delay){
+        setTimeout(()=>{
+          try{const r=result.resume(val);window._asyncPending--;driveAsync(r);}
+          catch(e){window._asyncPending--;}
+        },delay);
+      }
      if(opName==='io-sleep'||opName==='wait')doResume(null,Math.min(typeof arg==='number'?arg:0,10));
+      else if(opName==='io-navigate')window._asyncPending--;
      else if(opName==='io-fetch')doResume({ok:true,text:''},1);
+      else window._asyncPending--;
    };
+
    K.eval('(define SX_VERSION "hs-test-1.0")');
    K.eval('(define SX_ENGINE "ocaml-vm-sandbox")');
    K.eval('(define parse sx-parse)');
    K.eval('(define serialize sx-serialize)');
  });

+  // Load web + hs modules
+  const allModules = [...SANDBOX_STACKS.web, ...SANDBOX_STACKS.hs];
  const loadErrors = [];
-  await page.evaluate(() => { if (window.SxKernel.beginModuleLoad) window.SxKernel.beginModuleLoad(); });
-  for (const mod of [...WEB_MODULES, ...HS_MODULES]) {
-    const src = getModuleSrc(mod);
-    if (!src) { loadErrors.push(mod); continue; }
+
+  await page.evaluate(() => {
+    if (window.SxKernel.beginModuleLoad) window.SxKernel.beginModuleLoad();
+  });
+
+  for (const mod of allModules) {
+    const sxPath = path.join(SX_DIR, mod + '.sx');
+    const libPath = path.join(PROJECT_ROOT, 'lib/hyperscript', mod.replace(/^hs-/, '') + '.sx');
+    let src;
+    try {
+      src = fs.existsSync(sxPath) ? fs.readFileSync(sxPath, 'utf8') : fs.readFileSync(libPath, 'utf8');
+    } catch(e) { loadErrors.push(mod + ': file not found'); continue; }
    const err = await page.evaluate(s => {
-      try { window.SxKernel.load(s); return null; } catch(e) { return e.message; }
+      try { window.SxKernel.load(s); return null; }
+      catch(e) { return e.message; }
    }, src);
    if (err) loadErrors.push(mod + ': ' + err);
  }
-  await page.evaluate(() => { if (window.SxKernel.endModuleLoad) window.SxKernel.endModuleLoad(); });

-  // Deferred test registration + helpers
  await page.evaluate(() => {
-    const K = window.SxKernel;
-    K.eval('(define _test-registry (list))');
-    K.eval('(define _test-suite "")');
-    K.eval('(define push-suite (fn (name) (set! _test-suite name)))');
-    K.eval('(define pop-suite (fn () (set! _test-suite "")))');
-    K.eval(`(define try-call (fn (thunk)
-      (set! _test-registry (append _test-registry (list {:suite _test-suite :thunk thunk})))
-      {:ok true}))`);
-    K.eval(`(define report-pass (fn (name)
-      (let ((i (- (len _test-registry) 1)))
-        (when (>= i 0) (dict-set! (nth _test-registry i) "name" name)))))`);
-    K.eval(`(define report-fail (fn (name error)
-      (let ((i (- (len _test-registry) 1)))
-        (when (>= i 0) (dict-set! (nth _test-registry i) "name" name)))))`);
-    // eval-hs: compile and evaluate a hyperscript expression/command, return its value.
-    // If src contains 'return', use as-is. If it starts with a command keyword (set/put/get),
-    // use as-is (the last expression is the result). Otherwise wrap in 'return'.
-    K.eval(`(define eval-hs (fn (src)
-      (let ((has-cmd (or (string-contains? src "return ")
-                         (string-contains? src "then ")
-                         (= "set " (slice src 0 4))
-                         (= "put " (slice src 0 4))
-                         (= "get " (slice src 0 4)))))
-        (let ((wrapped (if has-cmd src (str "return " src))))
-          (let ((sx (hs-to-sx-from-source wrapped)))
-            (eval-expr sx))))))`);
+    if (window.SxKernel.endModuleLoad) window.SxKernel.endModuleLoad();
  });

-  for (const f of TEST_FILES) {
+  if (loadErrors.length > 0) return { loadErrors, results: [] };
+
+  // Define test platform — collects results into an array
+  await page.evaluate(() => {
+    const K = window.SxKernel;
+    K.eval('(define _test-results (list))');
+    K.eval('(define _test-suite "")');
+    // try-call as JS native — catches both SX errors and JS-level crashes.
+    // K.callFn returns null on Eval_error (kernel logs to console.error).
+    // We capture the last console.error to detect failures.
+    K.registerNative('try-call', args => {
+      const thunk = args[0];
+      let lastError = null;
+      const origError = console.error;
+      console.error = function() {
+        const msg = Array.from(arguments).join(' ');
+        if (msg.startsWith('[sx]')) lastError = msg;
+        origError.apply(console, arguments);
+      };
+      try {
+        const r = K.callFn(thunk, []);
+        console.error = origError;
+        if (lastError) {
+          K.eval('(define _tc_err "' + lastError.replace(/\\/g, '\\\\').replace(/"/g, '\\"').slice(0, 200) + '")');
+          return K.eval('{:ok false :error _tc_err}');
+        }
+        return K.eval('{:ok true}');
+      } catch(e) {
+        console.error = origError;
+        const msg = typeof e === 'string' ? e : (e.message || String(e));
+        K.eval('(define _tc_err "' + msg.replace(/\\/g, '\\\\').replace(/"/g, '\\"').slice(0, 200) + '")');
+        return K.eval('{:ok false :error _tc_err}');
+      }
+    });
+    K.eval(`(define report-pass
+      (fn (name) (set! _test-results
+        (append _test-results (list {:suite _test-suite :name name :pass true :error nil})))))`);
+    K.eval(`(define report-fail
+      (fn (name error) (set! _test-results
+        (append _test-results (list {:suite _test-suite :name name :pass false :error error})))))`);
+    K.eval('(define push-suite (fn (name) (set! _test-suite name)))');
+    K.eval('(define pop-suite (fn () (set! _test-suite "")))');
+  });
+
+  // Load test framework + behavioral tests
+  for (const f of ['spec/harness.sx', 'spec/tests/test-framework.sx', 'spec/tests/test-hyperscript-behavioral.sx']) {
+    const src = fs.readFileSync(path.join(PROJECT_ROOT, f), 'utf8');
    const err = await page.evaluate(s => {
-      try { window.SxKernel.load(s); return null; } catch(e) { return e.message; }
-    }, TEST_FILE_CACHE[f]);
-    if (err) loadErrors.push(f + ': ' + err);
+      try { window.SxKernel.load(s); return null; }
+      catch(e) { return 'LOAD ERROR: ' + e.message; }
+    }, src);
+    if (err) {
+      const partial = await page.evaluate(() => window.SxKernel.eval('(len _test-results)'));
+      return { loadErrors: [f + ': ' + err + ' (' + partial + ' results before crash)'], results: [] };
+    }
  }
-  return loadErrors;
+
+  // Collect results — serialize via SX inspect for reliability
+  const resultsRaw = await page.evaluate(() => {
+    const K = window.SxKernel;
+    const count = K.eval('(len _test-results)');
+    const arr = [];
+    for (let i = 0; i < count; i++) {
+      arr.push(K.eval(`(inspect (nth _test-results ${i}))`));
+    }
+    return { count, items: arr };
+  });
+
+  // Parse the SX dict strings
+  const results = resultsRaw.items.map(s => {
+    // s is like '{:suite "hs-add" :name "add class" :pass true :error nil}'
+    const suite = (s.match(/:suite "([^"]*)"/) || [])[1] || '';
+    const name = (s.match(/:name "([^"]*)"/) || [])[1] || '';
+    const pass = s.includes(':pass true');
+    const errorMatch = s.match(/:error "([^"]*)"/);
+    const error = errorMatch ? errorMatch[1] : (s.includes(':error nil') ? null : 'unknown');
+    return { suite, name, pass, error };
+  });
+
+  return { loadErrors, results };
 }

-// ===========================================================================
-test.describe('Hyperscript behavioral tests', () => {
-  test.describe.configure({ timeout: 600000 });

-  test('upstream conformance', async ({ browser }) => {
-    let page = await browser.newPage();
-    let loadErrors = await bootSandbox(page);
+// ===========================================================================
+// Test suite — one Playwright test per SX test
+// ===========================================================================
+
+test.describe('Hyperscript behavioral tests', () => {
+  test.describe.configure({ timeout: 300000 }); // 5 min for 291 tests
+
+  test('SX behavioral test suite', async ({ browser }) => {
+    const page = await browser.newPage();
+    const { loadErrors, results } = await runSxTests(page);
+    await page.close();
+
    expect(loadErrors).toEqual([]);

-    // Get test list
-    const testList = await page.evaluate(() => {
-      const K = window.SxKernel;
-      const count = K.eval('(len _test-registry)');
-      const tests = [];
-      for (let i = 0; i < count; i++) {
-        tests.push({
-          s: K.eval(`(get (nth _test-registry ${i}) "suite")`) || '',
-          n: K.eval(`(get (nth _test-registry ${i}) "name")`) || `test-${i}`,
-        });
-      }
-      return tests;
-    });
-
-    // Run each test individually with timeout
-    const results = [];
-    let consecutiveTimeouts = 0;
-
-    for (let i = 0; i < testList.length; i++) {
-      const t = testList[i];
-
-      // If page is dead (after timeout), reboot
-      if (consecutiveTimeouts > 0) {
-        // After a timeout, the page.evaluate from Promise.race is orphaned.
-        // We must close + reopen to get a clean page.
-        try { await page.close(); } catch(_) {}
-        page = await browser.newPage();
-        loadErrors = await bootSandbox(page);
-        if (loadErrors.length > 0) {
-          for (let j = i; j < testList.length; j++)
-            results.push({ s: testList[j].s, n: testList[j].n, p: false, e: 'reboot failed' });
-          break;
-        }
-        consecutiveTimeouts = 0;
-      }
-
-      let result;
-      try {
-        result = await Promise.race([
-          page.evaluate(async (idx) => {
-            const K = window.SxKernel;
-            const newBody = document.createElement('body');
-            document.documentElement.replaceChild(newBody, document.body);
-
-            const thunk = K.eval(`(get (nth _test-registry ${idx}) "thunk")`);
-            if (!thunk) return { p: false, e: 'no thunk' };
-
-            let lastErr = null;
-            const orig = console.error;
-            console.error = function() {
-              const m = Array.from(arguments).join(' ');
-              if (m.startsWith('[sx]')) lastErr = m;
-              orig.apply(console, arguments);
-            };
-
-            // Drive async suspension chains (wait, fetch, etc.)
-            let pending = 0;
-            const oldDrive = window._driveAsync;
-            window._driveAsync = function driveAsync(result) {
-              if (!result || !result.suspended) return;
-              pending++;
-              const req = result.request;
-              const items = req && (req.items || req);
-              const op = items && items[0];
-              const opName = typeof op === 'string' ? op : (op && op.name) || String(op);
-              const arg = items && items[1];
-              function doResume(val, delay) {
-                setTimeout(() => {
-                  try { const r = result.resume(val); pending--; driveAsync(r); }
-                  catch(e) { pending--; }
-                }, delay);
-              }
-              if (opName === 'io-sleep' || opName === 'wait') doResume(null, Math.min(typeof arg === 'number' ? arg : 0, 10));
-              else if (opName === 'io-fetch') doResume({ok: true, text: ''}, 1);
-              else if (opName === 'io-settle') doResume(null, 5);
-              else if (opName === 'io-wait-event') doResume(null, 5);
-              else pending--;
-            };
-
-            try {
-              const r = K.callFn(thunk, []);
-              // If thunk itself suspended, drive it
-              if (r && r.suspended) window._driveAsync(r);
-              // Wait for all pending async chains to settle
-              if (pending > 0) {
-                await new Promise(resolve => {
-                  let waited = 0;
-                  const check = () => {
-                    if (pending <= 0 || waited > 2000) resolve();
-                    else { waited += 10; setTimeout(check, 10); }
-                  };
-                  setTimeout(check, 10);
-                });
-              }
-              console.error = orig;
-              window._driveAsync = oldDrive;
-              return lastErr ? { p: false, e: lastErr.replace(/[\\"]/g, ' ').slice(0, 150) } : { p: true, e: null };
-            } catch(e) {
-              console.error = orig;
-              window._driveAsync = oldDrive;
-              return { p: false, e: (e.message || '').replace(/[\\"]/g, ' ').slice(0, 150) };
-            }
-          }, i),
-          new Promise(resolve => setTimeout(() => resolve({ p: false, e: 'TIMEOUT' }), 3000))
-        ]);
-      } catch(e) {
-        result = { p: false, e: 'CRASH: ' + (e.message || '').slice(0, 80) };
-      }
-
-      if (result.e === 'TIMEOUT' || (result.e && result.e.startsWith('CRASH'))) {
-        consecutiveTimeouts++;
-      }
-
-      results.push({ s: t.s, n: t.n, p: result.p, e: result.e });
-    }
-
-    try { await page.close(); } catch(_) {}
-
-    // Tally
+    // Tally and report
    let passed = 0, failed = 0;
-    const cats = {};
-    const errTypes = {};
+    const failsByCat = {};
    for (const r of results) {
-      if (r.p) passed++; else {
+      if (r.pass) { passed++; }
+      else {
        failed++;
-        const e = r.e || '';
-        let t = 'other';
-        if (e === 'TIMEOUT') t = 'timeout';
-        else if (e.includes('NOT IMPLEMENTED')) t = 'stub';
-        else if (e.includes('callFn')) t = 'crash';
-        else if (e.includes('Assertion')) t = 'assert-fail';
-        else if (e.includes('Unhandled')) t = 'unhandled';
-        else if (e.includes('Expected')) t = 'wrong-value';
-        else if (e.includes('Cannot read')) t = 'null-ref';
-        else if (e.includes('Undefined')) t = 'undef-sym';
-        else if (e.includes('no thunk')) t = 'no-thunk';
-        else if (e.includes('reboot')) t = 'reboot-fail';
-        if (!errTypes[t]) errTypes[t] = 0;
-        errTypes[t]++;
+        if (!failsByCat[r.suite]) failsByCat[r.suite] = 0;
+        failsByCat[r.suite]++;
      }
-      if (!cats[r.s]) cats[r.s] = { p: 0, f: 0 };
-      if (r.p) cats[r.s].p++; else cats[r.s].f++;
    }
    console.log(`\n  Upstream conformance: ${passed}/${results.length} (${(100*passed/results.length).toFixed(0)}%)`);
+    // Per-category summary
+    const cats = {};
+    for (const r of results) {
+      if (!cats[r.suite]) cats[r.suite] = { p: 0, f: 0 };
+      if (r.pass) cats[r.suite].p++; else cats[r.suite].f++;
+    }
    for (const [cat, s] of Object.entries(cats).sort((a,b) => b[1].p - a[1].p)) {
      const mark = s.f === 0 ? `✓ ${s.p}` : `${s.p}/${s.p+s.f}`;
      console.log(`    ${cat}: ${mark}`);
    }
-    console.log(`  Failure types:`);
-    for (const [t, n] of Object.entries(errTypes).sort((a,b) => b[1] - a[1])) {
-      console.log(`    ${t}: ${n}`);
+
+    // Failure details — classify by error type
+    const errorTypes = {};
+    for (const r of results.filter(r => !r.pass)) {
+      const e = r.error || 'unknown';
+      let type = 'other';
+      if (e.includes('NOT IMPLEMENTED')) type = 'not-generated';
+      else if (e.includes('[sx] callFn')) type = 'callFn-crash';
+      else if (e.includes('Assertion failed')) type = 'assertion';
+      else if (e.includes('Undefined symbol')) type = 'undefined-symbol';
+      else if (e.includes('Expected')) type = 'wrong-value';
+      else if (e.includes('Cannot read')) type = 'null-ref';
+      else if (e.includes('not defined')) type = 'js-undef';
+      if (!errorTypes[type]) errorTypes[type] = [];
+      errorTypes[type].push(`[${r.suite}] ${r.name}: ${e.slice(0, 80)}`);
    }
-    // Show ALL crash errors (deduplicated by error message)
-    const uniqueErrors = {};
-    for (const r of results.filter(r => !r.p)) {
-      const e = (r.e || '').slice(0, 100);
-      if (!uniqueErrors[e]) uniqueErrors[e] = { count: 0, example: r };
-      uniqueErrors[e].count++;
-    }
-    console.log(`  Unique error messages (${Object.keys(uniqueErrors).length}):`);
-    for (const [e, info] of Object.entries(uniqueErrors).sort((a,b) => b[1].count - a[1].count).slice(0, 25)) {
-      console.log(`    [${info.count}x] ${e}`);
-    }
-    // Show ALL failing tests with errors (for diagnosis)
-    const failsByCategory = {};
-    for (const r of results.filter(r => !r.p)) {
-      if (!failsByCategory[r.s]) failsByCategory[r.s] = [];
-      failsByCategory[r.s].push(r);
-    }
-    for (const [cat, fails] of Object.entries(failsByCategory).sort((a,b) => a[0].localeCompare(b[0]))) {
-      for (const f of fails.slice(0, 5)) {
-        console.log(`  FAIL ${f.s}/${f.n}: ${(f.e||'').slice(0, 100)}`);
-      }
+    console.log(`\n  Failure breakdown:`);
+    for (const [type, items] of Object.entries(errorTypes).sort((a,b) => b[1].length - a[1].length)) {
+      console.log(`    ${type}: ${items.length}`);
+      for (const item of items.slice(0, 5)) console.log(`      ${item.slice(0, 200)}`);
+      if (items.length > 3) console.log(`      ...and ${items.length - 3} more`);
    }

-    expect(results.length).toBeGreaterThanOrEqual(830);
-    expect(passed).toBeGreaterThanOrEqual(300);
+    // Hard gate — ratchet this up as implementation improves
+    expect(results.length).toBeGreaterThan(0);
+    expect(passed).toBeGreaterThanOrEqual(460);
  });
 });