// rose-ash/tests/playwright/hs-behavioral.spec.js

// @ts-check
/**
 * Hyperscript behavioral tests — SX tests running in Playwright sandbox.
 *
 * Loads the WASM kernel + hs stack, defines the test platform,
 * loads test-framework.sx + test-hyperscript-behavioral.sx,
 * and reports each test individually.
 */
const { test, expect } = require('playwright/test');
const fs = require('fs');
const path = require('path');

// Repository root: two directories above the directory containing this spec file.
const PROJECT_ROOT = path.resolve(__dirname, '../..');
// Directory the WASM kernel bundle (sx_browser.bc.js) is read from.
const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
// Directory of .sx module sources; checked first when loading each module.
const SX_DIR = path.join(WASM_DIR, 'sx');

/**
 * Names of the SX modules loaded into the sandbox, grouped by stack.
 * runSxTests loads the `web` list followed by the `hs` list, each in the
 * order written here.
 */
const SANDBOX_STACKS = {
  web: [
    'render',
    'core-signals',
    'signals',
    'deps',
    'router',
    'page-helpers',
    'freeze',
    'dom',
    'browser',
    'adapter-html',
    'adapter-sx',
    'adapter-dom',
    'boot-helpers',
    'hypersx',
    'engine',
    'orchestration',
    'boot',
  ],
  hs: [
    'hs-tokenizer',
    'hs-parser',
    'hs-compiler',
    'hs-runtime',
    'hs-integration',
  ],
};

/**
 * Boot the WASM kernel with the web + hs stacks, define the test platform,
 * and load the test framework and behavioral test files inside `page`.
 *
 * @param page Playwright page used as the sandbox. It is navigated to
 *   about:blank and its body is emptied before the kernel is injected.
 * @returns Promise resolving to `{ loadErrors, results }`. `results` holds one
 *   `{suite, name, pass, error}` entry per reported SX test. When any module
 *   or test file fails to load, `loadErrors` describes the failure(s) and
 *   `results` is empty.
 */
async function runSxTests(page) {
  await page.goto('about:blank');
  await page.evaluate(() => { document.body.innerHTML = ''; });

  // Inject WASM kernel
  const kernelSrc = fs.readFileSync(path.join(WASM_DIR, 'sx_browser.bc.js'), 'utf8');
  await page.addScriptTag({ content: kernelSrc });
  await page.waitForFunction('!!window.SxKernel', { timeout: 10000 });

  // Register FFI + IO driver.
  // NOTE: everything inside a page.evaluate callback runs in the browser and
  // cannot reference Node-side variables from this file.
  await page.evaluate(() => {
    const K = window.SxKernel;
    // Host natives normalise `undefined` to null before handing values back.
    K.registerNative('host-global', a => { const n=a[0]; return (n in globalThis)?globalThis[n]:null; });
    K.registerNative('host-get', a => { if(a[0]==null)return null; const v=a[0][a[1]]; return v===undefined?null:v; });
    K.registerNative('host-set!', a => { if(a[0]!=null)a[0][a[1]]=a[2]; return a[2]; });
    K.registerNative('host-call', a => {
      const[o,m,...r]=a;
      // A null receiver means "call the global function named m".
      if(o==null){const f=globalThis[m];return typeof f==='function'?f.apply(null,r):null;}
      if(typeof o[m]!=='function')return null;
      // Deliberately best-effort: a throwing host method yields null.
      try{const v=o[m].apply(o,r);return v===undefined?null:v;}catch(e){return null;}
    });
    K.registerNative('host-new', a => {
      const C=typeof a[0]==='string'?globalThis[a[0]]:a[0];
      return typeof C==='function'?new C(...a.slice(1)):null;
    });
    K.registerNative('host-callback', a => {
      const fn=a[0];
      // Plain JS functions pass through; values carrying __sx_handle are
      // wrapped so that calling them goes through K.callFn and any suspended
      // result is handed to the async driver.
      if(typeof fn==='function'&&fn.__sx_handle===undefined)return fn;
      if(fn&&fn.__sx_handle!==undefined){
        return function(){
          const r=K.callFn(fn,Array.from(arguments));
          if(window._driveAsync)window._driveAsync(r);
          return r;
        };
      }
      return function(){};
    });
    K.registerNative('host-typeof', a => {
      const o=a[0]; if(o==null)return'nil';
      if(o instanceof Element)return'element'; if(o instanceof Text)return'text';
      if(o instanceof DocumentFragment)return'fragment'; if(o instanceof Document)return'document';
      if(o instanceof Event)return'event'; if(o instanceof Promise)return'promise';
      return typeof o;
    });
    K.registerNative('host-await', a => {
      const[p,cb]=a;
      if(p&&typeof p.then==='function'){
        const f=(cb&&cb.__sx_handle!==undefined)?v=>K.callFn(cb,[v]):()=>{};
        p.then(f);
      }
    });
    K.registerNative('load-library!', () => false);

    // IO suspension driver
    window._ioTrace = [];
    window._asyncPending = 0;
    window._driveAsync = function driveAsync(result) {
      if(!result||!result.suspended)return;
      window._asyncPending++;
      const req=result.request; const items=req&&(req.items||req);
      const op=items&&items[0]; const opName=typeof op==='string'?op:(op&&op.name)||String(op);
      const arg=items&&items[1];
      // Resume the suspended computation after `delay` ms and keep driving
      // whatever it returns; the pending counter is decremented either way.
      function doResume(val,delay){
        setTimeout(()=>{
          try{const r=result.resume(val);window._asyncPending--;driveAsync(r);}
          catch(e){window._asyncPending--;}
        },delay);
      }
      // Sleeps are capped at 10 ms; fetches resume with a stub response;
      // navigation and unrecognised ops are dropped without resuming.
      if(opName==='io-sleep'||opName==='wait')doResume(null,Math.min(typeof arg==='number'?arg:0,10));
      else if(opName==='io-navigate')window._asyncPending--;
      else if(opName==='io-fetch')doResume({ok:true,text:''},1);
      else window._asyncPending--;
    };

    K.eval('(define SX_VERSION "hs-test-1.0")');
    K.eval('(define SX_ENGINE "ocaml-vm-sandbox")');
    K.eval('(define parse sx-parse)');
    K.eval('(define serialize sx-serialize)');
  });

  // Load web + hs modules
  const allModules = [...SANDBOX_STACKS.web, ...SANDBOX_STACKS.hs];
  const loadErrors = [];

  await page.evaluate(() => {
    if (window.SxKernel.beginModuleLoad) window.SxKernel.beginModuleLoad();
  });

  for (const mod of allModules) {
    // Prefer the copy under SX_DIR; otherwise fall back to lib/hyperscript,
    // where the file name drops the "hs-" prefix.
    const sxPath = path.join(SX_DIR, mod + '.sx');
    const libPath = path.join(PROJECT_ROOT, 'lib/hyperscript', mod.replace(/^hs-/, '') + '.sx');
    let src;
    try {
      src = fs.existsSync(sxPath) ? fs.readFileSync(sxPath, 'utf8') : fs.readFileSync(libPath, 'utf8');
    } catch(e) { loadErrors.push(mod + ': file not found'); continue; }
    const err = await page.evaluate(s => {
      try { window.SxKernel.load(s); return null; }
      // The thrown value may not be an Error; returning a bare `e.message`
      // would then be undefined and the failure would go unnoticed.
      catch(e) { return (e && e.message) || String(e) || 'unknown error'; }
    }, src);
    if (err) loadErrors.push(mod + ': ' + err);
  }

  await page.evaluate(() => {
    if (window.SxKernel.endModuleLoad) window.SxKernel.endModuleLoad();
  });

  if (loadErrors.length > 0) return { loadErrors, results: [] };

  // Define test platform — collects results into an array
  await page.evaluate(() => {
    const K = window.SxKernel;
    K.eval('(define _test-results (list))');
    K.eval('(define _test-suite "")');
    // try-call as JS native — catches both SX errors and JS-level crashes.
    // K.callFn returns null on Eval_error (kernel logs to console.error).
    // We capture the last console.error to detect failures.
    K.registerNative('try-call', args => {
      const thunk = args[0];
      // Build the {:ok false :error "..."} dict for a failure message.
      // Truncate BEFORE escaping: slicing the escaped text could cut an
      // escape pair in half and leave a trailing backslash that swallows the
      // closing quote of the string literal.
      const failure = raw => {
        const text = String(raw).slice(0, 200).replace(/\\/g, '\\\\').replace(/"/g, '\\"');
        K.eval('(define _tc_err "' + text + '")');
        return K.eval('{:ok false :error _tc_err}');
      };
      let lastError = null;
      const origError = console.error;
      console.error = function() {
        const msg = Array.from(arguments).join(' ');
        if (msg.startsWith('[sx]')) lastError = msg;
        origError.apply(console, arguments);
      };
      try {
        K.callFn(thunk, []);
        console.error = origError;
        if (lastError) return failure(lastError);
        return K.eval('{:ok true}');
      } catch(e) {
        console.error = origError;
        const msg = typeof e === 'string' ? e : ((e && e.message) || String(e));
        return failure(msg);
      }
    });
    K.eval(`(define report-pass
      (fn (name) (set! _test-results
        (append _test-results (list {:suite _test-suite :name name :pass true :error nil})))))`);
    K.eval(`(define report-fail
      (fn (name error) (set! _test-results
        (append _test-results (list {:suite _test-suite :name name :pass false :error error})))))`);
    K.eval('(define push-suite (fn (name) (set! _test-suite name)))');
    K.eval('(define pop-suite (fn () (set! _test-suite "")))');
  });

  // Load test framework + behavioral tests
  for (const f of ['spec/harness.sx', 'spec/tests/test-framework.sx', 'spec/tests/test-hyperscript-behavioral.sx']) {
    const src = fs.readFileSync(path.join(PROJECT_ROOT, f), 'utf8');
    const err = await page.evaluate(s => {
      try { window.SxKernel.load(s); return null; }
      catch(e) { return 'LOAD ERROR: ' + ((e && e.message) || String(e)); }
    }, src);
    if (err) {
      // Report how many results had been recorded before the load failed.
      const partial = await page.evaluate(() => window.SxKernel.eval('(len _test-results)'));
      return { loadErrors: [f + ': ' + err + ' (' + partial + ' results before crash)'], results: [] };
    }
  }

  // Collect results — serialize via SX inspect for reliability
  const resultsRaw = await page.evaluate(() => {
    const K = window.SxKernel;
    const count = K.eval('(len _test-results)');
    const arr = [];
    for (let i = 0; i < count; i++) {
      arr.push(K.eval(`(inspect (nth _test-results ${i}))`));
    }
    return { count, items: arr };
  });

  // Parse the SX dict strings
  // NOTE(review): the patterns stop at the first double quote, so a value that
  // contains a quote is cut short there — confirm how `inspect` renders quotes.
  const results = resultsRaw.items.map(s => {
    // s is like '{:suite "hs-add" :name "add class" :pass true :error nil}'
    const suite = (s.match(/:suite "([^"]*)"/) || [])[1] || '';
    const name = (s.match(/:name "([^"]*)"/) || [])[1] || '';
    const pass = s.includes(':pass true');
    const errorMatch = s.match(/:error "([^"]*)"/);
    const error = errorMatch ? errorMatch[1] : (s.includes(':error nil') ? null : 'unknown');
    return { suite, name, pass, error };
  });

  return { loadErrors, results };
}


// ===========================================================================
// Test suite — a single Playwright test that runs every SX test in one page
// and gates on the aggregate pass count
// ===========================================================================

test.describe('Hyperscript behavioral tests', () => {
  test.describe.configure({ timeout: 300000 }); // 5 min budget for the whole SX suite

  test('SX behavioral test suite', async ({ browser }) => {
    const page = await browser.newPage();
    let outcome;
    try {
      outcome = await runSxTests(page);
    } finally {
      // Always release the page, even when the run itself throws.
      await page.close();
    }
    const { loadErrors, results } = outcome;

    expect(loadErrors).toEqual([]);

    // Tally and report
    const passed = results.filter(r => r.pass).length;
    // Guard the percentage so an empty result set does not print "NaN%".
    const pct = results.length > 0 ? (100*passed/results.length).toFixed(0) : '0';
    console.log(`\n  Upstream conformance: ${passed}/${results.length} (${pct}%)`);
    // Per-category summary
    const cats = {};
    for (const r of results) {
      if (!cats[r.suite]) cats[r.suite] = { p: 0, f: 0 };
      if (r.pass) cats[r.suite].p++; else cats[r.suite].f++;
    }
    // Categories with the most passing tests are listed first.
    for (const [cat, s] of Object.entries(cats).sort((a,b) => b[1].p - a[1].p)) {
      const mark = s.f === 0 ? `✓ ${s.p}` : `${s.p}/${s.p+s.f}`;
      console.log(`    ${cat}: ${mark}`);
    }

    // Failure details — classify by error type
    const errorTypes = {};
    for (const r of results.filter(r => !r.pass)) {
      const e = r.error || 'unknown';
      // First matching substring wins, so the order of these checks matters.
      let type = 'other';
      if (e.includes('NOT IMPLEMENTED')) type = 'not-generated';
      else if (e.includes('[sx] callFn')) type = 'callFn-crash';
      else if (e.includes('Assertion failed')) type = 'assertion';
      else if (e.includes('Undefined symbol')) type = 'undefined-symbol';
      else if (e.includes('Expected')) type = 'wrong-value';
      else if (e.includes('Cannot read')) type = 'null-ref';
      else if (e.includes('not defined')) type = 'js-undef';
      if (!errorTypes[type]) errorTypes[type] = [];
      errorTypes[type].push(`[${r.suite}] ${r.name}: ${e.slice(0, 80)}`);
    }
    // Number of sample failures printed per type; the "...and N more" line is
    // derived from the same limit so the two can never disagree.
    const MAX_SHOWN = 5;
    console.log(`\n  Failure breakdown:`);
    for (const [type, items] of Object.entries(errorTypes).sort((a,b) => b[1].length - a[1].length)) {
      console.log(`    ${type}: ${items.length}`);
      for (const item of items.slice(0, MAX_SHOWN)) console.log(`      ${item.slice(0, 200)}`);
      if (items.length > MAX_SHOWN) console.log(`      ...and ${items.length - MAX_SHOWN} more`);
    }

    // Hard gate — ratchet this up as implementation improves
    expect(results.length).toBeGreaterThan(0);
    expect(passed).toBeGreaterThanOrEqual(460);
  });
});