Merge loops/sx-ws-w14 into architecture: W14 test gate & conformance infra

17 test-only commits delivering the full W14 workstream (PLAN.md §W14 — the enabler every other sx-review fix verifies against): - spec/tests/test-gate-pins.sx: 7 pin suites (29 tests) for dc7aa709's landed fixes — K18, K20, K09/K11/K39, K49 (spec side), crit-2 (non-vacuous via side-effect sentinel), plus C21/C22 harness pins - 6 gate scripts, all bidirectional ledgers (a healed KNOWN entry also fails): test-protocol-gate (C1/C1b/S4 + C3-C7 quirk ledger + seeded fuzz-liveness, 11), test-env-parity (runner-only bindings, 7), test-harness-parity (mcp_tree vs sx_server, 12), test-wasm-corpus (shipped kernel: 80/83 files green, 5192 passes), test-suite-baseline (273-failure band pinned in spec/tests/known-failures.txt), test-differential (49 probes native vs WASM, 3 ledgered) - spec/harness.sx: C22 fix (IO logged before the mock runs) + C21 harness-run-perform (real CEK suspend/resume mode); W14-assigned per PLAN approach item 4 — see merge note in the briefing re: the forge briefing's stricter wording - C9: empty suite labels eliminated across 6 test files - web/tests/test-adapter-dom-render.sx: first render-output coverage of the DOM adapter (the browser-only exclusion was false) Confirmed handoffs recorded in the briefing: bare-server apply does not spread args (F-3, runner masks it); both runners' sha3-256 are fake stubs (test CIDs != production CIDs); generated sx_render.ml is regen- stale (misses dc7aa709's HTML_TAGS fix); canonical-serialize broken on bare server for any number. Verified post-merge in this checkout: gate pins 275/0, protocol-gate 11/0, env-parity 7/0, harness-parity 12/0, differential 49/0. Briefing conflict (add/add) resolved: kept the loop's completed version with a merge note preserving the forge briefing's context (8181421c landed after the worktree branched). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-04 08:14:48 +00:00
parent 8ed44f7770 5e0abced32
commit 047fccc948
21 changed files with 2266 additions and 124 deletions
--- a/.claude/scheduled_tasks.lock
+++ b/.claude/scheduled_tasks.lock
@@ -1 +1 @@
-{"sessionId":"c4d97db1-361c-4a04-a99b-c838f9385469","pid":2426590,"procStart":"349789073","acquiredAt":1780789990975}
+{"sessionId":"d510140d-6197-40b7-9bd2-125ca304ad7a","pid":697492,"procStart":"125353752","acquiredAt":1783118024555}
--- a/hosts/ocaml/browser/eval_wasm_probes.js
+++ b/hosts/ocaml/browser/eval_wasm_probes.js
@@ -0,0 +1,68 @@
+#!/usr/bin/env node
+// eval_wasm_probes.js — W14/F8: evaluate a file of probe expressions (one
+// per line, '#'-comments allowed) on the SHIPPED browser kernel and print
+//   PROBE <n> <result-or-ERROR>
+// per line, for diffing against the native server (scripts/test-differential.sh).
+// Boot stubs mirror test_wasm_native.js / run_wasm_corpus.js.
+
+const fs = require('fs');
+const path = require('path');
+
+const PROJECT_ROOT = path.resolve(__dirname, '../../..');
+const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
+
+const probeFile = process.argv[2];
+if (!probeFile) { console.error('usage: eval_wasm_probes.js <probes.txt>'); process.exit(2); }
+
+global.window = global;
+global.document = {
+  createElement: () => ({ style: {}, setAttribute() {}, appendChild() {}, children: [] }),
+  createDocumentFragment: () => ({ appendChild() {}, children: [], childNodes: [] }),
+  head: { appendChild() {} }, body: { appendChild() {} },
+  querySelector: () => null, querySelectorAll: () => [],
+  createTextNode: s => ({ textContent: s }), addEventListener() {},
+  createComment: s => ({ textContent: s || '' }),
+  getElementsByTagName: () => [],
+};
+global.localStorage = { getItem: () => null, setItem() {}, removeItem() {} };
+global.CustomEvent = class { constructor(n, o) { this.type = n; this.detail = (o || {}).detail || {}; } };
+global.MutationObserver = class { observe() {} disconnect() {} };
+global.requestIdleCallback = fn => setTimeout(fn, 0);
+global.matchMedia = () => ({ matches: false });
+global.navigator = { serviceWorker: { register: () => Promise.resolve() } };
+global.location = { href: '', pathname: '/', hostname: 'localhost' };
+global.history = { pushState() {}, replaceState() {} };
+global.fetch = () => Promise.resolve({ ok: true, text: () => Promise.resolve('') });
+
+async function main() {
+  require(path.join(WASM_DIR, 'sx_browser.bc.wasm.js'));
+  const K = await new Promise((resolve, reject) => {
+    let tries = 0;
+    const poll = setInterval(() => {
+      if (globalThis.SxKernel) { clearInterval(poll); resolve(globalThis.SxKernel); }
+      else if (++tries > 200) { clearInterval(poll); reject(new Error('SxKernel not found')); }
+    }, 50);
+  });
+
+  const lines = fs.readFileSync(probeFile, 'utf8').split('\n');
+  let n = 0;
+  for (const raw of lines) {
+    const line = raw.trim();
+    if (!line || line.startsWith('#')) continue;
+    n++;
+    let out;
+    try {
+      // Serialize through the kernel's own printer so both hosts emit SX
+      // text (K.eval returns raw JS values otherwise — [object Object]).
+      const r = K.eval(`(sx-serialize ${line})`);
+      out = (typeof r === 'string') ? r : String(r);
+    } catch (e) {
+      out = 'ERROR';
+    }
+    // errors normalized: kernel returns "Error: ..." strings for eval errors
+    if (typeof out === 'string' && out.startsWith('Error')) out = 'ERROR';
+    console.log(`PROBE ${n} ${out.replace(/\n/g, '\\n')}`);
+  }
+}
+
+main().catch(e => { console.error('FATAL:', e.message); process.exit(1); });
--- a/hosts/ocaml/browser/run_wasm_corpus.js
+++ b/hosts/ocaml/browser/run_wasm_corpus.js
@@ -0,0 +1,170 @@
+#!/usr/bin/env node
+// run_wasm_corpus.js — W14/F2: run spec-test files through the SHIPPED
+// browser kernel (sx_browser.bc.wasm.js), headless in Node.
+//
+// The review (conformance.md F-2) found no runner feeds spec/tests through
+// the shipped browser artifact — F-1/F-3 host divergences existed
+// undetected precisely because of that. This runs ONE test file per
+// invocation (process isolation: a hanging file is killed by the driver's
+// timeout without taking down the sweep) and prints a parseable summary:
+//   CORPUS-RESULT <file> pass=<n> fail=<n> status=ok|load-error
+//
+// Usage: node hosts/ocaml/browser/run_wasm_corpus.js spec/tests/test-eval.sx
+// Driver: scripts/test-wasm-corpus.sh (sweeps the corpus, applies skips).
+//
+// Boot stubs and module preload mirror test_wasm_native.js (the blessed
+// boot path for the shipped kernel).
+
+const fs = require('fs');
+const path = require('path');
+
+const PROJECT_ROOT = path.resolve(__dirname, '../../..');
+const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
+
+const target = process.argv[2];
+if (!target) { console.error('usage: run_wasm_corpus.js <test-file.sx>'); process.exit(2); }
+
+// --- DOM stubs (as test_wasm_native.js) ---
+function makeElement(tag) {
+  const el = {
+    tagName: tag, _attrs: {}, _children: [], style: {},
+    childNodes: [], children: [], textContent: '',
+    nodeType: 1,
+    setAttribute(k, v) { el._attrs[k] = String(v); },
+    getAttribute(k) { return el._attrs[k] || null; },
+    removeAttribute(k) { delete el._attrs[k]; },
+    appendChild(c) { el._children.push(c); el.childNodes.push(c); el.children.push(c); return c; },
+    insertBefore(c) { el._children.push(c); el.childNodes.push(c); el.children.push(c); return c; },
+    removeChild(c) { return c; },
+    replaceChild(n) { return n; },
+    cloneNode() { return makeElement(tag); },
+    addEventListener() {}, removeEventListener() {}, dispatchEvent() {},
+    get innerHTML() {
+      return el._children.map(c => {
+        if (c._isText) return c.textContent || '';
+        if (c._isComment) return '<!--' + (c.textContent || '') + '-->';
+        return c.outerHTML || '';
+      }).join('');
+    },
+    set innerHTML(v) { el._children = []; el.childNodes = []; el.children = []; },
+    get outerHTML() {
+      let s = '<' + tag;
+      for (const k of Object.keys(el._attrs).sort()) s += ` ${k}="${el._attrs[k]}"`;
+      s += '>';
+      if (['br', 'hr', 'img', 'input', 'meta', 'link'].includes(tag)) return s;
+      return s + el.innerHTML + '</' + tag + '>';
+    },
+    dataset: new Proxy({}, {
+      get(_, k) { return el._attrs['data-' + k.replace(/[A-Z]/g, c => '-' + c.toLowerCase())]; },
+      set(_, k, v) { el._attrs['data-' + k.replace(/[A-Z]/g, c => '-' + c.toLowerCase())] = v; return true; }
+    }),
+    querySelectorAll() { return []; },
+    querySelector() { return null; },
+  };
+  return el;
+}
+
+global.window = global;
+global.document = {
+  createElement: makeElement,
+  createDocumentFragment() { return makeElement('fragment'); },
+  head: makeElement('head'), body: makeElement('body'),
+  querySelector() { return null; }, querySelectorAll() { return []; },
+  createTextNode(s) { return { _isText: true, textContent: String(s), nodeType: 3 }; },
+  addEventListener() {},
+  createComment(s) { return { _isComment: true, textContent: s || '', nodeType: 8 }; },
+  getElementsByTagName() { return []; },
+};
+global.localStorage = { getItem() { return null; }, setItem() {}, removeItem() {} };
+global.CustomEvent = class { constructor(n, o) { this.type = n; this.detail = (o || {}).detail || {}; } };
+global.MutationObserver = class { observe() {} disconnect() {} };
+global.requestIdleCallback = fn => setTimeout(fn, 0);
+global.matchMedia = () => ({ matches: false });
+global.navigator = { serviceWorker: { register() { return Promise.resolve(); } } };
+global.location = { href: '', pathname: '/', hostname: 'localhost' };
+global.history = { pushState() {}, replaceState() {} };
+global.fetch = () => Promise.resolve({ ok: true, text() { return Promise.resolve(''); } });
+global.XMLHttpRequest = class { open() {} send() {} };
+
+async function main() {
+  require(path.join(WASM_DIR, 'sx_browser.bc.wasm.js'));
+  const K = await new Promise((resolve, reject) => {
+    let tries = 0;
+    const poll = setInterval(() => {
+      if (globalThis.SxKernel) { clearInterval(poll); resolve(globalThis.SxKernel); }
+      else if (++tries > 200) { clearInterval(poll); reject(new Error('SxKernel not found after 10s')); }
+    }, 50);
+  });
+
+  // --- 8 FFI host primitives (as test_wasm_native.js) ---
+  K.registerNative('host-global', args => (args[0] in globalThis) ? globalThis[args[0]] : null);
+  K.registerNative('host-get', args => {
+    const [obj, prop] = args;
+    if (obj == null) return null;
+    const v = obj[prop];
+    return v === undefined ? null : v;
+  });
+  K.registerNative('host-set!', args => { if (args[0] != null) args[0][args[1]] = args[2]; return args[2]; });
+  K.registerNative('host-call', args => {
+    const [obj, method, ...rest] = args;
+    if (obj == null || typeof obj[method] !== 'function') return null;
+    const r = obj[method].apply(obj, rest);
+    return r === undefined ? null : r;
+  });
+  K.registerNative('host-new', args => new (Function.prototype.bind.apply(args[0], [null, ...args.slice(1)])));
+  K.registerNative('host-callback', args => function () { return K.callFn(args[0], Array.from(arguments)); });
+  K.registerNative('host-typeof', args => typeof args[0]);
+  K.registerNative('host-await', args => args[0]);
+
+  K.eval('(define SX_VERSION "wasm-corpus-1.0")');
+  K.eval('(define SX_ENGINE "ocaml-vm-wasm-corpus")');
+  K.eval('(define parse sx-parse)');
+  K.eval('(define serialize sx-serialize)');
+
+  // --- Web stack modules (source form; bytecode covered elsewhere) ---
+  const sxDir = path.join(WASM_DIR, 'sx');
+  const modules = [
+    'render', 'core-signals', 'signals', 'deps', 'router', 'page-helpers', 'freeze',
+    'bytecode', 'compiler', 'vm', 'dom', 'browser',
+    'adapter-html', 'adapter-sx', 'adapter-dom',
+    'boot-helpers', 'hypersx',
+    'harness', 'harness-reactive', 'harness-web',
+    'engine', 'orchestration', 'boot',
+  ];
+  if (K.beginModuleLoad) K.beginModuleLoad();
+  for (const mod of modules) {
+    K.load(fs.readFileSync(path.join(sxDir, mod + '.sx'), 'utf8'));
+  }
+  if (K.endModuleLoad) K.endModuleLoad();
+
+  // --- Test framework hooks ---
+  let pass = 0, fail = 0;
+  const suiteStack = [];
+  K.registerNative('report-pass', () => { pass++; return null; });
+  K.registerNative('report-fail', args => {
+    fail++;
+    const suitePath = suiteStack.join(' > ');
+    console.error(`FAIL: ${suitePath ? suitePath + ' > ' : ''}${args[0]}\n  ${args[1]}`);
+    return null;
+  });
+  K.registerNative('push-suite', args => { suiteStack.push(args[0]); return null; });
+  K.registerNative('pop-suite', () => { suiteStack.pop(); return null; });
+  K.eval('(define test-allowed? (fn (name) true))');
+  K.eval('(define try-call (fn (thunk) (let ((result (cek-try thunk (fn (err) err)))) (if (and (= (type-of result) "string") (starts-with? result "Error")) {"ok" false "error" result} {"ok" true "error" nil}))))');
+
+  K.load(fs.readFileSync(path.join(PROJECT_ROOT, 'spec/tests/test-framework.sx'), 'utf8'));
+
+  // --- Run the target file ---
+  const rel = path.relative(PROJECT_ROOT, path.resolve(target));
+  let status = 'ok';
+  try {
+    K.load(fs.readFileSync(path.resolve(target), 'utf8'));
+  } catch (e) {
+    status = 'load-error';
+    console.error(`LOAD-ERROR: ${rel}: ${e.message}`);
+  }
+  console.log(`CORPUS-RESULT ${rel} pass=${pass} fail=${fail} status=${status}`);
+  process.exit(status !== 'ok' || fail > 0 ? 1 : 0);
+}
+
+main().catch(e => { console.error('FATAL:', e.message); process.exit(1); });
--- a/plans/agent-briefings/sx-gate-loop.md
+++ b/plans/agent-briefings/sx-gate-loop.md
@@ -1,57 +1,348 @@
-# sx-gate loop — W14 test gate (first live test of git→gitea→agentic→tmux)
+<!-- MERGE NOTE 2026-07-04: architecture's 8181421c landed the original forge
+briefing (agents/ws-W14, rose-ash/sx-review forge — the git→gitea→agentic launch
+test) at this path AFTER the loops/sx-ws-w14 worktree branched, so the loop never
+saw it and bootstrapped its own briefing from PLAN.md §W14. Same worklist, same
+guardrails in substance; this file is the loop's version, which records COMPLETION
+of every item. One scope divergence to note for review: the forge briefing said
+"do NOT edit spec/*.sx"; the loop edited spec/harness.sx (C22 log-first fix + C21
+perform-mode harness) on the grounds that PLAN.md §W14 approach item 4 explicitly
+assigns those harness fixes to W14. Harness self-suite + all consumers green. -->

-**Forge agent:** `agents/ws-W14` in the `rose-ash/sx-review` forge (git-sx/gitea-sx/agentic-sx).
-**Goal (from the forge briefing):** make the verification infrastructure trustworthy — runner env
-== production env, a WASM corpus runner, harness honesty, and pinning tests for the fixes already
-landed. This is **W14** in `plans/sx-review/PLAN.md` (read that section — it lists the findings).
-**Findings:** C0b C9 C21 C22 C23 C3 C4 C5 C6 C7 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 K19 K104.
+# W14 — Test gate & conformance infrastructure loop

-## Why this workstream first
-The review's prime directive: no semantic fix should merge before its pinning test + a working
-gate exist, because the verification infra currently can't tell you whether a fix works. W14
-produces that infra. It changes **no language semantics**, so it cannot regress the 5762p/274f
-baseline — the ideal first payload while we test the agentic launch technology.
+Forge agent **ws-W14**. Role: build out **W14** from the SX review remediation plan
+(`plans/sx-review/PLAN.md`, §"W14. Test gate & conformance infrastructure") —
+*the enabler that makes every other fix verifiable*. One checklist item per fire.

-## Hard guardrails (this is a monitored test loop)
- **Commit locally, do NOT push.** No `git push` at all. (This is a test; the maintainer reviews
-  before anything reaches origin.)
- **Stay in W14 scope** — tests, runners, harness, gate tooling. Do NOT edit `spec/*.sx`,
-  `hosts/ocaml/lib/*.ml`, or any language semantics. If a task tempts you toward semantics, skip it
-  and note why in the Progress log.
- **Never `pkill sx_server`** (shared binary). Bound every `sx_server`/build/test with `timeout`.
- You are on branch `loops/sx-gate` in worktree `/root/rose-ash-loops/sx-gate`. Build/test here only.
- If the OCaml build or full suite is involved, compare against the recorded baseline
-  **5762 passed / 274 failed** (fail set is the 273 hs-* + 1 r7rs radix; see PLAN W14/F10).
+You are on branch `loops/sx-ws-w14`, worktree `/root/rose-ash-loops/sx-ws-w14`.

-## One iteration per fire — pick the first unchecked `[ ]`, implement, test, commit (no push),
-tick the box, prepend one dated line to the Progress log, then stop.
+## Hard guardrails (read every fire)

- [ ] **Pin the dc7aa709 quick-wins batch.** Add regression tests (spec/tests/ or a new suite) that
-  lock in the fixes that currently have none: K09 `unquote-splicing` longhand splices; K11 guard
-  re-raise sentinel is unforgeable (`(guard (e (true (list 'quoted x))) ...)` returns the list);
-  K18 `(expt 2 100)` is a float not 0; K20 `(contains? {:a 1} :a)` is true; K39 `(do ((fn (x) x) 5) 99)`
-  → 99; K49 the five void elements render. (K02 is already non-vacuously covered.) Confirm they pass
-  on the current binary.
- [ ] **Pin C1/C1b/S4 at the host level** (a small OCaml or shell test): a malformed command line
-  returns an error response and the process survives; an error page is not cached.
- [ ] **WASM corpus runner (F2).** Stand up a Node harness that runs a curated spec/tests subset
-  against the shipped WASM kernel (seed: the conformance lane's `run_wasm.js` pattern, referenced in
-  PLAN). Curated subset, not the full 6k (js_of_ocaml is ~24s/test — see F18). Wire it as a script.
- [ ] **Harness honesty (C22/K104):** make `spec/harness.sx` log the IO call *before* invoking the
-  mock so a throwing mock is recorded. Add a test that a throwing mock leaves a log entry.
- [ ] **Runner-vs-prod env audit (F7/K42):** list every binding that exists only in `run_tests.ml`
-  but not the production kernel env (`values`/`call-with-values` are the known ones). Write the audit
-  to `plans/sx-review/runner-env-gap.md`. (Fixing them is later; the audit is the W14 task.)
- [ ] **Protocol fuzz suite (C3/C4/C5/C6):** a bounded test that feeds the epoch loop malformed
-  lines (`(epoch)`, `(epoch foo)`, stray `(io-response …)`, two-exprs-per-line) and asserts the
-  process never dies and responses stay correctly tagged.
- [ ] **hs-upstream skip-list (F10/F18):** make the native runner's 272 hs-* failures a skip-list so
-  a red FAIL column means something. Record the count moved.
+- **TEST-ONLY.** No semantics edits. Do NOT touch `spec/evaluator.sx`,
+  `spec/primitives.sx`, `spec/parser.sx`, `spec/render.sx`, the OCaml kernel,
+  or any host runtime. W14 pins behavior with tests and productionizes the
+  *test/runner* surface; the actual fixes are other workstreams (W1–W12).
+  A pin that *fails* means the finding regressed — do NOT relax the assertion,
+  record it as a blocker.
+- **NO PUSH.** Commit locally on `loops/sx-ws-w14` only. Never push; never touch
+  `main` or `architecture`.
+- **`.sx` files: use `sx-tree` MCP tools only** (a hook blocks Read/Write/Edit
+  on `.sx`). `sx_write_file` takes params **`file`** and **`source`** (NOT
+  `content` — a wrong key yields a `yojson … got null` error and no write).
+  `.md`/`.sh`/`.ml` files: normal tools are fine.
+- **Never `pkill`/`kill` `sx_server`** — sibling loops share the binary. Bound
+  every run with `timeout` (e.g. `timeout 300 …`); if it hangs, let the timeout end it.
+- **One item per fire, then stop.** No batching.
+
+## Per-iteration procedure
+
+1. Pick the first unchecked `[ ]` in the checklist.
+2. Implement (test file or runner/harness change), lifting minimal repros from
+   the review lane files (`plans/sx-review/{core,hosts,conformance}.md`) — they
+   are a ready-made corpus of confirmed reprs.
+3. Build + run the affected tests:
+   `sx_build` (target ocaml) then
+   `timeout 300 ./hosts/ocaml/_build/default/bin/run_tests.exe <test-name>`
+   to run a single file. New `spec/tests/test-*.sx` files are auto-discovered.
+4. Confirm green (a pin must PASS on current HEAD — the fix already landed).
+5. Commit locally: `git add -A && git commit` with a `W14:` prefix.
+6. Tick the box, prepend one dated line to the Progress log, stop.
+
+## Checklist
+
+### A. Test-debt pins — dc7aa709's landed fixes shipped without regression tests
+Pin each confirmed-and-fixed finding with a minimal repro. Add suites to
+`spec/tests/test-gate-pins.sx` (one `defsuite` per finding).
+
+- [x] K18 [W7] — `expt` overflow now float-promotes (no 63-bit wrap)
+- [x] K20 [W7] — `contains?` now supports dict key membership
+- [x] K09/K11/K39 [W5] — longhand `unquote-splicing`, guard sentinel gensym, `do` IIFE-head
+- [x] K49 [W8] — five void elements (area/base/embed/param/track) renderable
+      (spec side; native regen drift → see Blocked). NB: the depth/cycle guard
+      is K16 [W8], still OPEN — not a W14 pin target until its fix lands
+- [x] crit-2 [W1] — signal-return kont pinned NON-VACUOUSLY (side-effect
+      sentinel across two tests; a plain assert would inherit the vacuity)
+- [x] C1/C1b [W3] — command-channel crash guards pinned
+      (`scripts/test-protocol-gate.sh`, seed for section E's fuzz suite)
+- [x] S4 [hosts] — soft error pages not cached (HTTP-mode pin in
+      `scripts/test-protocol-gate.sh`; NB S4 lives in hosts.md, not
+      conformance — "housekeeping" was a mislabel from F-15's tag)
+
+### B. Runner/production env unification
+- [x] Audit runner-only bindings — inventory + bidirectional ledger in
+      `scripts/test-env-parity.sh` (KNOWN_DRIFT: values, call-with-values,
+      contains-char?, trim-right, sha3-256; consequence pin:
+      canonical-serialize broken on server; BOTH runners' sha3-256 are FAKE
+      stubs → test CIDs ≠ production CIDs)
+
+### C. Harness honesty
+- [x] K19 — harness/runtime parity pinned (`scripts/test-harness-parity.sh`:
+      drives mcp_tree sx_eval over JSON-RPC vs fresh sx_server over epoch,
+      12-probe battery from the finding, errors compared by message)
+- [x] C22/K104 — FIXED harness (spec/harness.sx make-interceptor: log entry
+      appended before the mock runs, :result updated via dict-set!) + 3 pins
+- [x] C21 — BUILT `harness-run-perform` (spec/harness.sx): drives real CEK
+      suspend/resume, services performs from session mocks, C22-style
+      logging; 5 pins incl. the S10 map-over-perform probe (CEK keeps all
+      elements — the drop class is serving-JIT-side). Runner-only (needs
+      cek-* driver bindings)
+- [x] C23 — adapter-dom render-output tests
+      (`web/tests/test-adapter-dom-render.sx`, 8 tests vs runner mock DOM;
+      follow-up depth still open: boolean attrs, on-*/bind/ref/key,
+      reactive attrs, hydration cursor)
+
+### D. WASM corpus runner
+- [x] F2 — BUILT `hosts/ocaml/browser/run_wasm_corpus.js` (one file per
+      node process, shipped-kernel boot per test_wasm_native.js) +
+      `scripts/test-wasm-corpus.sh` sweep driver with SKIP/KNOWN_FAIL
+      ledger. Baseline: 83 files, 80 fully green, 5192 passes, 0 test
+      failures; 3 partial load-errors (hash-table/r7rs/sets, opaque jsoo
+      exception mid-file). Full sweep ~13 min — wiring into
+      sx-build-all.sh left as maintainer call (gate definition D3)
+
+### E. Epoch-loop protocol fuzz + skip-list
+- [x] C3/C4/C5/C6/C7 — protocol-quirk ledger (pins current behavior,
+      bidirectional) + seeded 60-line fuzz-liveness property in
+      `scripts/test-protocol-gate.sh` (11/11)
+- [x] F10 — expected-failures BASELINE GATE instead of a skip-list
+      (`scripts/test-suite-baseline.sh` + `spec/tests/known-failures.txt`,
+      273 pinned: 271 hs-* + 2 empty-suite-label entries → C9 evidence).
+      New failure OR vanished failure = red; hs loops' scoreboards untouched
+- [x] C9 — empty suite labels ELIMINATED: 6 files had suite-less top-level
+      deftests (chars 43, import-bind 14, ports 12, let-match 8, math
+      nested-deftests, 4 hs strays) — wrapped/restructured into defsuites;
+      baseline identities updated in the same commit
+
+### F. Differential battery
+- [x] F8 — cross-host differential battery: `spec/tests/differential-probes.txt`
+      (49 probes) × native server vs shipped WASM kernel via
+      `scripts/test-differential.sh` + `eval_wasm_probes.js`. 46 agree,
+      3 ledgered KNOWN_DIVERGENT (F-3: bare-server `apply` does not spread —
+      runner masks it, F-7 class). Refinement: the F-1 float-display
+      divergence is a K.eval JS-boundary artifact — guest `sx-serialize`
+      output AGREES across hosts
+
+**CHECKLIST COMPLETE 2026-07-04** — all W14 items delivered. Open handoffs:
+sx_render.ml regen drift (Blocked, hosts lane), adapter-dom depth tests,
+3 WASM load-error bisects (hash-table/r7rs/sets), CI wiring of the four
+gate scripts (D3 maintainer decision).

 ## Progress log (newest first)
-<!-- prepend: `- YYYY-MM-DD <what landed, test result, commit sha>` -->
- (none yet — first fire will add the first entry)

-## Recording back to the forge
-After each commit, note the sha here; the maintainer (or a later step) records it as a
-`test`-kind commit on `agents/ws-W14` in the forge so the program stays the system of record.
+- 2026-07-04 — **F8 differential battery — CHECKLIST COMPLETE**. Committed
+  replacement for the review's ephemeral 130-probe corpus:
+  `spec/tests/differential-probes.txt` (49 probes across F-1 int/float
+  display, K18 overflow, F-3 apply + dict order, S-4 float printing,
+  strings/collections/special forms/error cases) evaluated on the native
+  server (epoch protocol) and the shipped WASM kernel
+  (`eval_wasm_probes.js`, guest `sx-serialize`), diffed by
+  `scripts/test-differential.sh` with a KNOWN_DIVERGENT ledger (heal →
+  red → delete entry). Result: 46/49 agree; 3 divergences, all one root
+  cause — **bare sx_server's `apply` does not spread its arg list**
+  ((apply + (list 1 2 3)) → "Expected number, got list"; WASM spreads
+  correctly; the test runner masks it with its own apply — F-7 class).
+  Finding refinement: F-1's float-display divergence (0.3 vs 0.3000…4) is
+  purely a K.eval JS-boundary artifact — guest-serialized output agrees.
+  W14 delivered: 7 pin suites (spec/tests/test-gate-pins.sx, 29 tests),
+  4 gate scripts (protocol-gate 11, env-parity 7, harness-parity 12,
+  wasm-corpus 83-file, suite-baseline 273-pin, differential 49-probe),
+  2 harness capabilities (C22 log-first, C21 perform-mode), C9 label
+  cleanup, adapter-dom render coverage. Test-only throughout.
+- 2026-07-04 — **C9 empty-suite labels (item E.3) — section E COMPLETE**.
+  The sweep found the defect much wider than the finding: SIX files carried
+  suite-less top-level deftests (test-chars 43, test-import-bind 14,
+  test-ports 12, test-let-match 8, test-math as deftest-nested-in-deftest,
+  test-hyperscript-conformance 4 strays between suites). Fixes: file-level
+  defsuite wraps (validated via sx_validate after mechanical wrap),
+  test-math restructured deftest→defsuite (labels now "math > sin"),
+  hs strays wrapped in section-comment-named suites (hs-compat-
+  blockLiteral/cookies/some/where). The two baseline-visible identities
+  renamed in known-failures.txt in the SAME commit. Full-gate validated
+  GREEN (5798p/273f — −2 passes are the wrapper deftests that no longer
+  self-report; fail set byte-identical). Test-only.
+- 2026-07-04 — **F10 baseline gate (item E.2)**. Deliberately NOT a
+  skip-list: skip-listing the hs red band in the runner would rewrite the
+  hs loops' scoreboards mid-flight. Instead
+  `scripts/test-suite-baseline.sh` diffs the full suite's FAIL set against
+  checked-in `spec/tests/known-failures.txt` (273 entries: 271 hs-* + 2
+  with EMPTY suite labels — live C9 evidence, `can-map-an-array` "map with
+  block" and `string->number` 2-arg, the "r7rs radix shadow"). Red on a
+  NEW failure (regression) and red on a VANISHED failure (fix landed —
+  delete from baseline, locking in the win). Identity = "suite > name"
+  with error text stripped (messages churn). Current suite: 5800p/273f
+  (up 38 passes from dc7aa709's 5762 — sections A–D added pins). Validated
+  end-to-end: GREEN, exit 0, ~12 min runtime. Test-only.
+- 2026-07-04 — **C3–C7 protocol fuzz suite (item E.1)**. All five findings
+  are still OPEN server-side (sx_server.ml fixes are host-runtime work),
+  so the suite pins CURRENT behavior as a bidirectional ledger — verified
+  each live first: C3 stray io-response → extra Unknown-command reply
+  (dead 13-vs-14-char guard); C4 malformed (epoch) → error reply + stale
+  epoch tag (envelope changed since the finding: dc7aa709's guard now
+  answers rather than ignores); C5 decreasing epoch accepted; C6 two
+  commands one line → one error, neither runs; C7 vm-trace sans compiler →
+  opaque "Not callable: nil". Plus a real fuzz property: 60
+  deterministically-seeded hostile lines (unbalanced parens, control chars,
+  unicode, 2KB lines, stray io-responses, epoch mutations) then a
+  well-formed command — server must still answer and exit cleanly.
+  protocol-gate now 11/11. When a server fix lands, the matching ledger
+  pin fails loudly → update to assert the corrected behavior. Test-only.
+- 2026-07-04 — **F2 WASM corpus runner (section D COMPLETE)**. The review's
+  headline conformance gap: no runner ever fed spec/tests through the
+  SHIPPED browser artifact (F-1/F-3 divergences existed undetected). Built
+  `run_wasm_corpus.js` (boots sx_browser.bc.wasm.js headless in Node with
+  the test_wasm_native.js stub block, loads the 23 web-stack modules,
+  registers framework hooks, runs ONE file per process → parseable
+  `CORPUS-RESULT` line; process isolation means a hung file can't kill the
+  sweep) + `scripts/test-wasm-corpus.sh` (sweep driver, SKIP/KNOWN_FAIL
+  ledger with green-flip detection). **Empirical baseline: 83 files, 80
+  fully green, 5192 passes, ZERO test failures on the shipped kernel** —
+  including test-gate-pins (29/29) and test-letrec-resume (the kernel
+  provides cek-* driver bindings, broader than bare sx_server). 3 partial
+  load-errors (test-hash-table 22p, test-r7rs 87p, test-sets 30p — opaque
+  jsoo exception mid-file, diagnosing which form = follow-up). Full sweep
+  ~13 min; CI wiring deferred to the D3 gate-definition decision. Test-only.
+- 2026-07-04 — **C23 adapter-dom render-output tests (item C.4) — section C
+  COMPLETE**. Key discovery: the "browser-only" exclusion of adapter-dom
+  testing is FALSE for render output — `(import (web adapter-dom))`
+  disk-resolves in the OCaml runner and `render-to-dom` works against its
+  mock DOM (dom-* → host-* → mock elements). New
+  `web/tests/test-adapter-dom-render.sx` (8 tests): tag/text-child-node,
+  class+id, ordered children, void element, when-false empty FRAGMENT,
+  when-true branch-in-fragment, map N-children-in-fragment, if inlines
+  branch. Probed the adapter's output contract first (text = nodeType-3
+  child; control flow = FRAGMENT wrapper; if inlines). Auto-included in
+  default runs (not on the exclusion list) — first render-output coverage
+  of the 1512-line adapter in the standard gate. Follow-up depth (boolean
+  attrs, on-*/bind/ref/key, reactive, hydration) noted on the checklist.
+  254/0 standalone. Test-only.
+- 2026-07-04 — **C21 perform-mode harness (item C.3)**. Added
+  `harness-run-perform` to spec/harness.sx (exported): drives
+  `make-cek-state`/`cek-step-loop`, services each
+  `(perform {:op X :args L})` suspension from the session's platform mocks
+  (entry logged before invocation, C22-consistent), `cek-resume`s with the
+  mock value, loops to terminal. Self-recursion via the `(self self …)`
+  pattern (avoids letrec-injection K06 territory). Extracted the arity
+  dispatch into shared `harness-invoke-mock`. 5 pins in
+  `gate-C21-perform-mode-harness` — notably the **S10 probe**: `(map (fn (u)
+  (perform …)) '("a" "b" "c"))` keeps ALL elements through 3 suspensions on
+  the CEK path, confirming the element-drop class is serving-JIT-side, not
+  CEK. Caveat noted in the docstring: needs the runner's cek-* driver
+  bindings (absent on bare sx_server/MCP — the env-parity theme again).
+  290/0. Test-infra-only.
+- 2026-07-04 — **C22/K104 throwing-mock fix + pins (item C.2)**. First
+  actual FIX of the loop — in scope because spec/harness.sx is W14-owned
+  test infrastructure (PLAN approach item 4 assigns "log IO before invoking
+  the mock" to W14). TDD: reproduced pre-fix (caught error, 0 log entries),
+  then restructured `make-interceptor` to append the entry BEFORE the mock
+  runs (`:result nil` while pending, `dict-set!` in place on return).
+  Verified: throwing mock leaves entry, happy path updates result, mixed
+  sequence counts all 3. Added suite `gate-C22-throwing-mock-logged`
+  (3 tests). Harness self-suite (15) + test-relate-picker (only other
+  harness consumer) green; 285/0 pins run. Tooling notes: replace/insert
+  tools take `new_source` (not `replacement`); find_all paths still
+  disagree with read_subtree/replace_node on define-library files —
+  sx_write_file remains the reliable route. Test-infra-only.
+- 2026-07-04 — **K19 harness-parity pin (item C.1)**. Authored
+  `scripts/test-harness-parity.sh`: drives `mcp_tree.exe` `sx_eval` with
+  raw JSON-RPC over stdio and a fresh `sx_server.exe` over the epoch
+  protocol, running the finding's exact 12-probe battery (empty?/get/
+  split/equal?/contains?/keyword-name/char-code/parse-number) through both
+  and failing on ANY divergence. Errors normalized to their inner message
+  so identical failures compare equal (`keyword-name :kw` errors the same
+  way on both — keywords evaluate to strings before the call). Result:
+  12/12 parity — dc7aa709's 8-entry stopgap alignment holds; this pin keeps
+  it honest until the real fix (mcp_tree links sx_primitives) lands in the
+  hosts lane. Test-only.
+- 2026-07-04 — **Section B: env-parity audit + ledger**. Probed a fresh
+  `sx_server` over the epoch protocol (`deps-check` + live eval). Confirmed
+  runner-only drift: `values`/`call-with-values` (run_tests.ml:1131/1140),
+  `contains-char?` (rt.ml:728 + rt.js:85), `trim-right` (**JS runner only**
+  — absent even from the OCaml runner), `sha3-256` (rt.ml:745 + rt.js:88).
+  Consequence verified live: `(canonical-serialize 42)` on the server →
+  `Undefined symbol: contains-char?` (content addressing broken for ANY
+  number outside the runners). **Worse than the finding**: BOTH runners'
+  `sha3-256` are FAKE stubs (OCaml uses `Hashtbl.hash`!) while production
+  has real `crypto-sha3-256` — every CID computed in tests differs from
+  production CIDs. Authored `scripts/test-env-parity.sh` as a bidirectional
+  ledger: MUST_HAVE regressions fail; a KNOWN_DRIFT binding *appearing*
+  also fails (forces ledger + consequence-pin update when W5/W7/W12 land
+  fixes). 7/7 green. Test-only.
+- 2026-07-04 — **S4 error-page-cache pin (item A.7) — section A COMPLETE**.
+  Extended `scripts/test-protocol-gate.sh` with an HTTP-mode case: fresh
+  `sx_server.exe --http <random-port>` (timeout-bounded, own PID killed at
+  end), GET the same nonexistent path twice, assert BOTH requests re-render
+  (2 `[sx-http]` lines — pre-fix the 2nd was cache-served at 0.0005s) and
+  the `[cache] … error page, not cached` is_err gate line appears. Findings
+  from prototyping: standalone worktree renders ALL docs pages as soft error
+  pages (no content), so a positive "real page IS cached" control is not
+  assertable here — documented in the script; startup takes ~12-15s (poll
+  loop, 40s budget). 5/5 protocol-gate green + 267/0 sx pins. Test-only.
+- 2026-07-04 — **C1/C1b command-channel pins (item A.6)**. These are
+  protocol-level, not .sx-suite pins: authored
+  `scripts/test-protocol-gate.sh` — each case spawns its OWN timeout-bounded
+  `sx_server.exe` (no shared process touched) and asserts three things: an
+  `(error N "Malformed command line: ...")` response is emitted, the
+  follow-up epoch still evaluates (process survived), and no `Fatal error`
+  escapes / exit is clean. Cases: C1 unterminated list (exact review repro),
+  C1 plain-garbage line, C1b non-ASCII byte (`café`), plus a well-formed
+  control session. 4/4 green. The script is deliberately structured to grow
+  into section E's fuzz suite (C3–C7). Test-only.
+- 2026-07-04 — **crit-2 non-vacuous pin (item A.5)**. The original bug's
+  signature — handler value becomes the WHOLE program result, discarding
+  every outer frame *including the covering test's own assert* — means a
+  plain `(assert= repro expected)` pin would pass vacuously on regression.
+  Added suite `gate-crit2-signal-return-kont` with a **side-effect sentinel**:
+  test 1 runs both repros (`("outer" 43 "end")` list shape + `raise-continuable`
+  → 143) then `set!`s a top-level flag; test 2 independently asserts the flag
+  — if the continuation is ever dropped again, test 1 "passes" but test 2
+  fails loudly. Third test pins the exact shipped-test expr (51). Verified
+  both repro shapes live via sx_eval first. 267 passed / 0 failed. Test-only.
+- 2026-07-03 — **K49 void-elements pin (item A.4) + regen-drift DISCOVERY**.
+  Corrected the checklist label first: K49 is "five void elements
+  unrenderable" (core.md:335), not the depth guard (that's K16, OPEN). Added
+  suite `gate-K49-void-elements-renderable` (3 tests): spec `HTML_TAGS`
+  contains all five; `(render-to-html '(base :href "x") (make-env))` →
+  `<base href="x" />`; all five render self-closing. Runner-env gotchas:
+  `current-env`/`symbol` are not bound in run_tests — use `(make-env)` and
+  literal quoted forms. **Discovery:** the first draft pinned via the
+  runner's native `render-html` and FAILED — `hosts/ocaml/lib/sx_render.ml`
+  (generated) was never regenerated after dc7aa709's spec fix, so the native
+  render path still errors on the five tags. Recorded under Blocked; live
+  evidence for F13 (regen-diff gate). 264 passed / 0 failed. Test-only.
+- 2026-07-03 — **K09/K11/K39 W5 special-form pins (item A.3)**. Three suites
+  added to `spec/tests/test-gate-pins.sx`: `gate-K09-longhand-unquote-splicing`
+  (R7RS longhand `(unquote-splicing X)` now splices, incl. empty-list case;
+  shorthand still works), `gate-K11-guard-reraise-forgeable` (a body/clause
+  value shaped like `(list '__guard-reraise__ X)` is returned as data, not
+  misread as a re-raise — sentinel is now gensym'd), `gate-K39-do-iife-head`
+  (`(do ((fn (x) x) 5) 99)` → 99, not a misparsed do-loop — exact core.md
+  repro). Gotchas hit and fixed: quasiquoted bare idents are *symbols* not
+  strings, and `assert=` compares with `=` (not `equal?`, which returns false
+  on these spliced lists). 261 passed / 0 failed under OCaml run_tests. Test-only.
+- 2026-07-03 — **K20 contains?-dict pin (item A.2)**. Mapped K-codes by
+  core.md severity order (K17 append!, K18 expt, K19 harness-drift, K20
+  contains?-dict). Added suite `gate-K20-contains-dict` to
+  `spec/tests/test-gate-pins.sx` (4 tests): present dict key → true, missing
+  key → false, list membership unchanged, string substring unchanged. Repro
+  from core.md ("(contains? {:a 1} :a) threw `contains?: 2 args`"). 8/8 green
+  across both suites under OCaml run_tests. Test-only.
+- 2026-07-03 — **K18 expt-overflow pin (item A.1)**. Bootstrapped this briefing
+  from PLAN.md §W14 (the referenced file did not exist yet). Added
+  `spec/tests/test-gate-pins.sx` with suite `gate-K18-expt-overflow` (4 tests):
+  small exponents stay exact (`2^0=1`, `2^10=1024`), `2^62 > 0` (no negative
+  63-bit wrap), `2^100 > 0` (no wrap-to-zero), `2^100` is a number (float
+  promotion). Verified 4/4 green under the OCaml run_tests kernel. Test-only.
+
+## Blocked
+- **K49 native path — sx_render.ml regen drift** (found 2026-07-03 while
+  pinning A.4): dc7aa709 fixed HTML_TAGS in `spec/render.sx` but never re-ran
+  `hosts/ocaml/bootstrap_render.py`, so the generated
+  `hosts/ocaml/lib/sx_render.ml` still carries a stale `html_tags_list`
+  without area/base/embed/param/track. The runner's native `render-html`
+  convenience (and any native fast-path render) therefore STILL throws
+  `Undefined symbol: base` — dc7aa709's "verified on the native binary" claim
+  did not cover this path. Fix = regen (hosts lane, semantics-adjacent — out
+  of scope for this test-only loop). This is a live instance of **F13**
+  (regen-diff CI gate, section-B/D territory): a regen-diff check would have
+  caught it at commit time. The K49 pin covers the spec side only; when the
+  regen lands, extend the suite with `render-html`-path assertions.
--- a/scripts/test-differential.sh
+++ b/scripts/test-differential.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+# test-differential.sh — W14/F8: cross-host differential battery.
+#
+# Evaluates every expression in spec/tests/differential-probes.txt on:
+#   A) the native server (sx_server.exe, epoch protocol) — its printer
+#   B) the SHIPPED browser kernel (eval_wasm_probes.js, guest sx-serialize)
+# and diffs the outputs. The review's original 130-probe corpus was
+# ephemeral (F-8); this is the committed replacement.
+#
+# KNOWN_DIVERGENT is the ledger of confirmed, still-open divergences —
+# keyed by the probe EXPRESSION. Red on a NEW divergence (host drift) and
+# red on a HEALED one (fix landed: delete the entry, locking in parity).
+#
+# Method note (finding refinement, 2026-07-04): comparing raw K.eval
+# JS-boundary values shows float-display divergences (0.3 vs
+# 0.30000000000000004) that DISAPPEAR under guest-level (sx-serialize …) —
+# the F-1 float-display class is a JS-boundary artifact, not a kernel
+# serialization divergence. This battery compares guest serialization.
+set -uo pipefail
+cd "$(dirname "$0")/.."
+
+SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
+WASM=shared/static/wasm/sx_browser.bc.wasm.js
+PROBES=spec/tests/differential-probes.txt
+[[ -x "$SERVER" ]] || { echo "SKIP: $SERVER not built" >&2; exit 2; }
+[[ -f "$WASM" ]] || { echo "SKIP: $WASM missing" >&2; exit 2; }
+
+# --- KNOWN_DIVERGENT ledger (verified live 2026-07-04) -------------------
+# F-3/K53: bare sx_server's `apply` does NOT spread its argument list —
+# (apply + (list 1 2 3)) errors "Expected number, got list"; (apply str l)
+# returns the serialized list as one string. The WASM kernel spreads
+# correctly. The test runner masks this with its own apply (F-7 class).
+declare -A KNOWN_DIVERGENT
+KNOWN_DIVERGENT['(apply + (list 1 2 3))']="F-3: native apply does not spread"
+KNOWN_DIVERGENT['(apply max (list 1 5 2))']="F-3: native apply does not spread"
+KNOWN_DIVERGENT['(apply str (list "a" "b"))']="F-3: native apply does not spread"
+
+native=$(mktemp); wasm=$(mktemp)
+
+python3 - "$SERVER" "$PROBES" > "$native" <<'PY'
+import json, subprocess, sys
+server, probefile = sys.argv[1], sys.argv[2]
+probes = [l.strip() for l in open(probefile) if l.strip() and not l.startswith('#')]
+inp = []
+for i, p in enumerate(probes):
+    inp.append(f"(epoch {i+1})")
+    inp.append(f"(eval {json.dumps(p)})")
+out = subprocess.run(["timeout", "120", server], input="\n".join(inp) + "\n",
+                     capture_output=True, text=True).stdout
+res, cur = {}, None
+for l in out.splitlines():
+    if l.startswith("(ok-len "):
+        cur = int(l.split()[1]); res[cur] = None
+    elif l.startswith("(error "):
+        idx = int(l.split()[1]); res[idx] = "ERROR"; cur = None
+    elif cur is not None and res.get(cur) is None:
+        res[cur] = l; cur = None
+for i, p in enumerate(probes):
+    print(f"PROBE {i+1} {res.get(i+1, '<none>')}")
+PY
+
+timeout 300 node hosts/ocaml/browser/eval_wasm_probes.js "$PROBES" > "$wasm" 2>/dev/null
+
+pass=0; fail=0; i=0
+while IFS= read -r expr; do
+  [[ -z "$expr" || "$expr" == \#* ]] && continue
+  i=$((i+1))
+  a=$(sed -n "s/^PROBE $i //p" "$native")
+  b=$(sed -n "s/^PROBE $i //p" "$wasm")
+  known="${KNOWN_DIVERGENT[$expr]:-}"
+  if [[ "$a" == "$b" ]]; then
+    if [[ -n "$known" ]]; then
+      echo "RED:  $expr — KNOWN_DIVERGENT now AGREES ($known); delete from ledger"
+      fail=$((fail+1))
+    else
+      pass=$((pass+1))
+    fi
+  else
+    if [[ -n "$known" ]]; then
+      echo "KNOWN-DIVERGENT: $expr ($known)"
+      pass=$((pass+1))
+    else
+      echo "RED:  $expr"
+      echo "        native: $a"
+      echo "        wasm:   $b"
+      fail=$((fail+1))
+    fi
+  fi
+done < <(grep -v '^\s*#' "$PROBES" | grep -v '^\s*$')
+
+rm -f "$native" "$wasm"
+echo
+echo "differential: $i probes, $pass in agreement/ledgered, $fail red"
+[[ $fail -eq 0 ]]
--- a/scripts/test-env-parity.sh
+++ b/scripts/test-env-parity.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+# test-env-parity.sh — W14 section-B ledger: runner env vs production env.
+#
+# The review (F7, K42, JS5, core.md "canonical.sx depends on test-runner-only
+# helpers") found bindings that exist ONLY in the test runners, so suites
+# pass against an environment production never provides. Rule (PLAN.md W14):
+# "if the spec needs it, it's a kernel primitive; if not, the test can't
+# have it."
+#
+# This script is a LEDGER, not a wish: it asserts today's confirmed drift
+# stays exactly as recorded. Both directions fail loudly:
+#   - a MUST_HAVE going missing on the server  -> regression, fix the kernel
+#   - a KNOWN_DRIFT binding appearing on the server -> the fix landed;
+#     move it to MUST_HAVE and update the consequence pins below.
+#
+# Confirmed inventory (2026-07-04, all verified live over the epoch protocol):
+#
+#   binding            OCaml runner        JS runner    fresh sx_server
+#   values             real (rt.ml:1131)   ?            ABSENT
+#   call-with-values   real (rt.ml:1140)   ?            ABSENT
+#   contains-char?     real (rt.ml:728)    real (:85)   ABSENT
+#   trim-right         ABSENT              real (:87)   ABSENT
+#   sha3-256           FAKE Hashtbl.hash   FAKE stub    ABSENT (real = crypto-sha3-256)
+#
+# Consequences (pinned in section 3):
+#   - (canonical-serialize 42) on a fresh server errors "Undefined symbol:
+#     contains-char?" -> content addressing broken for ANY number outside
+#     the test runners.
+#   - every CID computed inside run_tests uses a FAKE hash, so test CIDs
+#     never equal production CIDs (crypto-sha3-256 is real SHA3).
+#
+# Each probe spawns its OWN timeout-bounded sx_server.exe. No shared process.
+set -uo pipefail
+
+cd "$(dirname "$0")/.."
+SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
+
+if [[ ! -x "$SERVER" ]]; then
+  echo "SKIP: $SERVER not built (run sx_build target=ocaml first)" >&2
+  exit 2
+fi
+
+pass=0
+fail=0
+
+# deps_unresolved EXPR -> prints the (unresolved ...) list for EXPR on a fresh server
+deps_unresolved() {
+  printf '(epoch 1)\n(deps-check "%s")\n' "$1" \
+    | timeout 60 "$SERVER" 2>/dev/null \
+    | grep -o ':unresolved ([^)]*)' || true
+}
+
+# --- Section 1: MUST_HAVE — spec-needed bindings production must provide ---
+MUST_HAVE_EXPR='(list (equal? 1 1) (apply + (list 1 2)) (contains? {:a 1} :a) (crypto-sha3-256 \"x\") (split \"a-b\" \"-\"))'
+unres=$(deps_unresolved "$MUST_HAVE_EXPR")
+if [[ -z "$unres" || "$unres" == ':unresolved ()' ]]; then
+  echo "PASS: MUST_HAVE core bindings all resolve on fresh sx_server"
+  pass=$((pass+1))
+else
+  echo "FAIL: MUST_HAVE binding missing on fresh sx_server: $unres"
+  fail=$((fail+1))
+fi
+
+# --- Section 2: KNOWN_DRIFT — runner-only bindings, asserted ABSENT -------
+# If one of these starts resolving, its kernel fix landed: move it to
+# MUST_HAVE above and update the consequence pin in section 3.
+for name in values call-with-values contains-char? trim-right sha3-256; do
+  unres=$(deps_unresolved "($name)")
+  if grep -q -- "$name" <<<"$unres"; then
+    echo "PASS: KNOWN_DRIFT '$name' still absent on fresh sx_server (ledger accurate)"
+    pass=$((pass+1))
+  else
+    echo "FAIL: KNOWN_DRIFT '$name' now RESOLVES on fresh sx_server — fix landed?"
+    echo "      Update this ledger: move '$name' to MUST_HAVE and revisit section 3."
+    fail=$((fail+1))
+  fi
+done
+
+# --- Section 3: consequence pin — canonical.sx on the production server ---
+# Current reality: canonical-serialize of ANY number errors on a fresh
+# server because canonical-number calls runner-only contains-char?.
+out=$(printf '(epoch 1)\n(load "spec/canonical.sx")\n(epoch 2)\n(eval "(canonical-serialize 42)")\n' \
+  | timeout 60 "$SERVER" 2>&1)
+if grep -q 'error 2 .*contains-char?' <<<"$out"; then
+  echo "PASS: consequence pin — canonical-serialize on numbers still broken on server (as recorded)"
+  pass=$((pass+1))
+elif grep -q '^(ok 2 ' <<<"$out"; then
+  echo "FAIL: consequence pin — canonical-serialize 42 now WORKS on the server."
+  echo "      The canonical-helpers fix landed: flip this pin to assert success"
+  echo "      and pin the exact canonical form + CID stability."
+  fail=$((fail+1))
+else
+  echo "FAIL: consequence pin — unexpected server output:"
+  sed 's/^/  /' <<<"$out"
+  fail=$((fail+1))
+fi
+
+echo
+echo "env-parity: $pass passed, $fail failed"
+[[ $fail -eq 0 ]]
--- a/scripts/test-harness-parity.sh
+++ b/scripts/test-harness-parity.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+# test-harness-parity.sh — W14 section-C pin for K19 (harness honesty).
+#
+# K19 (review, core.md): the MCP tree server (mcp_tree.ml) carries a
+# PARALLEL primitive table, and it drifted from the real runtime
+# (sx_primitives.ml) — e.g. (get {:a 1} :a 99) returned nil in the harness
+# but 1 in production, (split "a--b" "--") was char-class vs substring.
+# CLAUDE.md mandates harness verification, so drift silently produces
+# false findings/passes. dc7aa709 aligned 8 entries as a stopgap; the real
+# fix (mcp_tree links sx_primitives directly) is hosts-lane work.
+#
+# This pin runs the finding's exact probe battery through BOTH environments
+# — mcp_tree.exe sx_eval (JSON-RPC over stdio) and a fresh sx_server.exe
+# (epoch protocol) — and fails on ANY divergence. Errors are compared by
+# message, values by serialized form. Both subprocesses are fresh and
+# timeout-bounded; no shared process is touched.
+#
+# Exit: 0 = full parity; 1 = drift (harness lies about the runtime again).
+set -uo pipefail
+cd "$(dirname "$0")/.."
+
+MCP=hosts/ocaml/_build/default/bin/mcp_tree.exe
+SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
+for bin in "$MCP" "$SERVER"; do
+  if [[ ! -x "$bin" ]]; then
+    echo "SKIP: $bin not built (run sx_build target=ocaml first)" >&2
+    exit 2
+  fi
+done
+
+python3 - "$MCP" "$SERVER" <<'PYEOF'
+import json, re, subprocess, sys
+
+MCP, SERVER = sys.argv[1], sys.argv[2]
+
+# K19 probe battery — the finding's confirmed drift cases + stopgap entries.
+PROBES = [
+    '(empty? "")', '(empty? {})',
+    '(get {:a 1} :a 99)', '(get {:a 1} :zz 99)', '(get (list 10 20) 1)',
+    '(split "a--b" "--")', '(split "abc" "")',
+    '(equal? (list 1 2) (list 1 2))',
+    '(contains? {:a 1} :a)', '(keyword-name :kw)',
+    '(char-code "A")', '(parse-number "42")',
+]
+
+def norm_error(msg):
+    """Extract the quoted inner error message so harness/server error
+    envelopes compare equal when the underlying failure is the same."""
+    m = re.search(r'Unhandled exception: \\?"(.*?)\\?"', msg)
+    if m:
+        return "<ERROR> " + m.group(1)
+    return "<ERROR> " + msg.strip()[:80]
+
+# --- harness side: mcp_tree sx_eval over JSON-RPC ---
+lines = [
+    json.dumps({"jsonrpc": "2.0", "id": 1, "method": "initialize",
+                "params": {"protocolVersion": "2024-11-05", "capabilities": {},
+                           "clientInfo": {"name": "parity", "version": "0"}}}),
+    json.dumps({"jsonrpc": "2.0", "method": "notifications/initialized"}),
+]
+for i, p in enumerate(PROBES):
+    lines.append(json.dumps({"jsonrpc": "2.0", "id": 100 + i, "method": "tools/call",
+                             "params": {"name": "sx_eval", "arguments": {"expr": p}}}))
+out = subprocess.run(["timeout", "60", MCP], input="\n".join(lines) + "\n",
+                     capture_output=True, text=True).stdout
+harness = {}
+for l in out.splitlines():
+    try:
+        j = json.loads(l)
+    except ValueError:
+        continue
+    if isinstance(j.get("id"), int) and j["id"] >= 100:
+        txt = j.get("result", {}).get("content", [{}])[0].get("text", "<none>").strip()
+        if txt.startswith("Error:") or j.get("result", {}).get("isError"):
+            txt = norm_error(txt)
+        harness[j["id"] - 100] = txt
+
+# --- server side: fresh sx_server over the epoch protocol ---
+inp = []
+for i, p in enumerate(PROBES):
+    inp.append(f"(epoch {i + 1})")
+    inp.append(f"(eval {json.dumps(p)})")
+sout = subprocess.run(["timeout", "60", SERVER], input="\n".join(inp) + "\n",
+                      capture_output=True, text=True).stdout
+server, cur = {}, None
+for l in sout.splitlines():
+    if l.startswith("(ok-len "):
+        cur = int(l.split()[1]); server[cur - 1] = None
+    elif l.startswith("(error "):
+        idx = int(l.split()[1]); server[idx - 1] = norm_error(l); cur = None
+    elif cur is not None and server.get(cur - 1) is None:
+        server[cur - 1] = l.strip(); cur = None
+
+fails = 0
+for i, p in enumerate(PROBES):
+    h = harness.get(i, "<missing>")
+    s = server.get(i, "<missing>")
+    if h == s:
+        print(f"PASS: {p:40s} both -> {h!r}")
+    else:
+        print(f"FAIL: {p:40s} harness={h!r} server={s!r}")
+        fails += 1
+
+print()
+print(f"harness-parity: {len(PROBES) - fails} passed, {fails} failed")
+sys.exit(1 if fails else 0)
+PYEOF
--- a/scripts/test-protocol-gate.sh
+++ b/scripts/test-protocol-gate.sh
@@ -0,0 +1,233 @@
+#!/bin/bash
+# test-protocol-gate.sh — W14 pins for the epoch/command-channel protocol.
+#
+# Pins C1/C1b (review, plans/sx-review/hosts.md): a malformed or non-ASCII
+# line on the top-level command channel used to raise an uncaught
+# Sx_types.Parse_error and KILL the whole sx_server process (the shared
+# channel used by bridges and conformance runners). Fixed in dc7aa709:
+# the server now answers `(error N "Malformed command line: ...")` and
+# keeps serving.
+#
+# Each case spawns its OWN timeout-bounded sx_server.exe subprocess —
+# no shared/sibling process is ever touched. Designed to grow into the
+# W14 section-E protocol fuzz suite (C3-C7).
+#
+# Usage: bash scripts/test-protocol-gate.sh
+# Exit:  0 = all pins green; 1 = a pin failed (fix regressed).
+set -uo pipefail
+
+cd "$(dirname "$0")/.."
+SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
+
+if [[ ! -x "$SERVER" ]]; then
+  echo "SKIP: $SERVER not built (run sx_build target=ocaml first)" >&2
+  exit 2
+fi
+
+pass=0
+fail=0
+
+# run_case NAME INPUT EXPECT_SENTINEL
+#   Feeds INPUT to a fresh server. Asserts:
+#   1. an (error ... "Malformed command line: ...") response is emitted
+#   2. the follow-up epoch still evaluates (EXPECT_SENTINEL in output)
+#   3. the process exits cleanly (no Fatal error, exit 0 on stdin EOF)
+run_case() {
+  local name="$1" input="$2" sentinel="$3"
+  local out rc
+  out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1)
+  rc=$?
+  local ok=1
+  if ! grep -q 'Malformed command line' <<<"$out"; then
+    echo "FAIL: $name — no malformed-line error response"; ok=0
+  fi
+  if ! grep -q "^${sentinel}\$" <<<"$out"; then
+    echo "FAIL: $name — follow-up epoch did not run (process died?)"; ok=0
+  fi
+  if grep -q 'Fatal error' <<<"$out"; then
+    echo "FAIL: $name — Fatal error escaped to the top level"; ok=0
+  fi
+  if [[ $rc -ne 0 ]]; then
+    echo "FAIL: $name — nonzero exit ($rc)"; ok=0
+  fi
+  if [[ $ok -eq 1 ]]; then
+    echo "PASS: $name"
+    pass=$((pass+1))
+  else
+    echo "  --- output ---"; sed 's/^/  /' <<<"$out"; echo "  --------------"
+    fail=$((fail+1))
+  fi
+}
+
+# C1: unterminated list on the command channel (exact review repro)
+run_case "C1 unterminated list survives" \
+  '(epoch 2)\n(eval "(+ 1 2"\n(epoch 3)\n(eval "99")\n' \
+  '99'
+
+# C1: plain-garbage line (second C1 repro shape)
+run_case "C1 garbage line survives" \
+  '(epoch 1)\nnot an s-expr ]]] {{{\n(epoch 2)\n(eval "42")\n' \
+  '42'
+
+# C1b: non-ASCII byte on the command channel (exact review repro; \xc3\xa9 = é)
+run_case "C1b non-ASCII line survives" \
+  '(epoch 1)\n(eval (quote caf\xc3\xa9))\n(epoch 2)\n(eval "99")\n' \
+  '99'
+
+# Control: a well-formed session still works end to end
+ctrl=$(printf '(epoch 1)\n(eval "(+ 40 2)")\n' | timeout 60 "$SERVER" 2>&1)
+if grep -q '^42$' <<<"$ctrl"; then
+  echo "PASS: control well-formed session"
+  pass=$((pass+1))
+else
+  echo "FAIL: control well-formed session"; sed 's/^/  /' <<<"$ctrl"
+  fail=$((fail+1))
+fi
+
+# ---------------------------------------------------------------------------
+# C3–C7 protocol-quirk LEDGER (hosts.md, all OPEN server-side). These pin
+# CURRENT behavior, verified live 2026-07-04 — they are documentation, not
+# endorsement. When a server fix lands and a pin fails, update the ledger
+# to assert the corrected behavior (bidirectional, like test-env-parity.sh).
+# ---------------------------------------------------------------------------
+
+# ledger_case NAME INPUT GREP_MUST GREP_MUST2
+ledger_case() {
+  local name="$1" input="$2" must="$3" must2="${4:-}"
+  local out
+  out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1)
+  local ok=1
+  grep -q -- "$must" <<<"$out" || { echo "FAIL: $name — expected: $must"; ok=0; }
+  if [[ -n "$must2" ]]; then
+    grep -q -- "$must2" <<<"$out" || { echo "FAIL: $name — expected: $must2"; ok=0; }
+  fi
+  if grep -q 'Fatal error' <<<"$out"; then
+    echo "FAIL: $name — process died"; ok=0
+  fi
+  if [[ $ok -eq 1 ]]; then echo "PASS: $name"; pass=$((pass+1));
+  else echo "  --- output ---"; sed 's/^/  /' <<<"$out"; fail=$((fail+1)); fi
+}
+
+# C3: stray (io-response ...) is answered as Unknown command (dead guard) —
+# an EXTRA response the client didn't ask for; process keeps serving.
+ledger_case "C3 ledger: stray io-response gets an extra error reply" \
+  '(epoch 1)\n(io-response 1 42)\n(eval "5")\n' \
+  'Unknown command: (io-response 1 42)' '^5$'
+
+# C4: malformed (epoch) doesn't update the epoch — next reply tagged with
+# the OLD epoch (0 here), i.e. stale from the client's viewpoint.
+ledger_case "C4 ledger: malformed epoch marker leaves epoch stale" \
+  '(epoch)\n(eval "2")\n' \
+  '(ok-len 0 1)' '^2$'
+
+# C5: no monotonic-epoch enforcement — a decreasing epoch is accepted.
+ledger_case "C5 ledger: decreasing epoch accepted silently" \
+  '(epoch 9)\n(epoch 3)\n(eval "42")\n' \
+  '(ok-len 3 2)' '^42$'
+
+# C6: two commands on one line -> one error, NEITHER executed.
+ledger_case "C6 ledger: two commands on one line both dropped" \
+  '(epoch 1)\n(eval "1") (eval "2")\n(eval "3")\n' \
+  'Expected single command, got 2' '^3$'
+
+# C7: vm-trace without the compiler loaded errors opaquely.
+ledger_case "C7 ledger: vm-trace sans compiler is opaque Not-callable-nil" \
+  '(epoch 1)\n(vm-trace "(+ 1 2)")\n' \
+  'Not callable: nil'
+
+# ---------------------------------------------------------------------------
+# Fuzz-liveness property: after 60 deterministic hostile lines (unbalanced
+# parens, control chars, unicode, long lines, stray io-responses, epoch
+# mutations), the server must still answer a well-formed command and exit
+# cleanly. Seeded PRNG — reproducible corpus.
+# ---------------------------------------------------------------------------
+fuzz=$(python3 - <<'PY'
+import random
+r = random.Random(1404)
+lines = []
+frag = ['(', ')', '((', '))', '(eval', '(epoch', 'io-response', '"', '\\',
+        'café', '\x01', '\x1b[2J', ':kw', '{', '}', '(+ 1', 'nil)', '#|', '|#']
+for i in range(60):
+    kind = r.randrange(5)
+    if kind == 0:
+        lines.append(''.join(r.choice(frag) for _ in range(r.randrange(1, 8))))
+    elif kind == 1:
+        lines.append('(epoch ' + r.choice(['', 'foo', '-1', '999999999999999999999', ')']) + ')')
+    elif kind == 2:
+        lines.append('(io-response %d %s' % (r.randrange(99), r.choice([')', '', '42']) ))
+    elif kind == 3:
+        lines.append('x' * r.randrange(200, 2000))
+    else:
+        lines.append('(eval "' + r.choice(['(+ 1', '(list', '\\\\', '((((']) + '")')
+print('\n'.join(lines))
+PY
+)
+out=$(printf '%s\n(epoch 777)\n(eval "\\"alive\\"")\n' "$fuzz" | timeout 90 "$SERVER" 2>&1)
+rc=$?
+if grep -q '^"alive"$' <<<"$out" && ! grep -q 'Fatal error' <<<"$out" && [[ $rc -eq 0 ]]; then
+  echo "PASS: fuzz-liveness — server survives 60 hostile lines and still answers"
+  pass=$((pass+1))
+else
+  echo "FAIL: fuzz-liveness (rc=$rc)"; tail -6 <<<"$out" | sed 's/^/  /'
+  fail=$((fail+1))
+fi
+
+# ---------------------------------------------------------------------------
+# S4 (review, hosts.md): soft error pages must NOT be stored in the HTTP
+# response cache. Pre-fix, a routing-failure page was cached as HTTP 200 and
+# served byte-identically from cache to every later visitor (cold 2s → warm
+# 0.0005s, ONE render line). Post-fix (dc7aa709), http_render_page returns
+# (html, is_error) and cache insertion is gated on `not is_err` (the skip is
+# logged as "[cache] <path> → error page, not cached").
+#
+# Pin: GET the same nonexistent path twice against a fresh --http server and
+# assert BOTH requests re-render (two [sx-http] render lines) plus the
+# is_err gate line appearing in the log. NB: in a standalone worktree all
+# docs pages render as soft error pages (no content), so a positive
+# "real page IS cached" control is not assertable here.
+# ---------------------------------------------------------------------------
+s4_case() {
+  local port=$((18000 + RANDOM % 2000))
+  local log; log=$(mktemp)
+  timeout 90 "$SERVER" --http "$port" >"$log" 2>&1 &
+  local srv=$!
+  local up=0
+  for _ in $(seq 1 40); do
+    if curl -s -o /dev/null "http://localhost:$port/" 2>/dev/null; then up=1; break; fi
+    sleep 1
+  done
+  if [[ $up -ne 1 ]]; then
+    echo "FAIL: S4 — http server did not come up on :$port"
+    kill "$srv" 2>/dev/null; rm -f "$log"
+    fail=$((fail+1)); return
+  fi
+  local miss="/sx/gate-pin-missing-$$-$RANDOM"
+  curl -s -o /dev/null "http://localhost:$port$miss"
+  curl -s -o /dev/null "http://localhost:$port$miss"
+  sleep 1
+  local renders
+  renders=$(grep -c "sx-http\] $miss " "$log")
+  local ok=1
+  if [[ "$renders" -ne 2 ]]; then
+    echo "FAIL: S4 — expected 2 renders of $miss (not cache-served), got $renders"
+    ok=0
+  fi
+  if ! grep -q 'error page, not cached' "$log"; then
+    echo "FAIL: S4 — is_err cache gate line absent from server log"
+    ok=0
+  fi
+  if [[ $ok -eq 1 ]]; then
+    echo "PASS: S4 soft error page not cached (both GETs re-rendered)"
+    pass=$((pass+1))
+  else
+    echo "  --- log tail ---"; tail -12 "$log" | sed 's/^/  /'; echo "  ---------------"
+    fail=$((fail+1))
+  fi
+  kill "$srv" 2>/dev/null
+  rm -f "$log"
+}
+s4_case
+
+echo
+echo "protocol-gate: $pass passed, $fail failed"
+[[ $fail -eq 0 ]]
--- a/scripts/test-suite-baseline.sh
+++ b/scripts/test-suite-baseline.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# test-suite-baseline.sh — W14/F10: make FAIL mean something again.
+#
+# The review (conformance.md F-10): the OCaml suite is not green — a
+# permanent ~274-failure band (in-progress hs-* + r7rs radix shadow) is
+# normalized, so real regressions hide inside the red noise and nobody can
+# tell a new failure from the band.
+#
+# This gate pins the band instead of ignoring it: the full suite's FAIL
+# set is diffed against the checked-in baseline
+# (spec/tests/known-failures.txt). Two red conditions, both loud:
+#   NEW failure      -> a real regression: fix it (or, if intentional,
+#                       justify + add to the baseline in the same commit)
+#   VANISHED failure -> something got fixed: delete it from the baseline
+#                       so the win is locked in
+# Neither touches the runner or the hs loops' scoreboards — the band still
+# prints as FAIL lines for the teams working through it.
+#
+# Usage: bash scripts/test-suite-baseline.sh
+# Runtime: full suite, ~5–15 min. Exit 0 = fail set identical to baseline.
+set -uo pipefail
+cd "$(dirname "$0")/.."
+
+RUNNER=hosts/ocaml/_build/default/bin/run_tests.exe
+BASELINE=spec/tests/known-failures.txt
+[[ -x "$RUNNER" ]] || { echo "SKIP: $RUNNER not built" >&2; exit 2; }
+[[ -f "$BASELINE" ]] || { echo "SKIP: $BASELINE missing" >&2; exit 2; }
+
+log=$(mktemp)
+timeout 3000 "$RUNNER" > "$log" 2>&1
+rc=$?
+if [[ $rc -ne 0 && $rc -ne 1 ]]; then
+  echo "RED: runner exited $rc (timeout/crash)"; tail -5 "$log"; rm -f "$log"; exit 1
+fi
+
+# Normalize: keep the stable test identity (suite > name), drop messages
+# (error text may contain addresses/timings that churn).
+current=$(mktemp)
+grep '^  FAIL: ' "$log" | sed 's/^  FAIL: //; s/: .*$//' | sort -u > "$current"
+
+new_failures=$(comm -13 <(sort -u "$BASELINE") "$current")
+vanished=$(comm -23 <(sort -u "$BASELINE") "$current")
+
+summary=$(grep '^Results:' "$log" | tail -1)
+red=0
+if [[ -n "$new_failures" ]]; then
+  echo "RED: NEW failures not in baseline:"
+  sed 's/^/  + /' <<<"$new_failures"
+  red=1
+fi
+if [[ -n "$vanished" ]]; then
+  echo "RED: baseline entries now PASSING (delete them from $BASELINE):"
+  sed 's/^/  - /' <<<"$vanished"
+  red=1
+fi
+if [[ $red -eq 0 ]]; then
+  echo "GREEN: fail set identical to baseline ($(wc -l < "$BASELINE") known failures)"
+fi
+echo "$summary"
+rm -f "$log" "$current"
+exit $red
--- a/scripts/test-wasm-corpus.sh
+++ b/scripts/test-wasm-corpus.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+# test-wasm-corpus.sh — W14/F2: sweep the spec test corpus through the
+# SHIPPED browser kernel (sx_browser.bc.wasm.js) headless in Node.
+#
+# The review (conformance.md F-2) found the shipped browser artifact never
+# runs the corpus — F-1/F-3 native/WASM divergences existed undetected.
+# Each file runs in its OWN node process via run_wasm_corpus.js (a hang is
+# killed by per-file timeout without ending the sweep).
+#
+# The SKIP list documents files that structurally cannot run on the browser
+# kernel (runner-only bindings, native-only machinery) — the F-5/F-6/F-10
+# "one-host-gated" theme, recorded honestly per file with the reason.
+# KNOWN_FAIL documents files that RUN but currently have failing tests on
+# the shipped kernel (host divergence, F-1/F-3 class): they execute and
+# report, but don't gate. Everything else must be GREEN — exit 1 otherwise;
+# a KNOWN_FAIL going green also fails (ledger must be updated).
+#
+# Usage: bash scripts/test-wasm-corpus.sh [file.sx ...]
+set -uo pipefail
+cd "$(dirname "$0")/.."
+
+RUNNER=hosts/ocaml/browser/run_wasm_corpus.js
+KERNEL=shared/static/wasm/sx_browser.bc.wasm.js
+[[ -f "$KERNEL" ]] || { echo "SKIP: $KERNEL missing (run sx-build-all first)" >&2; exit 2; }
+
+# --- classification (empirical sweep 2026-07-04; see sx-gate-loop.md) ---
+# Sweep baseline: 83 files, 80 fully green, 5192 passes, 0 test failures.
+# The shipped kernel even provides the CEK driver bindings (make-env,
+# cek-step-loop, ...) — broader than a bare sx_server.
+declare -A SKIP KNOWN_FAIL
+skip()  { SKIP[$1]=$2; }
+known() { KNOWN_FAIL[$1]=$2; }
+# Partial load-errors: the kernel throws mid-file (opaque jsoo exception,
+# message "undefined"); tests before the failing form pass and report.
+known test-hash-table.sx "partial: 22 pass then load-error mid-file"
+known test-r7rs.sx       "partial: 87 pass then load-error mid-file"
+known test-sets.sx       "partial: 30 pass then load-error mid-file"
+
+pass_total=0; fail_total=0; red=0; files=0
+declare -a targets
+if [[ $# -gt 0 ]]; then targets=("$@");
+else for f in spec/tests/test-*.sx; do
+  [[ "$(basename "$f")" == "test-framework.sx" ]] && continue
+  targets+=("$f")
+done; fi
+
+for f in "${targets[@]}"; do
+  base=$(basename "$f")
+  if [[ -n "${SKIP[$base]:-}" ]]; then
+    echo "SKIP: $base — ${SKIP[$base]}"
+    continue
+  fi
+  files=$((files+1))
+  line=$(timeout 120 node "$RUNNER" "$f" 2>/dev/null | grep '^CORPUS-RESULT' || true)
+  if [[ -z "$line" ]]; then
+    echo "RED:  $base — timeout or crash (no CORPUS-RESULT)"
+    red=$((red+1)); continue
+  fi
+  p=$(sed -n 's/.*pass=\([0-9]*\).*/\1/p' <<<"$line")
+  fl=$(sed -n 's/.*fail=\([0-9]*\).*/\1/p' <<<"$line")
+  st=$(sed -n 's/.*status=\([a-z-]*\).*/\1/p' <<<"$line")
+  pass_total=$((pass_total+p)); fail_total=$((fail_total+fl))
+  if [[ -n "${KNOWN_FAIL[$base]:-}" ]]; then
+    if [[ "$fl" -eq 0 && "$st" == "ok" ]]; then
+      echo "RED:  $base — KNOWN_FAIL is now GREEN (${KNOWN_FAIL[$base]}); update the ledger"
+      red=$((red+1))
+    else
+      echo "KNOWN-FAIL: $base pass=$p fail=$fl ($( echo "${KNOWN_FAIL[$base]}" ))"
+    fi
+    continue
+  fi
+  if [[ "$st" != "ok" || "$fl" -ne 0 ]]; then
+    echo "RED:  $base pass=$p fail=$fl status=$st"
+    red=$((red+1))
+  else
+    echo "OK:   $base pass=$p"
+  fi
+done
+
+echo
+echo "wasm-corpus: $files files run, $pass_total passed, $fail_total failed, $red red"
+[[ $red -eq 0 ]]
--- a/spec/harness.sx
+++ b/spec/harness.sx
@@ -1,6 +1,7 @@
 ;; Assert condition is truthy, error with message

-(define-library (sx harness)
+(define-library
+  (sx harness)
  (export
    assert
    assert=
@@ -12,6 +13,7 @@
    harness-set!
    make-interceptor
    install-interceptors
+    harness-run-perform
    io-calls
    io-call-count
    io-call-nth
@@ -24,71 +26,240 @@
    assert-io-result
    assert-state)
  (begin
-
-(define assert (fn (condition msg) (when (not condition) (error (or msg "Assertion failed")))))
-
-;; Assert two values are equal
-(define assert= (fn (actual expected msg) (when (not (= actual expected)) (error (or msg (str "Expected " expected ", got " actual))))))
-
-;; Dict of mock IO operations for testing
-(define default-platform {:current-user (fn () nil) :csrf-token (fn () "test-csrf-token") :app-url (fn (service &rest path) "/mock-app-url") :frag (fn (service comp &rest args) "") :sleep (fn (ms) nil) :local-storage-set (fn (key val) nil) :set-cookie (fn (name val &rest opts) nil) :url-for (fn (endpoint &rest args) "/mock-url") :create-element (fn (tag) nil) :request-path (fn () "/") :config (fn (key) nil) :set-attr (fn (el name val) nil) :set-text (fn (el text) nil) :remove-child (fn (parent child) nil) :fetch (fn (url &rest opts) {:status 200 :body "" :ok true}) :query (fn (service name &rest args) (list)) :add-class (fn (el cls) nil) :get-element (fn (id) nil) :now (fn () 0) :abort (fn (code) nil) :action (fn (service name &rest args) {:ok true}) :remove-class (fn (el cls) nil) :append-child (fn (parent child) nil) :request-arg (fn (name) nil) :emit-dom (fn (op &rest args) nil) :local-storage-get (fn (key) nil) :get-cookie (fn (name) nil)})
-
-;; Create a test session with mock IO platform
-(define make-harness :effects () (fn (&key platform) (let ((merged (if (nil? platform) default-platform (merge default-platform platform)))) {:log (list) :platform merged :state {:cookies {} :storage {} :dom nil}})))
-
-;; Clear IO log and state for a new test
-(define harness-reset! :effects () (fn (session) (dict-set! session "log" (list)) (dict-set! session "state" {:cookies {} :storage {} :dom nil}) session))
-
-;; Append an IO call record to session log
-(define harness-log :effects () (fn (session &key op) (let ((log (get session "log"))) (if (nil? op) log (filter (fn (entry) (= (get entry "op") op)) log)))))
-
-;; Read state value from session store
-(define harness-get :effects () (fn (session key) (get (get session "state") key)))
-
-;; Write state value to session store
-(define harness-set! :effects () (fn (session key value) (dict-set! (get session "state") key value) nil))
-
-;; Wrap a mock fn to record calls in the IO log
-(define make-interceptor :effects () (fn (session op-name mock-fn) (fn (&rest args) (let ((result (if (empty? args) (mock-fn) (if (= 1 (len args)) (mock-fn (first args)) (if (= 2 (len args)) (mock-fn (first args) (nth args 1)) (if (= 3 (len args)) (mock-fn (first args) (nth args 1) (nth args 2)) (apply mock-fn args)))))) (log (get session "log"))) (append! log {:args args :result result :op op-name}) result))))
-
-;; Bind all interceptors into the eval environment
-(define install-interceptors :effects () (fn (session env) (for-each (fn (key) (let ((mock-fn (get (get session "platform") key)) (interceptor (make-interceptor session key mock-fn))) (env-bind! env key interceptor))) (keys (get session "platform"))) env))
-
-;; Query IO log: all calls, or filtered by op name
-(define io-calls :effects () (fn (session op-name) (filter (fn (entry) (= (get entry "op") op-name)) (get session "log"))))
-
-;; Count IO calls, optionally filtered by op name
-(define io-call-count :effects () (fn (session op-name) (len (io-calls session op-name))))
-
-;; Get the nth IO call record
-(define io-call-nth :effects () (fn (session op-name n) (let ((calls (io-calls session op-name))) (if (< n (len calls)) (nth calls n) nil))))
-
-;; Get args from the nth call to an operation
-(define io-call-args :effects () (fn (session op-name n) (let ((call (io-call-nth session op-name n))) (if (nil? call) nil (get call "args")))))
-
-;; Get return value from the nth call to an operation
-(define io-call-result :effects () (fn (session op-name n) (let ((call (io-call-nth session op-name n))) (if (nil? call) nil (get call "result")))))
-
-;; Assert an IO operation was called at least once
-(define assert-io-called :effects () (fn (session op-name) (assert (> (io-call-count session op-name) 0) (str "Expected IO operation " op-name " to be called but it was not"))))
-
-;; Assert an IO operation was never called
-(define assert-no-io :effects () (fn (session op-name) (assert (= (io-call-count session op-name) 0) (str "Expected IO operation " op-name " not to be called but it was called " (io-call-count session op-name) " time(s)"))))
-
-;; Assert exact call count for an operation
-(define assert-io-count :effects () (fn (session op-name expected) (let ((actual (io-call-count session op-name))) (assert (= actual expected) (str "Expected " op-name " to be called " expected " time(s) but was called " actual " time(s)")))))
-
-;; Assert args of the nth call match expected
-(define assert-io-args :effects () (fn (session op-name n expected-args) (let ((actual (io-call-args session op-name n))) (assert (equal? actual expected-args) (str "Expected call " n " to " op-name " with args " (str expected-args) " but got " (str actual))))))
-
-;; Assert result of the nth call matches expected
-(define assert-io-result :effects () (fn (session op-name n expected) (let ((actual (io-call-result session op-name n))) (assert (equal? actual expected) (str "Expected call " n " to " op-name " to return " (str expected) " but got " (str actual))))))
-
-;; Assert a state key has the expected value
-(define assert-state :effects () (fn (session key expected) (let ((actual (harness-get session key))) (assert (equal? actual expected) (str "Expected state " key " to be " (str expected) " but got " (str actual))))))
-
-
-)) ;; end define-library
+    (define
+      assert
+      (fn
+        (condition msg)
+        (when (not condition) (error (or msg "Assertion failed")))))
+    (define
+      assert=
+      (fn
+        (actual expected msg)
+        (when
+          (not (= actual expected))
+          (error (or msg (str "Expected " expected ", got " actual))))))
+    (define default-platform {:get-cookie (fn (name) nil) :local-storage-get (fn (key) nil) :emit-dom (fn (op &rest args) nil) :request-arg (fn (name) nil) :append-child (fn (parent child) nil) :remove-class (fn (el cls) nil) :action (fn (service name &rest args) {:ok true}) :abort (fn (code) nil) :now (fn () 0) :get-element (fn (id) nil) :add-class (fn (el cls) nil) :query (fn (service name &rest args) (list)) :fetch (fn (url &rest opts) {:ok true :body "" :status 200}) :remove-child (fn (parent child) nil) :set-text (fn (el text) nil) :set-attr (fn (el name val) nil) :config (fn (key) nil) :request-path (fn () "/") :create-element (fn (tag) nil) :url-for (fn (endpoint &rest args) "/mock-url") :set-cookie (fn (name val &rest opts) nil) :local-storage-set (fn (key val) nil) :sleep (fn (ms) nil) :frag (fn (service comp &rest args) "") :app-url (fn (service &rest path) "/mock-app-url") :csrf-token (fn () "test-csrf-token") :current-user (fn () nil)})
+    (define
+      make-harness
+      :effects ()
+      (fn
+        (&key platform)
+        (let
+          ((merged (if (nil? platform) default-platform (merge default-platform platform))))
+          {:state {:dom nil :storage {} :cookies {}} :platform merged :log (list)})))
+    (define
+      harness-reset!
+      :effects ()
+      (fn
+        (session)
+        (dict-set! session "log" (list))
+        (dict-set! session "state" {:dom nil :storage {} :cookies {}})
+        session))
+    (define
+      harness-log
+      :effects ()
+      (fn
+        (session &key op)
+        (let
+          ((log (get session "log")))
+          (if
+            (nil? op)
+            log
+            (filter (fn (entry) (= (get entry "op") op)) log)))))
+    (define
+      harness-get
+      :effects ()
+      (fn (session key) (get (get session "state") key)))
+    (define
+      harness-set!
+      :effects ()
+      (fn
+        (session key value)
+        (dict-set! (get session "state") key value)
+        nil))
+    (define
+      harness-invoke-mock
+      :effects ()
+      (fn
+        (mock-fn args)
+        (if
+          (empty? args)
+          (mock-fn)
+          (if
+            (= 1 (len args))
+            (mock-fn (first args))
+            (if
+              (= 2 (len args))
+              (mock-fn (first args) (nth args 1))
+              (if
+                (= 3 (len args))
+                (mock-fn
+                  (first args)
+                  (nth args 1)
+                  (nth args 2))
+                (apply mock-fn args)))))))
+    (define
+      make-interceptor
+      :effects ()
+      (fn
+        (session op-name mock-fn)
+        (fn
+          (&rest args)
+          (let
+            ((entry {:op op-name :result nil :args args}) (log (get session "log")))
+            (append! log entry)
+            (let
+              ((result (harness-invoke-mock mock-fn args)))
+              (dict-set! entry "result" result)
+              result)))))
+    (define
+      install-interceptors
+      :effects ()
+      (fn
+        (session env)
+        (for-each
+          (fn
+            (key)
+            (let
+              ((mock-fn (get (get session "platform") key))
+                (interceptor (make-interceptor session key mock-fn)))
+              (env-bind! env key interceptor)))
+          (keys (get session "platform")))
+        env))
+    (define
+      harness-run-perform
+      :effects ()
+      (fn
+        (session expr env)
+        (let
+          ((drive (fn (self state) (if (cek-suspended? state) (let ((req (cek-io-request state))) (let ((op (get req "op")) (args (or (get req "args") (list)))) (let ((mock-fn (get (get session "platform") op))) (when (nil? mock-fn) (error (str "harness-run-perform: no mock for op " op))) (let ((entry {:op op :result nil :args args}) (log (get session "log"))) (append! log entry) (let ((result (harness-invoke-mock mock-fn args))) (dict-set! entry "result" result) (self self (cek-resume state result))))))) (cek-value state)))))
+          (drive drive (cek-step-loop (make-cek-state expr env (list)))))))
+    (define
+      io-calls
+      :effects ()
+      (fn
+        (session op-name)
+        (filter
+          (fn (entry) (= (get entry "op") op-name))
+          (get session "log"))))
+    (define
+      io-call-count
+      :effects ()
+      (fn (session op-name) (len (io-calls session op-name))))
+    (define
+      io-call-nth
+      :effects ()
+      (fn
+        (session op-name n)
+        (let
+          ((calls (io-calls session op-name)))
+          (if (< n (len calls)) (nth calls n) nil))))
+    (define
+      io-call-args
+      :effects ()
+      (fn
+        (session op-name n)
+        (let
+          ((call (io-call-nth session op-name n)))
+          (if (nil? call) nil (get call "args")))))
+    (define
+      io-call-result
+      :effects ()
+      (fn
+        (session op-name n)
+        (let
+          ((call (io-call-nth session op-name n)))
+          (if (nil? call) nil (get call "result")))))
+    (define
+      assert-io-called
+      :effects ()
+      (fn
+        (session op-name)
+        (assert
+          (> (io-call-count session op-name) 0)
+          (str "Expected IO operation " op-name " to be called but it was not"))))
+    (define
+      assert-no-io
+      :effects ()
+      (fn
+        (session op-name)
+        (assert
+          (= (io-call-count session op-name) 0)
+          (str
+            "Expected IO operation "
+            op-name
+            " not to be called but it was called "
+            (io-call-count session op-name)
+            " time(s)"))))
+    (define
+      assert-io-count
+      :effects ()
+      (fn
+        (session op-name expected)
+        (let
+          ((actual (io-call-count session op-name)))
+          (assert
+            (= actual expected)
+            (str
+              "Expected "
+              op-name
+              " to be called "
+              expected
+              " time(s) but was called "
+              actual
+              " time(s)")))))
+    (define
+      assert-io-args
+      :effects ()
+      (fn
+        (session op-name n expected-args)
+        (let
+          ((actual (io-call-args session op-name n)))
+          (assert
+            (equal? actual expected-args)
+            (str
+              "Expected call "
+              n
+              " to "
+              op-name
+              " with args "
+              (str expected-args)
+              " but got "
+              (str actual))))))
+    (define
+      assert-io-result
+      :effects ()
+      (fn
+        (session op-name n expected)
+        (let
+          ((actual (io-call-result session op-name n)))
+          (assert
+            (equal? actual expected)
+            (str
+              "Expected call "
+              n
+              " to "
+              op-name
+              " to return "
+              (str expected)
+              " but got "
+              (str actual))))))
+    (define
+      assert-state
+      :effects ()
+      (fn
+        (session key expected)
+        (let
+          ((actual (harness-get session key)))
+          (assert
+            (equal? actual expected)
+            (str
+              "Expected state "
+              key
+              " to be "
+              (str expected)
+              " but got "
+              (str actual)))))))) ;; end define-library

 ;; Re-export to global namespace for backward compatibility
 (import (sx harness))
--- a/spec/tests/differential-probes.txt
+++ b/spec/tests/differential-probes.txt
@@ -0,0 +1,65 @@
+# W14/F8 differential probe corpus — one expression per line.
+# Same expression evaluated on the native server (epoch protocol) and the
+# shipped WASM kernel (K.eval); scripts/test-differential.sh diffs results.
+# Classes drawn from review findings F-1 (integer arithmetic), F-3 (apply,
+# dict key order), F-8 itemization, S-4 (float printing), K18/K53.
+# integers & display (F-1)
+(+ 1 2)
+(- 10 3)
+(* 6 7)
+(/ 4 2)
+(/ 1 2)
+(/ 10 4)
+(quotient 13 4)
+(mod 10 3)
+# float printing (S-4)
+(+ 0.1 0.2)
+(* 3 0.1)
+(/ 1 3)
+(str 0.3)
+(str 1.5)
+(str 2.0)
+# overflow / expt (K18)
+(expt 2 10)
+(expt 2 62)
+(expt 2 100)
+(+ 9223372036854775807 1)
+# apply (F-3)
+(apply + (list 1 2 3))
+(apply max (list 1 5 2))
+(apply str (list "a" "b"))
+# dict key order (F-3)
+(keys {:b 2 :a 1 :c 3})
+(str {:b 2 :a 1})
+(vals {:b 2 :a 1})
+# strings
+(split "a,b,c" ",")
+(split "a--b" "--")
+(len "héllo")
+(upcase "abc")
+(str (char-code "A"))
+(substring "hello" 1 3)
+(join "-" (list "x" "y"))
+# equality & comparison
+(= 1 1.0)
+(= (list 1 2) (list 1 2))
+(equal? (list 1) (list 1))
+(< 1 2 3)
+# collections
+(sort (list 3 1 2))
+(range 3)
+(reverse (list 1 2 3))
+(nth (list 10 20 30) 1)
+(contains? {:a 1} :a)
+(get {:a 1} :zz 99)
+# quasiquote / quote
+(quasiquote (1 (unquote (+ 1 1)) 3))
+(str (quote sym))
+# conditionals & special forms
+(if true 1 2)
+(and 1 2 3)
+(or nil false 7)
+(do ((fn (x) x) 5) 99)
+# error normalization (both sides should error)
+(undefined-symbol-xyz)
+(/ 1 0)
--- a/spec/tests/known-failures.txt
+++ b/spec/tests/known-failures.txt
@@ -0,0 +1,273 @@
+hs-compat-asExpression > converts-a-complete-form-into-values
+hs-compat-asExpression > converts-strings-into-fragments
+hs-compat-asExpression > converts-value-as-json
+hs-compat-blockLiteral > can-map-an-array
+hs-compat-in > basic-no-query-return-values
+hs-compat-typecheck > can-do-basic-non-string-typecheck-failure
+hs-compat-typecheck > can-do-basic-string-non-null-typecheck
+hs-compat-typecheck > can-do-basic-string-typecheck
+hs-compat-typecheck > null-causes-null-safe-string-check-to-fail
+hs-dev-asExpression > parses string as JSON to object
+hs-dev-collectionExpressions > where binds after property access
+hs-dev-comparisonOperator > I am between works
+hs-dev-comparisonOperator > I am not between works
+hs-dev-comparisonOperator > is still does equality when rhs variable exists
+hs-dev-pick > can pick first n items
+hs-dev-pick > can pick items using 'of' syntax
+hs-dev-pick > can pick last n items
+hs-dev-pick > can pick random item
+hs-dev-pick > can pick random n items
+hs-emit-classes > remove class from target
+hs-emit-control-flow > tell rebinds me
+hs-emit-def-behavior > def becomes define
+hs-emit-dom-commands > hide sets display none
+hs-emit-dom-commands > log passes through
+hs-emit-dom-commands > show clears display
+hs-emit-on > on every click
+hs-extra-function-call > identity-call
+hs-extra-lambda > array-map-block
+hs-extra-lambda > arrow-true
+hs-extra-typecheck > null-colon-string
+hs-parse-assignment > put into
+hs-parse-assignment > set property to string
+hs-parse-basic-commands > add class to me
+hs-parse-basic-commands > remove class from me
+hs-parse-basic-commands > toggle between two classes
+hs-parse-basic-commands > toggle class on me
+hs-parse-conditional > if else end
+hs-parse-conditional > if then end
+hs-parse-conformance > increment @count → full AST
+hs-parse-conformance > on click add .called → full AST
+hs-parse-conformance > on click from #bar add .clicked → full AST
+hs-parse-conformance > toggle between .foo and .bar → full AST
+hs-parse-conformance > wait 100ms then add .done → full AST
+hs-parse-events > on click add class
+hs-parse-events > on click from target
+hs-parse-every-modifier > on every click
+hs-parse-expressions > attribute ref
+hs-parse-expressions > style ref
+hs-parse-send-trigger > trigger event on me
+hs-parse-sequencing > wait then add
+hs-parse-special-commands > decrement attribute
+hs-parse-special-commands > hide
+hs-parse-special-commands > increment attribute
+hs-parse-special-commands > show target
+hs-parse-unary > not expr
+hs-runtime-e2e > source → SX shape
+hs-runtime-make > make Map returns dict
+hs-runtime-make > make Set returns list
+hs-tokenize-arithmetic-ops > division operator
+hs-tokenize-arithmetic-ops > mixed arithmetic
+hs-tokenize-arithmetic-ops > modulo operator
+hs-tokenize-arithmetic-ops > multiply operator
+hs-tokenize-basics > keywords vs identifiers
+hs-tokenize-basics > whitespace skipped
+hs-tokenize-comments > line comment skipped
+hs-tokenize-full-expressions > if true put "foo" into me.innerHTML else put "bar" into me.innerHTML end
+hs-tokenize-full-expressions > increment @count then put it into me
+hs-tokenize-full-expressions > on click add .called
+hs-tokenize-full-expressions > on click[buttons==0] log event
+hs-tokenize-full-expressions > on click from #bar add .clicked
+hs-tokenize-full-expressions > on click send custom(foo:"fromBar") to #d2
+hs-tokenize-full-expressions > put "Clicked" into my.innerHTML
+hs-tokenize-full-expressions > set #d1.innerHTML to foo
+hs-tokenize-full-expressions > toggle between .foo and .bar
+hs-tokenize-full-expressions > wait 100ms then add .done
+hs-upstream-add > can add a value to a set
+hs-upstream-add > can add to an HTMLCollection
+hs-upstream-add > can add to children
+hs-upstream-add > can add to query in me
+hs-upstream-add > supports async expressions in when clause
+hs-upstream-append > append to undefined ignores the undefined
+hs-upstream-append > can append a value to a DOM node
+hs-upstream-append > can append a value to a set
+hs-upstream-append > can append a value to I
+hs-upstream-append > multiple appends work
+hs-upstream-append > new DOM content added by append will be live
+hs-upstream-askAnswer > confirm returns first choice on OK
+hs-upstream-askAnswer > prompts and puts result in it
+hs-upstream-call > call functions that return promises are waited on
+hs-upstream-core/asyncError > rejected promise stops execution
+hs-upstream-core/asyncError > rejected promise triggers catch block
+hs-upstream-core/regressions > can invoke functions w/ numbers in name
+hs-upstream-core/regressions > can pick detail fields out by name
+hs-upstream-core/regressions > can refer to function in init blocks
+hs-upstream-core/runtimeErrors > reports basic function invocation null errors properly
+hs-upstream-core/runtimeErrors > reports basic function invocation null errors properly w/ of
+hs-upstream-core/runtimeErrors > reports basic function invocation null errors properly w/ possessives
+hs-upstream-core/runtimeErrors > reports null errors on add command properly
+hs-upstream-core/runtimeErrors > reports null errors on decrement command properly
+hs-upstream-core/runtimeErrors > reports null errors on default command properly
+hs-upstream-core/runtimeErrors > reports null errors on hide command properly
+hs-upstream-core/runtimeErrors > reports null errors on increment command properly
+hs-upstream-core/runtimeErrors > reports null errors on measure command properly
+hs-upstream-core/runtimeErrors > reports null errors on put command properly
+hs-upstream-core/runtimeErrors > reports null errors on remove command properly
+hs-upstream-core/runtimeErrors > reports null errors on send command properly
+hs-upstream-core/runtimeErrors > reports null errors on sets properly
+hs-upstream-core/runtimeErrors > reports null errors on settle command properly
+hs-upstream-core/runtimeErrors > reports null errors on show command properly
+hs-upstream-core/runtimeErrors > reports null errors on toggle command properly
+hs-upstream-core/runtimeErrors > reports null errors on transition command properly
+hs-upstream-core/runtimeErrors > reports null errors on trigger command properly
+hs-upstream-core/runtime > has proper stack from event handler
+hs-upstream-core/scoping > locally scoped variables don't clash with built-in variables
+hs-upstream-empty > can empty a map
+hs-upstream-empty > can empty an element
+hs-upstream-empty > can empty a set
+hs-upstream-empty > clear works on elements
+hs-upstream-expressions/asExpression > can accept custom dynamic conversions
+hs-upstream-expressions/asExpression > can use the a modifier if you like
+hs-upstream-expressions/asExpression > collects duplicate text inputs into an array
+hs-upstream-expressions/asExpression > converts a complete form into Values
+hs-upstream-expressions/asExpression > converts a form element into Values
+hs-upstream-expressions/asExpression > converts a form element into Values | FormEncoded
+hs-upstream-expressions/asExpression > converts a form element into Values | JSONString
+hs-upstream-expressions/asExpression > converts an element into HTML
+hs-upstream-expressions/asExpression > converts a NodeList into HTML
+hs-upstream-expressions/asExpression > converts array as Set
+hs-upstream-expressions/asExpression > converts checkboxes into a Value correctly
+hs-upstream-expressions/asExpression > converts multiple selects into a Value correctly
+hs-upstream-expressions/asExpression > converts multiple selects with programmatically changed selections
+hs-upstream-expressions/asExpression > converts object as Map
+hs-upstream-expressions/asExpression > converts radio buttons into a Value correctly
+hs-upstream-expressions/asExpression > converts value as Date
+hs-upstream-expressions/asExpression > parses string as JSON to object
+hs-upstream-expressions/asExpression > pipe operator chains conversions
+hs-upstream-expressions/blockLiteral > basic block literals work
+hs-upstream-expressions/blockLiteral > basic identity works
+hs-upstream-expressions/blockLiteral > basic two arg identity works
+hs-upstream-expressions/closest > closest does not consume a following where clause
+hs-upstream-expressions/comparisonOperator > does not exist works
+hs-upstream-expressions/cookies > basic clear cookie values work
+hs-upstream-expressions/cookies > basic set cookie values work
+hs-upstream-expressions/cookies > iterate cookies values work
+hs-upstream-expressions/cookies > length is 0 when no cookies are set
+hs-upstream-expressions/cookies > update cookie values work
+hs-upstream-expressions/functionCalls > can access a property of a call's result
+hs-upstream-expressions/functionCalls > can chain calls on the result of a call
+hs-upstream-expressions/functionCalls > can invoke function on object
+hs-upstream-expressions/functionCalls > can invoke function on object w/ async arg
+hs-upstream-expressions/functionCalls > can invoke function on object w/ async root & arg
+hs-upstream-expressions/functionCalls > can invoke global function
+hs-upstream-expressions/functionCalls > can invoke global function w/ async arg
+hs-upstream-expressions/functionCalls > can pass an array literal as an argument
+hs-upstream-expressions/functionCalls > can pass an expression as an argument
+hs-upstream-expressions/functionCalls > can pass an object literal as an argument
+hs-upstream-expressions/functionCalls > can pass no arguments
+hs-upstream-expressions/logicalOperator > and short-circuits when lhs promise resolves to false
+hs-upstream-expressions/logicalOperator > should short circuit with and expression
+hs-upstream-expressions/logicalOperator > should short circuit with or expression
+hs-upstream-expressions/mathOperator > can use mixed expressions
+hs-upstream-expressions/objectLiteral > expressions work in object literal field names
+hs-upstream-expressions/propertyAccess > property access on function result
+hs-upstream-expressions/some > some returns true for nonempty selector
+hs-upstream-expressions/strings > string templates work w/ props
+hs-upstream-expressions/strings > string templates work w/ props w/ braces
+hs-upstream-expressions/symbol > resolves global context properly
+hs-upstream-fetch > allows the event handler to change the fetch parameters
+hs-upstream-fetch > as response does not throw on 404
+hs-upstream-fetch > can catch an error that occurs when using fetch
+hs-upstream-fetch > can do a simple fetch
+hs-upstream-fetch > can do a simple fetch w/ a custom conversion
+hs-upstream-fetch > can do a simple fetch w/ a naked URL
+hs-upstream-fetch > can do a simple fetch w/ html
+hs-upstream-fetch > can do a simple fetch w/ json
+hs-upstream-fetch > can do a simple fetch w/ json using JSON syntax
+hs-upstream-fetch > can do a simple fetch w/ json using Object syntax
+hs-upstream-fetch > can do a simple fetch w/ json using Object syntax and an 'an' prefix
+hs-upstream-fetch > can do a simple post
+hs-upstream-fetch > can do a simple post alt syntax w/ curlies
+hs-upstream-fetch > can do a simple post alt syntax without curlies
+hs-upstream-fetch > can put response conversion after with
+hs-upstream-fetch > can put response conversion before with
+hs-upstream-fetch > do not throw passes through 404 response
+hs-upstream-fetch > don't throw passes through 404 response
+hs-upstream-fetch > Response can be converted to JSON via as JSON
+hs-upstream-fetch > submits the fetch parameters to the event handler
+hs-upstream-fetch > throws on non-2xx response by default
+hs-upstream-fetch > triggers an event just before fetching
+hs-upstream-hide > can hide element, with display:none by default
+hs-upstream-hide > can hide element with display:none explicitly
+hs-upstream-hide > can hide element with no target followed by command
+hs-upstream-hide > can hide element with no target followed by then
+hs-upstream-hide > can hide element with no target with a with
+hs-upstream-hide > can hide element with opacity:0
+hs-upstream-hide > can hide element with opacity style literal
+hs-upstream-hide > can hide element, with visibility:hidden
+hs-upstream-hide > can hide other elements
+hs-upstream-if > if on new line does not join w/ else
+hs-upstream-if > if properly supports nested if statements and end block
+hs-upstream-js > can do both of the above
+hs-upstream-js > can return values to _hyperscript
+hs-upstream-js > handles rejected promises without hanging
+hs-upstream-make > can make elements
+hs-upstream-make > can make elements with id and classes
+hs-upstream-make > can make named objects
+hs-upstream-make > can make named objects w/ global scope
+hs-upstream-make > can make named objects with arguments
+hs-upstream-make > can make objects
+hs-upstream-make > can make objects with arguments
+hs-upstream-make > creates a div by default
+hs-upstream-on > can catch exceptions thrown in hyperscript functions
+hs-upstream-on > can catch exceptions thrown in js functions
+hs-upstream-on > can ignore when target doesn't exist
+hs-upstream-on > can pick detail fields out by name
+hs-upstream-on > can pick event properties out by name
+hs-upstream-on > listeners on other elements are removed when the registering element is removed
+hs-upstream-on > multiple event handlers at a time are allowed to execute with the every keyword
+hs-upstream-on > on intersection fires when the element is in the viewport
+hs-upstream-on > rethrown exceptions trigger 'exception' event
+hs-upstream-on > throttled at <time> drops events within the window
+hs-upstream-put > waits on promises
+hs-upstream-remove > can remove a value from a set
+hs-upstream-repeat > can nest loops
+hs-upstream-repeat > only executes the init expression once
+hs-upstream-repeat > repeat forever works
+hs-upstream-repeat > repeat forever works w/o keyword
+hs-upstream-repeat > until keyword works
+hs-upstream-repeat > while keyword works
+hs-upstream-reset > can reset a textarea
+hs-upstream-resize > fires when element is resized
+hs-upstream-resize > on resize from window uses native window resize event
+hs-upstream-resize > provides height in detail
+hs-upstream-resize > works with from clause
+hs-upstream-select > returns selected text
+hs-upstream-send > can send events to any expression
+hs-upstream-set > set waits on promises
+hs-upstream-show > can filter over a set of elements using the its symbol
+hs-upstream-socket > converts relative URL to ws:// on http pages
+hs-upstream-socket > converts relative URL to wss:// on https pages
+hs-upstream-socket > dispatchEvent sends JSON-encoded event over the socket
+hs-upstream-socket > namespaced sockets work
+hs-upstream-socket > on message as JSON handler decodes JSON payload
+hs-upstream-socket > on message as JSON throws on non-JSON payload
+hs-upstream-socket > on message handler fires on incoming text message
+hs-upstream-socket > parses socket with absolute ws:// URL
+hs-upstream-socket > rpc proxy blacklists then/catch/length/toJSON
+hs-upstream-socket > rpc proxy default timeout rejects the promise
+hs-upstream-socket > rpc proxy noTimeout avoids timeout rejection
+hs-upstream-socket > rpc proxy reply with throw rejects the promise
+hs-upstream-socket > rpc proxy sends a message and resolves the reply
+hs-upstream-socket > rpc proxy timeout(n) rejects after a custom window
+hs-upstream-socket > rpc reconnects after the underlying socket closes
+hs-upstream-socket > with timeout parses and uses the configured timeout
+hs-upstream-swap > can swap a variable with a property
+hs-upstream-tell > works with an array
+hs-upstream-tell > your symbol represents the thing being told
+hs-upstream-toggle > can toggle display
+hs-upstream-toggle > can toggle display on other elt
+hs-upstream-toggle > can toggle display w/ my
+hs-upstream-toggle > can toggle opacity
+hs-upstream-toggle > can toggle opacity on other elt
+hs-upstream-toggle > can toggle opacity w/ my
+hs-upstream-toggle > can toggle until an event on another element
+hs-upstream-toggle > can toggle visibility
+hs-upstream-toggle > can toggle visibility on other elt
+hs-upstream-toggle > can toggle visibility w/ my
+hs-upstream-wait > can destructure properties in a wait
+hs-upstream-wait > can wait on event
+hs-upstream-wait > can wait on event on another element
+hs-upstream-wait > waiting on an event sets 'it' to the event
+hs-upstream-when > attribute observers are persistent (not recreated on re-run)
+math > string->number
--- a/spec/tests/test-chars.sx
+++ b/spec/tests/test-chars.sx
@@ -1,3 +1,5 @@
+(defsuite "chars"
+
 ;; Tests for character type (Phase 13)
 ;; Uses (make-char n) and (char-code "x") instead of #\x literals
 ;; (char literal parser syntax tested via sx-parse call)
@@ -183,3 +185,5 @@
 (deftest
  "char-ci>=? equal case-insensitive"
  (assert= true (char-ci>=? (make-char 97) (make-char 65))))
+
+)
--- a/spec/tests/test-gate-pins.sx
+++ b/spec/tests/test-gate-pins.sx
@@ -0,0 +1,308 @@
+;; ==========================================================================
+;; test-gate-pins.sx — W14 regression pins for the review's landed fixes
+;;
+;; The quick-wins batch (commit dc7aa709 + siblings) landed real semantics
+;; fixes but shipped WITHOUT pinning tests, so a regression would pass
+;; silently. This file pins each confirmed-and-fixed finding with a minimal
+;; repro lifted from the review lane files (plans/sx-review/*.md). One suite
+;; per finding.
+;;
+;; TEST-ONLY: no semantics edits. If a pin fails, the fix regressed — do NOT
+;; relax the assertion; investigate the evaluator/primitive change.
+;; NB: assert= uses `=` (not `equal?`); compare lists with `=`.
+;; ==========================================================================
+
+;; --------------------------------------------------------------------------
+;; K18 [W7, high] expt silently wrapped at 63-bit int — now promotes to float
+;; like +/*. Repro (core.md): (expt 2 62) -> -4611686018427387904 (wrapped);
+;; (expt 2 100) -> 0. Fixed: both are positive floats.
+;; --------------------------------------------------------------------------
+(defsuite
+  "gate-K18-expt-overflow"
+  (deftest
+    "small integer exponents stay exact"
+    (do
+      (assert= (expt 2 0) 1)
+      (assert= (expt 2 10) 1024)))
+  (deftest
+    "expt 2^62 does not wrap to a negative int"
+    (assert (> (expt 2 62) 0)))
+  (deftest
+    "expt 2^100 does not wrap to zero"
+    (assert (> (expt 2 100) 0)))
+  (deftest
+    "expt 2^100 promotes to float"
+    (assert (number? (expt 2 100)))))
+
+;; --------------------------------------------------------------------------
+;; K20 [W7, high] contains? did not support dicts in the real runtime —
+;; (contains? {:a 1} :a) threw "contains?: 2 args", contradicting its :doc
+;; ("Dicts: key check"). Fixed: dict key membership works; lists/strings
+;; unchanged. Repro (core.md).
+;; --------------------------------------------------------------------------
+(defsuite
+  "gate-K20-contains-dict"
+  (deftest
+    "contains? finds a present dict key"
+    (assert (contains? {:a 1 :b 2} :a)))
+  (deftest
+    "contains? reports a missing dict key as false"
+    (assert (not (contains? {:a 1 :b 2} :zz))))
+  (deftest
+    "contains? still works on list membership"
+    (do
+      (assert (contains? (list 10 20 30) 20))
+      (assert
+        (not (contains? (list 10 20 30) 99)))))
+  (deftest
+    "contains? still works on string substrings"
+    (assert (contains? "hello" "ell"))))
+
+;; --------------------------------------------------------------------------
+;; K09 [W5, high] R7RS longhand (unquote-splicing X) silently no-spliced —
+;; only shorthand ,@/`splice-unquote` was recognized, so the longhand
+;; serialized literally (zero-splice). Fixed: aliased to splice-unquote.
+;; Repro (core.md).
+;; --------------------------------------------------------------------------
+(defsuite
+  "gate-K09-longhand-unquote-splicing"
+  (deftest
+    "longhand unquote-splicing splices a list"
+    (assert=
+      (quasiquote
+        (1
+          (unquote-splicing (list 2 3))
+          4))
+      (list 1 2 3 4)))
+  (deftest
+    "longhand unquote-splicing of an empty list contributes nothing"
+    (assert=
+      (quasiquote (0 (unquote-splicing (list)) 9))
+      (list 0 9)))
+  (deftest
+    "shorthand splice-unquote still works"
+    (assert=
+      (quasiquote (a (splice-unquote (list 2 3)) z))
+      (list (quote a) 2 3 (quote z)))))
+
+;; --------------------------------------------------------------------------
+;; K11 [W5, high] guard re-raise sentinel was a plain forgeable symbol — a
+;; body/clause legitimately returning (list '__guard-reraise__ X) was
+;; misread as a re-raise of X. Fixed: sentinel gensym'd per execution, so a
+;; user value with that head is returned as data. Repro (core.md).
+;; --------------------------------------------------------------------------
+(defsuite
+  "gate-K11-guard-reraise-forgeable"
+  (deftest
+    "body value shaped like the sentinel is returned as data"
+    (assert=
+      (guard (e (true "caught")) (list (quote __guard-reraise__) "hi"))
+      (list (quote __guard-reraise__) "hi")))
+  (deftest
+    "clause returning the forged sentinel is not re-raised"
+    (assert=
+      (guard
+        (e (true (list (quote __guard-reraise__) "forged")))
+        (error "boom"))
+      (list (quote __guard-reraise__) "forged"))))
+
+;; --------------------------------------------------------------------------
+;; K39 [W5, med] `do` misparsed a first form whose head is a list (an IIFE)
+;; as a Scheme do-loop binding spec. Repro (core.md): (do ((fn (x) x) 5) 99)
+;; threw "first: expected list, got 5"; expected 99. Fixed: `do` is begin.
+;; --------------------------------------------------------------------------
+(defsuite
+  "gate-K39-do-iife-head"
+  (deftest
+    "do with an IIFE first form returns the last form (not a do-loop)"
+    (assert= (do ((fn (x) x) 5) 99) 99))
+  (deftest
+    "do with a single IIFE form returns its value"
+    (assert= (do ((fn () 42))) 42)))
+
+;; --------------------------------------------------------------------------
+;; K49 [W8, med] Five void elements (area base embed param track) were in
+;; VOID_ELEMENTS but missing from HTML_TAGS — render fell through to
+;; function-call dispatch: (render-to-html '(base :href "x")) threw
+;; "Undefined symbol: base". dc7aa709 fixed the SPEC registry
+;; (spec/render.sx). NB: the generated OCaml render library
+;; (hosts/ocaml/lib/sx_render.ml, bootstrap_render.py output) still carries
+;; a STALE html_tags_list without these five — the runner's native
+;; `render-html` convenience therefore still errors. That regen drift is
+;; W14 item F13 (regen-diff gate); this suite pins the spec side only.
+;; --------------------------------------------------------------------------
+(defsuite
+  "gate-K49-void-elements-renderable"
+  (deftest
+    "spec HTML_TAGS registry contains all five void elements"
+    (for-each
+      (fn
+        (t)
+        (assert (contains? HTML_TAGS t) (str "HTML_TAGS missing " t)))
+      (list "area" "base" "embed" "param" "track")))
+  (deftest
+    "spec render-to-html renders base self-closing with attr"
+    (assert-equal
+      "<base href=\"x\" />"
+      (render-to-html (quote (base :href "x")) (make-env))))
+  (deftest
+    "spec render-to-html renders all five as self-closing voids"
+    (for-each
+      (fn
+        (form)
+        (let
+          ((html (render-to-html form (make-env))))
+          (assert
+            (string-contains? html "/>")
+            (str (first form) " not self-closing: " html))))
+      (list
+        (quote (area))
+        (quote (base))
+        (quote (embed))
+        (quote (param))
+        (quote (track))))))
+
+;; --------------------------------------------------------------------------
+;; crit-2 [W1, critical] signal-return frame stored the saved kont under :f
+;; but the reader looked up "saved-kont" — the resume kont was always nil,
+;; so the handler value became the WHOLE program's result and every frame
+;; outside the signal site (including the covering test's own assert!) was
+;; silently discarded. The shipped test "signal returns handler value to
+;; call site" passed VACUOUSLY — the bug defeated its own test.
+;;
+;; A plain assert around the repro would inherit the same vacuity on
+;; regression (the dropped continuation includes the assert frame). So this
+;; pin uses a side-effect sentinel: test 1 runs the repro and then sets a
+;; flag; test 2 independently asserts the flag was reached. If crit-2
+;; regresses, test 1 still "passes" (vacuously) but test 2 FAILS.
+;; Repro (core.md).
+;; --------------------------------------------------------------------------
+(define *gate-crit2-after-signal* false)
+(define *gate-crit2-result* nil)
+(define *gate-crit2-rc-result* nil)
+
+(defsuite
+  "gate-crit2-signal-return-kont"
+  (deftest
+    "continuable signal resumes at the raise site"
+    (do
+      (set!
+        *gate-crit2-result*
+        (list
+          "outer"
+          (handler-bind
+            (((fn (c) true) (fn (c) 42)))
+            (+ 1 (signal-condition 5)))
+          "end"))
+      (set!
+        *gate-crit2-rc-result*
+        (handler-bind
+          (((fn (c) true) (fn (c) (+ c 100))))
+          (+ 1 (raise-continuable 42))))
+      (set! *gate-crit2-after-signal* true)
+      (assert= *gate-crit2-result* (list "outer" 43 "end"))
+      (assert= *gate-crit2-rc-result* 143)))
+  (deftest
+    "non-vacuity sentinel: the continuation after the signal actually ran"
+    (do
+      (assert
+        *gate-crit2-after-signal*
+        "continuation dropped — crit-2 regressed (previous test passed vacuously)")
+      (assert= *gate-crit2-result* (list "outer" 43 "end"))
+      (assert= *gate-crit2-rc-result* 143)))
+  (deftest
+    "handler value feeds the arithmetic frame, not the program result"
+    (assert=
+      (handler-bind
+        (((fn (c) true) (fn (c) (* c 10))))
+        (+ 1 (signal-condition 5)))
+      51)))
+
+(defsuite
+  "gate-C22-throwing-mock-logged"
+  (deftest
+    "throwing mock still leaves an IO-log entry"
+    (let
+      ((h (make-harness))
+        (f (make-interceptor h "fetch" (fn (url) (error "boom-io")))))
+      (let
+        ((r (try-call (fn () (f "http://a")))))
+        (assert (not (get r "ok")) "mock error must propagate")
+        (assert-io-called h "fetch")
+        (assert-io-count h "fetch" 1)
+        (assert= (get (io-call-nth h "fetch" 0) "result") nil))))
+  (deftest
+    "successful mock entry gets its result updated in place"
+    (let
+      ((h (make-harness))
+        (f (make-interceptor h "fetch" (fn (url) (str "got:" url)))))
+      (f "http://a")
+      (assert-io-count h "fetch" 1)
+      (assert-io-result h "fetch" 0 "got:http://a")
+      (assert-io-args h "fetch" 0 (list "http://a"))))
+  (deftest
+    "mixed throwing and successful calls are all counted"
+    (let
+      ((h (make-harness))
+        (bomb (make-interceptor h "action" (fn (x) (error "nope"))))
+        (ok-f (make-interceptor h "action" (fn (x) "done"))))
+      (try-call (fn () (bomb 1)))
+      (ok-f 2)
+      (try-call (fn () (bomb 3)))
+      (assert-io-count h "action" 3)
+      (assert= (get (io-call-nth h "action" 1) "result") "done"))))
+
+(defsuite
+  "gate-C21-perform-mode-harness"
+  (deftest
+    "single perform suspension is serviced by the mock"
+    (let
+      ((h (make-harness :platform {:fetch (fn (u) (str "R:" u))})))
+      (assert=
+        (harness-run-perform h (quote (perform {:op "fetch" :args (list "a")})) (make-env))
+        "R:a")
+      (assert-io-count h "fetch" 1)
+      (assert-io-result h "fetch" 0 "R:a")))
+  (deftest
+    "perform result feeds the surrounding arithmetic frame"
+    (let
+      ((h (make-harness :platform {:query (fn () 41)})))
+      (assert=
+        (harness-run-perform
+          h
+          (quote (+ 1 (perform {:op "query"})))
+          (make-env))
+        42)))
+  (deftest
+    "sequential performs each suspend and resume in order"
+    (let
+      ((h (make-harness :platform {:fetch (fn (u) (str "R:" u))})))
+      (assert=
+        (harness-run-perform
+          h
+          (quote (list (perform {:op "fetch" :args (list "x")}) (perform {:op "fetch" :args (list "y")})))
+          (make-env))
+        (list "R:x" "R:y"))
+      (assert-io-count h "fetch" 2)
+      (assert-io-args h "fetch" 1 (list "y"))))
+  (deftest
+    "S10 probe: map over perform-suspending lambda keeps ALL elements"
+    (let
+      ((h (make-harness :platform {:fetch (fn (u) (str "R:" u))})))
+      (assert=
+        (harness-run-perform
+          h
+          (quote (map (fn (u) (perform {:op "fetch" :args (list u)})) (list "a" "b" "c")))
+          (make-env))
+        (list "R:a" "R:b" "R:c"))
+      (assert-io-count h "fetch" 3)))
+  (deftest
+    "unmocked op raises a clear error instead of hanging"
+    (let
+      ((h (make-harness :platform {})))
+      (let
+        ((r (try-call (fn () (harness-run-perform h (quote (perform {:op "no-such-op"})) (make-env))))))
+        (assert (not (get r "ok")) "expected an error for unmocked op")
+        (assert
+          (contains? (get r "error") "no mock for op")
+          (get r "error"))))))
--- a/spec/tests/test-hyperscript-conformance.sx
+++ b/spec/tests/test-hyperscript-conformance.sx
@@ -180,11 +180,12 @@
  (deftest "converts-foo-bar" (for-each run-hs-fixture (list {:src "1 as String" :expected "1"}))))

 ;; ── blockLiteral (4 fixtures) ──────────────────────────────
+(defsuite "hs-compat-blockLiteral"
 (deftest
  "can-map-an-array"
  (let
    ((r (eval-hs "['a', 'ab', 'abc'].map(\\ s -> s.length)")))
-    (assert= r (list 1 2 3) "map with block")))
+    (assert= r (list 1 2 3) "map with block"))))

 ;; ── boolean (2 fixtures) ──────────────────────────────
 (defsuite
@@ -325,9 +326,10 @@
    (for-each run-hs-fixture (list {:src "undefined does not exist" :expected true} {:src "null does not exist" :expected true}))))

 ;; ── cookies (9 fixtures) ──────────────────────────────
+(defsuite "hs-compat-cookies"
 (deftest
  "update-cookie-values-work"
-  (for-each run-hs-fixture (list {:src "cookies.foo" :locals {:cookies {:foo "doh"}} :expected "doh"})))
+  (for-each run-hs-fixture (list {:src "cookies.foo" :locals {:cookies {:foo "doh"}} :expected "doh"}))))

 ;; ── in (4 fixtures) ──────────────────────────────
 (defsuite
@@ -464,9 +466,10 @@
      (assert= (len r) 0 "empty query result"))))

 ;; ── some (6 fixtures) ──────────────────────────────
+(defsuite "hs-compat-some"
 (deftest
  "some-returns-true-for-nonempty-selector"
-  (for-each run-hs-fixture (list {:src "some [1]" :expected true})))
+  (for-each run-hs-fixture (list {:src "some [1]" :expected true}))))

 ;; ── stringPostfix (10 fixtures) ──────────────────────────────
 (defsuite
@@ -695,13 +698,14 @@
  (deftest "null-not-exist" (for-each run-hs-fixture (list {:src "null does not exist" :expected true})))
  (deftest "undef-not-exist" (for-each run-hs-fixture (list {:src "undefined does not exist" :locals {:undefined nil} :expected true}))))

+(defsuite "hs-compat-where"
 (deftest
  "where-with-property"
  (let
    ((items (list {:age 15 :name "Alice"} {:age 30 :name "Bob"})))
    (let
      ((r (eval-hs "items where its age > 20" {:locals {:items items}})))
-      (assert= (len r) 1 "one match"))))
+      (assert= (len r) 1 "one match")))))

 (defsuite
  "hs-0990-collection-ops"
--- a/spec/tests/test-import-bind.sx
+++ b/spec/tests/test-import-bind.sx
@@ -1,3 +1,5 @@
+(defsuite "import-bind"
+
 ;; Tests for define-library, import, and the import suspension mechanism

 ;; ============================================================
@@ -190,3 +192,5 @@
  (assert=
    (cek-try (fn () inner-secret) (fn (e) "not-found"))
    "not-found"))
+
+)
--- a/spec/tests/test-let-match.sx
+++ b/spec/tests/test-let-match.sx
@@ -1,3 +1,5 @@
+(defsuite "let-match"
+
 ;; Tests for let-match — CEK special form and bytecode compiler desugaring
 ;; let-match destructures a dict: (let-match {:key var} expr body...)

@@ -45,3 +47,5 @@
  (let-match {:a a :b b} (dict :a (+ 1 2) :b (* 3 4))
    (assert= a 3)
    (assert= b 12)))
+
+)
--- a/spec/tests/test-math.sx
+++ b/spec/tests/test-math.sx
@@ -1,7 +1,10 @@
+;; Math completeness tests. C9 note: this file used NESTED deftests with no
+;; defsuite, so every test reported with an empty suite label (" > sin").
+;; Restructured to defsuite/deftest — labels are now "math > sin" etc.

-(deftest
-  "math completeness"
-  (deftest
+(defsuite
+  "math"
+  (defsuite
    "trigonometry"
    (deftest
      "sin"
@@ -124,7 +127,10 @@
      true
      (< (abs (- (cos (acos 0.5)) 0.5)) 0.0001)
      "cos(acos(x)) = x")
-    (assert= true (< (abs (- (exp (log 2)) 2)) 0.0001) "exp(log(x)) = x")
+    (assert=
+      true
+      (< (abs (- (exp (log 2)) 2)) 0.0001)
+      "exp(log(x)) = x")
    (assert=
      (* 12 18)
      (* (gcd 12 18) (lcm 12 18))
--- a/spec/tests/test-ports.sx
+++ b/spec/tests/test-ports.sx
@@ -1,3 +1,5 @@
+(defsuite "ports"
+
 ;; Phase 14 — String ports + eof-object

 (deftest
@@ -230,3 +232,5 @@
          (c3 (read-char in))
          (when (not (eof-object? c3)) (write-char c3 out)))
        (assert= "abc" (get-output-string out) "roundtrip via ports")))))
+
+)
--- a/web/tests/test-adapter-dom-render.sx
+++ b/web/tests/test-adapter-dom-render.sx
@@ -0,0 +1,93 @@
+;; ==========================================================================
+;; test-adapter-dom-render.sx — C23 (W14): actual render-OUTPUT tests for
+;; the DOM adapter. The pre-existing test-adapter-dom.sx asserts membership
+;; predicates only ("if is a render form") — zero tests inspected what
+;; render-to-dom actually builds, leaving the 1512-line adapter the
+;; thinnest-tested relative to size (hosts.md C23).
+;;
+;; Runs against the OCaml runner's mock DOM (host-* primitives) through the
+;; real (web adapter-dom) library, disk-resolved by import.
+;;
+;; Adapter output contract (probed 2026-07-04):
+;;   - text renders as a CHILD TEXT NODE (nodeType 3, textContent set);
+;;     the parent's own textContent prop stays "" in the mock
+;;   - control-flow forms (when/map/...) wrap their output in a FRAGMENT
+;;     child; `if` inlines the chosen branch directly
+;; ==========================================================================
+(import (web adapter-dom))
+
+(defsuite
+  "adapter-dom-render-output"
+  (deftest
+    "simple element: tag, text child node"
+    (let
+      ((el (render-to-dom (quote (div "hello")) (make-env))))
+      (assert= (dom-node-name el) "DIV")
+      (let
+        ((kids (dom-get-prop el "children")))
+        (assert= (len kids) 1)
+        (assert= (dom-get-prop (first kids) "nodeType") 3)
+        (assert= (dom-text-content (first kids)) "hello"))))
+  (deftest
+    "class and id land on the element"
+    (let
+      ((el (render-to-dom (quote (div :class "card" :id "main" "x")) (make-env))))
+      (assert= (dom-get-prop el "className") "card")
+      (assert
+        (or
+          (= (dom-get-prop el "id") "main")
+          (= (get (dom-get-prop el "attributes") "id") "main"))
+        "id not set as prop or attribute")))
+  (deftest
+    "nested children are appended in order"
+    (let
+      ((el (render-to-dom (quote (ul (li "a") (li "b") (li "c"))) (make-env))))
+      (let
+        ((kids (dom-get-prop el "children")))
+        (assert= (len kids) 3)
+        (assert= (dom-node-name (first kids)) "LI")
+        (assert=
+          (dom-text-content
+            (first (dom-get-prop (nth kids 2) "children")))
+          "c"))))
+  (deftest
+    "void element renders with no children"
+    (let
+      ((el (render-to-dom (quote (input :type "text")) (make-env))))
+      (assert= (dom-node-name el) "INPUT")
+      (assert= (len (dom-get-prop el "children")) 0)))
+  (deftest
+    "when false yields an empty fragment"
+    (let
+      ((el (render-to-dom (quote (div (when false (span "hidden")))) (make-env))))
+      (let
+        ((kids (dom-get-prop el "children")))
+        (assert= (len kids) 1)
+        (assert= (dom-node-name (first kids)) "FRAGMENT")
+        (assert= (len (dom-get-prop (first kids) "children")) 0))))
+  (deftest
+    "when true renders the branch inside its fragment"
+    (let
+      ((el (render-to-dom (quote (div (when true (span "shown")))) (make-env))))
+      (let
+        ((frag (first (dom-get-prop el "children"))))
+        (assert= (dom-node-name frag) "FRAGMENT")
+        (assert=
+          (dom-node-name (first (dom-get-prop frag "children")))
+          "SPAN"))))
+  (deftest
+    "map render form produces one child per element inside its fragment"
+    (let
+      ((el (render-to-dom (quote (ul (map (fn (x) (li x)) (list "1" "2" "3")))) (make-env))))
+      (let
+        ((frag (first (dom-get-prop el "children"))))
+        (assert= (dom-node-name frag) "FRAGMENT")
+        (let
+          ((items (dom-get-prop frag "children")))
+          (assert= (len items) 3)
+          (assert= (dom-node-name (first items)) "LI")))))
+  (deftest
+    "if render form inlines the chosen branch directly"
+    (let
+      ((el (render-to-dom (quote (div (if true (b "yes") (i "no")))) (make-env))))
+      (assert= (dom-node-name (first (dom-get-prop el "children"))) "B"))))