Merge loops/sx-ws-w14 into architecture: W14 test gate & conformance infra

17 test-only commits delivering the full W14 workstream (PLAN.md §W14 —
the enabler every other sx-review fix verifies against):

- spec/tests/test-gate-pins.sx: 7 pin suites (29 tests) for dc7aa709's
  landed fixes — K18, K20, K09/K11/K39, K49 (spec side), crit-2
  (non-vacuous via side-effect sentinel), plus C21/C22 harness pins
- 6 gate scripts, all bidirectional ledgers (a healed KNOWN entry also
  fails): test-protocol-gate (C1/C1b/S4 + C3-C7 quirk ledger + seeded
  fuzz-liveness, 11), test-env-parity (runner-only bindings, 7),
  test-harness-parity (mcp_tree vs sx_server, 12), test-wasm-corpus
  (shipped kernel: 80/83 files green, 5192 passes), test-suite-baseline
  (273-failure band pinned in spec/tests/known-failures.txt),
  test-differential (49 probes native vs WASM, 3 ledgered)
- spec/harness.sx: C22 fix (IO logged before the mock runs) + C21
  harness-run-perform (real CEK suspend/resume mode); W14-assigned per
  PLAN approach item 4 — see merge note in the briefing re: the forge
  briefing's stricter wording
- C9: empty suite labels eliminated across 6 test files
- web/tests/test-adapter-dom-render.sx: first render-output coverage of
  the DOM adapter (the browser-only exclusion was false)

Confirmed handoffs recorded in the briefing: bare-server apply does not
spread args (F-3, runner masks it); both runners' sha3-256 are fake
stubs (test CIDs != production CIDs); generated sx_render.ml is regen-
stale (misses dc7aa709's HTML_TAGS fix); canonical-serialize broken on
bare server for any number.

Verified post-merge in this checkout: gate pins 275/0, protocol-gate
11/0, env-parity 7/0, harness-parity 12/0, differential 49/0.

Briefing conflict (add/add) resolved: kept the loop's completed version
with a merge note preserving the forge briefing's context (8181421c
landed after the worktree branched).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
2026-07-04 08:14:48 +00:00
21 changed files with 2266 additions and 124 deletions

View File

@@ -1 +1 @@
{"sessionId":"c4d97db1-361c-4a04-a99b-c838f9385469","pid":2426590,"procStart":"349789073","acquiredAt":1780789990975}
{"sessionId":"d510140d-6197-40b7-9bd2-125ca304ad7a","pid":697492,"procStart":"125353752","acquiredAt":1783118024555}

View File

@@ -0,0 +1,68 @@
#!/usr/bin/env node
// eval_wasm_probes.js — W14/F8: evaluate a file of probe expressions (one
// per line, '#'-comments allowed) on the SHIPPED browser kernel and print
// PROBE <n> <result-or-ERROR>
// per line, for diffing against the native server (scripts/test-differential.sh).
// Boot stubs mirror test_wasm_native.js / run_wasm_corpus.js.
const fs = require('fs');
const path = require('path');
const PROJECT_ROOT = path.resolve(__dirname, '../../..');
const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
const probeFile = process.argv[2];
if (!probeFile) { console.error('usage: eval_wasm_probes.js <probes.txt>'); process.exit(2); }
global.window = global;
global.document = {
createElement: () => ({ style: {}, setAttribute() {}, appendChild() {}, children: [] }),
createDocumentFragment: () => ({ appendChild() {}, children: [], childNodes: [] }),
head: { appendChild() {} }, body: { appendChild() {} },
querySelector: () => null, querySelectorAll: () => [],
createTextNode: s => ({ textContent: s }), addEventListener() {},
createComment: s => ({ textContent: s || '' }),
getElementsByTagName: () => [],
};
global.localStorage = { getItem: () => null, setItem() {}, removeItem() {} };
global.CustomEvent = class { constructor(n, o) { this.type = n; this.detail = (o || {}).detail || {}; } };
global.MutationObserver = class { observe() {} disconnect() {} };
global.requestIdleCallback = fn => setTimeout(fn, 0);
global.matchMedia = () => ({ matches: false });
global.navigator = { serviceWorker: { register: () => Promise.resolve() } };
global.location = { href: '', pathname: '/', hostname: 'localhost' };
global.history = { pushState() {}, replaceState() {} };
global.fetch = () => Promise.resolve({ ok: true, text: () => Promise.resolve('') });
async function main() {
require(path.join(WASM_DIR, 'sx_browser.bc.wasm.js'));
const K = await new Promise((resolve, reject) => {
let tries = 0;
const poll = setInterval(() => {
if (globalThis.SxKernel) { clearInterval(poll); resolve(globalThis.SxKernel); }
else if (++tries > 200) { clearInterval(poll); reject(new Error('SxKernel not found')); }
}, 50);
});
const lines = fs.readFileSync(probeFile, 'utf8').split('\n');
let n = 0;
for (const raw of lines) {
const line = raw.trim();
if (!line || line.startsWith('#')) continue;
n++;
let out;
try {
// Serialize through the kernel's own printer so both hosts emit SX
// text (K.eval returns raw JS values otherwise — [object Object]).
const r = K.eval(`(sx-serialize ${line})`);
out = (typeof r === 'string') ? r : String(r);
} catch (e) {
out = 'ERROR';
}
// errors normalized: kernel returns "Error: ..." strings for eval errors
if (typeof out === 'string' && out.startsWith('Error')) out = 'ERROR';
console.log(`PROBE ${n} ${out.replace(/\n/g, '\\n')}`);
}
}
main().catch(e => { console.error('FATAL:', e.message); process.exit(1); });

View File

@@ -0,0 +1,170 @@
#!/usr/bin/env node
// run_wasm_corpus.js — W14/F2: run spec-test files through the SHIPPED
// browser kernel (sx_browser.bc.wasm.js), headless in Node.
//
// The review (conformance.md F-2) found no runner feeds spec/tests through
// the shipped browser artifact — F-1/F-3 host divergences existed
// undetected precisely because of that. This runs ONE test file per
// invocation (process isolation: a hanging file is killed by the driver's
// timeout without taking down the sweep) and prints a parseable summary:
// CORPUS-RESULT <file> pass=<n> fail=<n> status=ok|load-error
//
// Usage: node hosts/ocaml/browser/run_wasm_corpus.js spec/tests/test-eval.sx
// Driver: scripts/test-wasm-corpus.sh (sweeps the corpus, applies skips).
//
// Boot stubs and module preload mirror test_wasm_native.js (the blessed
// boot path for the shipped kernel).
const fs = require('fs');
const path = require('path');
const PROJECT_ROOT = path.resolve(__dirname, '../../..');
const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
const target = process.argv[2];
if (!target) { console.error('usage: run_wasm_corpus.js <test-file.sx>'); process.exit(2); }
// --- DOM stubs (as test_wasm_native.js) ---
function makeElement(tag) {
const el = {
tagName: tag, _attrs: {}, _children: [], style: {},
childNodes: [], children: [], textContent: '',
nodeType: 1,
setAttribute(k, v) { el._attrs[k] = String(v); },
getAttribute(k) { return el._attrs[k] || null; },
removeAttribute(k) { delete el._attrs[k]; },
appendChild(c) { el._children.push(c); el.childNodes.push(c); el.children.push(c); return c; },
insertBefore(c) { el._children.push(c); el.childNodes.push(c); el.children.push(c); return c; },
removeChild(c) { return c; },
replaceChild(n) { return n; },
cloneNode() { return makeElement(tag); },
addEventListener() {}, removeEventListener() {}, dispatchEvent() {},
get innerHTML() {
return el._children.map(c => {
if (c._isText) return c.textContent || '';
if (c._isComment) return '<!--' + (c.textContent || '') + '-->';
return c.outerHTML || '';
}).join('');
},
set innerHTML(v) { el._children = []; el.childNodes = []; el.children = []; },
get outerHTML() {
let s = '<' + tag;
for (const k of Object.keys(el._attrs).sort()) s += ` ${k}="${el._attrs[k]}"`;
s += '>';
if (['br', 'hr', 'img', 'input', 'meta', 'link'].includes(tag)) return s;
return s + el.innerHTML + '</' + tag + '>';
},
dataset: new Proxy({}, {
get(_, k) { return el._attrs['data-' + k.replace(/[A-Z]/g, c => '-' + c.toLowerCase())]; },
set(_, k, v) { el._attrs['data-' + k.replace(/[A-Z]/g, c => '-' + c.toLowerCase())] = v; return true; }
}),
querySelectorAll() { return []; },
querySelector() { return null; },
};
return el;
}
global.window = global;
global.document = {
createElement: makeElement,
createDocumentFragment() { return makeElement('fragment'); },
head: makeElement('head'), body: makeElement('body'),
querySelector() { return null; }, querySelectorAll() { return []; },
createTextNode(s) { return { _isText: true, textContent: String(s), nodeType: 3 }; },
addEventListener() {},
createComment(s) { return { _isComment: true, textContent: s || '', nodeType: 8 }; },
getElementsByTagName() { return []; },
};
global.localStorage = { getItem() { return null; }, setItem() {}, removeItem() {} };
global.CustomEvent = class { constructor(n, o) { this.type = n; this.detail = (o || {}).detail || {}; } };
global.MutationObserver = class { observe() {} disconnect() {} };
global.requestIdleCallback = fn => setTimeout(fn, 0);
global.matchMedia = () => ({ matches: false });
global.navigator = { serviceWorker: { register() { return Promise.resolve(); } } };
global.location = { href: '', pathname: '/', hostname: 'localhost' };
global.history = { pushState() {}, replaceState() {} };
global.fetch = () => Promise.resolve({ ok: true, text() { return Promise.resolve(''); } });
global.XMLHttpRequest = class { open() {} send() {} };
async function main() {
require(path.join(WASM_DIR, 'sx_browser.bc.wasm.js'));
const K = await new Promise((resolve, reject) => {
let tries = 0;
const poll = setInterval(() => {
if (globalThis.SxKernel) { clearInterval(poll); resolve(globalThis.SxKernel); }
else if (++tries > 200) { clearInterval(poll); reject(new Error('SxKernel not found after 10s')); }
}, 50);
});
// --- 8 FFI host primitives (as test_wasm_native.js) ---
K.registerNative('host-global', args => (args[0] in globalThis) ? globalThis[args[0]] : null);
K.registerNative('host-get', args => {
const [obj, prop] = args;
if (obj == null) return null;
const v = obj[prop];
return v === undefined ? null : v;
});
K.registerNative('host-set!', args => { if (args[0] != null) args[0][args[1]] = args[2]; return args[2]; });
K.registerNative('host-call', args => {
const [obj, method, ...rest] = args;
if (obj == null || typeof obj[method] !== 'function') return null;
const r = obj[method].apply(obj, rest);
return r === undefined ? null : r;
});
K.registerNative('host-new', args => new (Function.prototype.bind.apply(args[0], [null, ...args.slice(1)])));
K.registerNative('host-callback', args => function () { return K.callFn(args[0], Array.from(arguments)); });
K.registerNative('host-typeof', args => typeof args[0]);
K.registerNative('host-await', args => args[0]);
K.eval('(define SX_VERSION "wasm-corpus-1.0")');
K.eval('(define SX_ENGINE "ocaml-vm-wasm-corpus")');
K.eval('(define parse sx-parse)');
K.eval('(define serialize sx-serialize)');
// --- Web stack modules (source form; bytecode covered elsewhere) ---
const sxDir = path.join(WASM_DIR, 'sx');
const modules = [
'render', 'core-signals', 'signals', 'deps', 'router', 'page-helpers', 'freeze',
'bytecode', 'compiler', 'vm', 'dom', 'browser',
'adapter-html', 'adapter-sx', 'adapter-dom',
'boot-helpers', 'hypersx',
'harness', 'harness-reactive', 'harness-web',
'engine', 'orchestration', 'boot',
];
if (K.beginModuleLoad) K.beginModuleLoad();
for (const mod of modules) {
K.load(fs.readFileSync(path.join(sxDir, mod + '.sx'), 'utf8'));
}
if (K.endModuleLoad) K.endModuleLoad();
// --- Test framework hooks ---
let pass = 0, fail = 0;
const suiteStack = [];
K.registerNative('report-pass', () => { pass++; return null; });
K.registerNative('report-fail', args => {
fail++;
const suitePath = suiteStack.join(' > ');
console.error(`FAIL: ${suitePath ? suitePath + ' > ' : ''}${args[0]}\n ${args[1]}`);
return null;
});
K.registerNative('push-suite', args => { suiteStack.push(args[0]); return null; });
K.registerNative('pop-suite', () => { suiteStack.pop(); return null; });
K.eval('(define test-allowed? (fn (name) true))');
K.eval('(define try-call (fn (thunk) (let ((result (cek-try thunk (fn (err) err)))) (if (and (= (type-of result) "string") (starts-with? result "Error")) {"ok" false "error" result} {"ok" true "error" nil}))))');
K.load(fs.readFileSync(path.join(PROJECT_ROOT, 'spec/tests/test-framework.sx'), 'utf8'));
// --- Run the target file ---
const rel = path.relative(PROJECT_ROOT, path.resolve(target));
let status = 'ok';
try {
K.load(fs.readFileSync(path.resolve(target), 'utf8'));
} catch (e) {
status = 'load-error';
console.error(`LOAD-ERROR: ${rel}: ${e.message}`);
}
console.log(`CORPUS-RESULT ${rel} pass=${pass} fail=${fail} status=${status}`);
process.exit(status !== 'ok' || fail > 0 ? 1 : 0);
}
main().catch(e => { console.error('FATAL:', e.message); process.exit(1); });

View File

@@ -1,57 +1,348 @@
# sx-gate loop — W14 test gate (first live test of git→gitea→agentic→tmux)
<!-- MERGE NOTE 2026-07-04: architecture's 8181421c landed the original forge
briefing (agents/ws-W14, rose-ash/sx-review forge — the git→gitea→agentic launch
test) at this path AFTER the loops/sx-ws-w14 worktree branched, so the loop never
saw it and bootstrapped its own briefing from PLAN.md §W14. Same worklist, same
guardrails in substance; this file is the loop's version, which records COMPLETION
of every item. One scope divergence to note for review: the forge briefing said
"do NOT edit spec/*.sx"; the loop edited spec/harness.sx (C22 log-first fix + C21
perform-mode harness) on the grounds that PLAN.md §W14 approach item 4 explicitly
assigns those harness fixes to W14. Harness self-suite + all consumers green. -->
**Forge agent:** `agents/ws-W14` in the `rose-ash/sx-review` forge (git-sx/gitea-sx/agentic-sx).
**Goal (from the forge briefing):** make the verification infrastructure trustworthy — runner env
== production env, a WASM corpus runner, harness honesty, and pinning tests for the fixes already
landed. This is **W14** in `plans/sx-review/PLAN.md` (read that section — it lists the findings).
**Findings:** C0b C9 C21 C22 C23 C3 C4 C5 C6 C7 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 K19 K104.
# W14 — Test gate & conformance infrastructure loop
## Why this workstream first
The review's prime directive: no semantic fix should merge before its pinning test + a working
gate exist, because the verification infra currently can't tell you whether a fix works. W14
produces that infra. It changes **no language semantics**, so it cannot regress the 5762p/274f
baseline — the ideal first payload while we test the agentic launch technology.
Forge agent **ws-W14**. Role: build out **W14** from the SX review remediation plan
(`plans/sx-review/PLAN.md`, §"W14. Test gate & conformance infrastructure") —
*the enabler that makes every other fix verifiable*. One checklist item per fire.
## Hard guardrails (this is a monitored test loop)
- **Commit locally, do NOT push.** No `git push` at all. (This is a test; the maintainer reviews
before anything reaches origin.)
- **Stay in W14 scope** — tests, runners, harness, gate tooling. Do NOT edit `spec/*.sx`,
`hosts/ocaml/lib/*.ml`, or any language semantics. If a task tempts you toward semantics, skip it
and note why in the Progress log.
- **Never `pkill sx_server`** (shared binary). Bound every `sx_server`/build/test with `timeout`.
- You are on branch `loops/sx-gate` in worktree `/root/rose-ash-loops/sx-gate`. Build/test here only.
- If the OCaml build or full suite is involved, compare against the recorded baseline
**5762 passed / 274 failed** (fail set is the 273 hs-* + 1 r7rs radix; see PLAN W14/F10).
You are on branch `loops/sx-ws-w14`, worktree `/root/rose-ash-loops/sx-ws-w14`.
## One iteration per fire — pick the first unchecked `[ ]`, implement, test, commit (no push),
tick the box, prepend one dated line to the Progress log, then stop.
## Hard guardrails (read every fire)
- [ ] **Pin the dc7aa709 quick-wins batch.** Add regression tests (spec/tests/ or a new suite) that
lock in the fixes that currently have none: K09 `unquote-splicing` longhand splices; K11 guard
re-raise sentinel is unforgeable (`(guard (e (true (list 'quoted x))) ...)` returns the list);
K18 `(expt 2 100)` is a float not 0; K20 `(contains? {:a 1} :a)` is true; K39 `(do ((fn (x) x) 5) 99)`
→ 99; K49 the five void elements render. (K02 is already non-vacuously covered.) Confirm they pass
on the current binary.
- [ ] **Pin C1/C1b/S4 at the host level** (a small OCaml or shell test): a malformed command line
returns an error response and the process survives; an error page is not cached.
- [ ] **WASM corpus runner (F2).** Stand up a Node harness that runs a curated spec/tests subset
against the shipped WASM kernel (seed: the conformance lane's `run_wasm.js` pattern, referenced in
PLAN). Curated subset, not the full 6k (js_of_ocaml is ~24s/test — see F18). Wire it as a script.
- [ ] **Harness honesty (C22/K104):** make `spec/harness.sx` log the IO call *before* invoking the
mock so a throwing mock is recorded. Add a test that a throwing mock leaves a log entry.
- [ ] **Runner-vs-prod env audit (F7/K42):** list every binding that exists only in `run_tests.ml`
but not the production kernel env (`values`/`call-with-values` are the known ones). Write the audit
to `plans/sx-review/runner-env-gap.md`. (Fixing them is later; the audit is the W14 task.)
- [ ] **Protocol fuzz suite (C3/C4/C5/C6):** a bounded test that feeds the epoch loop malformed
lines (`(epoch)`, `(epoch foo)`, stray `(io-response …)`, two-exprs-per-line) and asserts the
process never dies and responses stay correctly tagged.
- [ ] **hs-upstream skip-list (F10/F18):** make the native runner's 272 hs-* failures a skip-list so
a red FAIL column means something. Record the count moved.
- **TEST-ONLY.** No semantics edits. Do NOT touch `spec/evaluator.sx`,
`spec/primitives.sx`, `spec/parser.sx`, `spec/render.sx`, the OCaml kernel,
or any host runtime. W14 pins behavior with tests and productionizes the
*test/runner* surface; the actual fixes are other workstreams (W1W12).
A pin that *fails* means the finding regressed — do NOT relax the assertion,
record it as a blocker.
- **NO PUSH.** Commit locally on `loops/sx-ws-w14` only. Never push; never touch
`main` or `architecture`.
- **`.sx` files: use `sx-tree` MCP tools only** (a hook blocks Read/Write/Edit
on `.sx`). `sx_write_file` takes params **`file`** and **`source`** (NOT
`content` — a wrong key yields a `yojson … got null` error and no write).
`.md`/`.sh`/`.ml` files: normal tools are fine.
- **Never `pkill`/`kill` `sx_server`** — sibling loops share the binary. Bound
every run with `timeout` (e.g. `timeout 300 …`); if it hangs, let the timeout end it.
- **One item per fire, then stop.** No batching.
## Per-iteration procedure
1. Pick the first unchecked `[ ]` in the checklist.
2. Implement (test file or runner/harness change), lifting minimal repros from
the review lane files (`plans/sx-review/{core,hosts,conformance}.md`) — they
are a ready-made corpus of confirmed reprs.
3. Build + run the affected tests:
`sx_build` (target ocaml) then
`timeout 300 ./hosts/ocaml/_build/default/bin/run_tests.exe <test-name>`
to run a single file. New `spec/tests/test-*.sx` files are auto-discovered.
4. Confirm green (a pin must PASS on current HEAD — the fix already landed).
5. Commit locally: `git add -A && git commit` with a `W14:` prefix.
6. Tick the box, prepend one dated line to the Progress log, stop.
## Checklist
### A. Test-debt pins — dc7aa709's landed fixes shipped without regression tests
Pin each confirmed-and-fixed finding with a minimal repro. Add suites to
`spec/tests/test-gate-pins.sx` (one `defsuite` per finding).
- [x] K18 [W7] — `expt` overflow now float-promotes (no 63-bit wrap)
- [x] K20 [W7] — `contains?` now supports dict key membership
- [x] K09/K11/K39 [W5] — longhand `unquote-splicing`, guard sentinel gensym, `do` IIFE-head
- [x] K49 [W8] — five void elements (area/base/embed/param/track) renderable
(spec side; native regen drift → see Blocked). NB: the depth/cycle guard
is K16 [W8], still OPEN — not a W14 pin target until its fix lands
- [x] crit-2 [W1] — signal-return kont pinned NON-VACUOUSLY (side-effect
sentinel across two tests; a plain assert would inherit the vacuity)
- [x] C1/C1b [W3] — command-channel crash guards pinned
(`scripts/test-protocol-gate.sh`, seed for section E's fuzz suite)
- [x] S4 [hosts] — soft error pages not cached (HTTP-mode pin in
`scripts/test-protocol-gate.sh`; NB S4 lives in hosts.md, not
conformance — "housekeeping" was a mislabel from F-15's tag)
### B. Runner/production env unification
- [x] Audit runner-only bindings — inventory + bidirectional ledger in
`scripts/test-env-parity.sh` (KNOWN_DRIFT: values, call-with-values,
contains-char?, trim-right, sha3-256; consequence pin:
canonical-serialize broken on server; BOTH runners' sha3-256 are FAKE
stubs → test CIDs ≠ production CIDs)
### C. Harness honesty
- [x] K19 — harness/runtime parity pinned (`scripts/test-harness-parity.sh`:
drives mcp_tree sx_eval over JSON-RPC vs fresh sx_server over epoch,
12-probe battery from the finding, errors compared by message)
- [x] C22/K104 — FIXED harness (spec/harness.sx make-interceptor: log entry
appended before the mock runs, :result updated via dict-set!) + 3 pins
- [x] C21 — BUILT `harness-run-perform` (spec/harness.sx): drives real CEK
suspend/resume, services performs from session mocks, C22-style
logging; 5 pins incl. the S10 map-over-perform probe (CEK keeps all
elements — the drop class is serving-JIT-side). Runner-only (needs
cek-* driver bindings)
- [x] C23 — adapter-dom render-output tests
(`web/tests/test-adapter-dom-render.sx`, 8 tests vs runner mock DOM;
follow-up depth still open: boolean attrs, on-*/bind/ref/key,
reactive attrs, hydration cursor)
### D. WASM corpus runner
- [x] F2 — BUILT `hosts/ocaml/browser/run_wasm_corpus.js` (one file per
node process, shipped-kernel boot per test_wasm_native.js) +
`scripts/test-wasm-corpus.sh` sweep driver with SKIP/KNOWN_FAIL
ledger. Baseline: 83 files, 80 fully green, 5192 passes, 0 test
failures; 3 partial load-errors (hash-table/r7rs/sets, opaque jsoo
exception mid-file). Full sweep ~13 min — wiring into
sx-build-all.sh left as maintainer call (gate definition D3)
### E. Epoch-loop protocol fuzz + skip-list
- [x] C3/C4/C5/C6/C7 — protocol-quirk ledger (pins current behavior,
bidirectional) + seeded 60-line fuzz-liveness property in
`scripts/test-protocol-gate.sh` (11/11)
- [x] F10 — expected-failures BASELINE GATE instead of a skip-list
(`scripts/test-suite-baseline.sh` + `spec/tests/known-failures.txt`,
273 pinned: 271 hs-* + 2 empty-suite-label entries → C9 evidence).
New failure OR vanished failure = red; hs loops' scoreboards untouched
- [x] C9 — empty suite labels ELIMINATED: 6 files had suite-less top-level
deftests (chars 43, import-bind 14, ports 12, let-match 8, math
nested-deftests, 4 hs strays) — wrapped/restructured into defsuites;
baseline identities updated in the same commit
### F. Differential battery
- [x] F8 — cross-host differential battery: `spec/tests/differential-probes.txt`
(49 probes) × native server vs shipped WASM kernel via
`scripts/test-differential.sh` + `eval_wasm_probes.js`. 46 agree,
3 ledgered KNOWN_DIVERGENT (F-3: bare-server `apply` does not spread —
runner masks it, F-7 class). Refinement: the F-1 float-display
divergence is a K.eval JS-boundary artifact — guest `sx-serialize`
output AGREES across hosts
**CHECKLIST COMPLETE 2026-07-04** — all W14 items delivered. Open handoffs:
sx_render.ml regen drift (Blocked, hosts lane), adapter-dom depth tests,
3 WASM load-error bisects (hash-table/r7rs/sets), CI wiring of the four
gate scripts (D3 maintainer decision).
## Progress log (newest first)
<!-- prepend: `- YYYY-MM-DD <what landed, test result, commit sha>` -->
- (none yet — first fire will add the first entry)
## Recording back to the forge
After each commit, note the sha here; the maintainer (or a later step) records it as a
`test`-kind commit on `agents/ws-W14` in the forge so the program stays the system of record.
- 2026-07-04 — **F8 differential battery — CHECKLIST COMPLETE**. Committed
replacement for the review's ephemeral 130-probe corpus:
`spec/tests/differential-probes.txt` (49 probes across F-1 int/float
display, K18 overflow, F-3 apply + dict order, S-4 float printing,
strings/collections/special forms/error cases) evaluated on the native
server (epoch protocol) and the shipped WASM kernel
(`eval_wasm_probes.js`, guest `sx-serialize`), diffed by
`scripts/test-differential.sh` with a KNOWN_DIVERGENT ledger (heal →
red → delete entry). Result: 46/49 agree; 3 divergences, all one root
cause — **bare sx_server's `apply` does not spread its arg list**
((apply + (list 1 2 3)) → "Expected number, got list"; WASM spreads
correctly; the test runner masks it with its own apply — F-7 class).
Finding refinement: F-1's float-display divergence (0.3 vs 0.3000…4) is
purely a K.eval JS-boundary artifact — guest-serialized output agrees.
W14 delivered: 7 pin suites (spec/tests/test-gate-pins.sx, 29 tests),
4 gate scripts (protocol-gate 11, env-parity 7, harness-parity 12,
wasm-corpus 83-file, suite-baseline 273-pin, differential 49-probe),
2 harness capabilities (C22 log-first, C21 perform-mode), C9 label
cleanup, adapter-dom render coverage. Test-only throughout.
- 2026-07-04 — **C9 empty-suite labels (item E.3) — section E COMPLETE**.
The sweep found the defect much wider than the finding: SIX files carried
suite-less top-level deftests (test-chars 43, test-import-bind 14,
test-ports 12, test-let-match 8, test-math as deftest-nested-in-deftest,
test-hyperscript-conformance 4 strays between suites). Fixes: file-level
defsuite wraps (validated via sx_validate after mechanical wrap),
test-math restructured deftest→defsuite (labels now "math > sin"),
hs strays wrapped in section-comment-named suites (hs-compat-
blockLiteral/cookies/some/where). The two baseline-visible identities
renamed in known-failures.txt in the SAME commit. Full-gate validated
GREEN (5798p/273f — 2 passes are the wrapper deftests that no longer
self-report; fail set byte-identical). Test-only.
- 2026-07-04 — **F10 baseline gate (item E.2)**. Deliberately NOT a
skip-list: skip-listing the hs red band in the runner would rewrite the
hs loops' scoreboards mid-flight. Instead
`scripts/test-suite-baseline.sh` diffs the full suite's FAIL set against
checked-in `spec/tests/known-failures.txt` (273 entries: 271 hs-* + 2
with EMPTY suite labels — live C9 evidence, `can-map-an-array` "map with
block" and `string->number` 2-arg, the "r7rs radix shadow"). Red on a
NEW failure (regression) and red on a VANISHED failure (fix landed —
delete from baseline, locking in the win). Identity = "suite > name"
with error text stripped (messages churn). Current suite: 5800p/273f
(up 38 passes from dc7aa709's 5762 — sections AD added pins). Validated
end-to-end: GREEN, exit 0, ~12 min runtime. Test-only.
- 2026-07-04 — **C3C7 protocol fuzz suite (item E.1)**. All five findings
are still OPEN server-side (sx_server.ml fixes are host-runtime work),
so the suite pins CURRENT behavior as a bidirectional ledger — verified
each live first: C3 stray io-response → extra Unknown-command reply
(dead 13-vs-14-char guard); C4 malformed (epoch) → error reply + stale
epoch tag (envelope changed since the finding: dc7aa709's guard now
answers rather than ignores); C5 decreasing epoch accepted; C6 two
commands one line → one error, neither runs; C7 vm-trace sans compiler →
opaque "Not callable: nil". Plus a real fuzz property: 60
deterministically-seeded hostile lines (unbalanced parens, control chars,
unicode, 2KB lines, stray io-responses, epoch mutations) then a
well-formed command — server must still answer and exit cleanly.
protocol-gate now 11/11. When a server fix lands, the matching ledger
pin fails loudly → update to assert the corrected behavior. Test-only.
- 2026-07-04 — **F2 WASM corpus runner (section D COMPLETE)**. The review's
headline conformance gap: no runner ever fed spec/tests through the
SHIPPED browser artifact (F-1/F-3 divergences existed undetected). Built
`run_wasm_corpus.js` (boots sx_browser.bc.wasm.js headless in Node with
the test_wasm_native.js stub block, loads the 23 web-stack modules,
registers framework hooks, runs ONE file per process → parseable
`CORPUS-RESULT` line; process isolation means a hung file can't kill the
sweep) + `scripts/test-wasm-corpus.sh` (sweep driver, SKIP/KNOWN_FAIL
ledger with green-flip detection). **Empirical baseline: 83 files, 80
fully green, 5192 passes, ZERO test failures on the shipped kernel** —
including test-gate-pins (29/29) and test-letrec-resume (the kernel
provides cek-* driver bindings, broader than bare sx_server). 3 partial
load-errors (test-hash-table 22p, test-r7rs 87p, test-sets 30p — opaque
jsoo exception mid-file, diagnosing which form = follow-up). Full sweep
~13 min; CI wiring deferred to the D3 gate-definition decision. Test-only.
- 2026-07-04 — **C23 adapter-dom render-output tests (item C.4) — section C
COMPLETE**. Key discovery: the "browser-only" exclusion of adapter-dom
testing is FALSE for render output — `(import (web adapter-dom))`
disk-resolves in the OCaml runner and `render-to-dom` works against its
mock DOM (dom-* → host-* → mock elements). New
`web/tests/test-adapter-dom-render.sx` (8 tests): tag/text-child-node,
class+id, ordered children, void element, when-false empty FRAGMENT,
when-true branch-in-fragment, map N-children-in-fragment, if inlines
branch. Probed the adapter's output contract first (text = nodeType-3
child; control flow = FRAGMENT wrapper; if inlines). Auto-included in
default runs (not on the exclusion list) — first render-output coverage
of the 1512-line adapter in the standard gate. Follow-up depth (boolean
attrs, on-*/bind/ref/key, reactive, hydration) noted on the checklist.
254/0 standalone. Test-only.
- 2026-07-04 — **C21 perform-mode harness (item C.3)**. Added
`harness-run-perform` to spec/harness.sx (exported): drives
`make-cek-state`/`cek-step-loop`, services each
`(perform {:op X :args L})` suspension from the session's platform mocks
(entry logged before invocation, C22-consistent), `cek-resume`s with the
mock value, loops to terminal. Self-recursion via the `(self self …)`
pattern (avoids letrec-injection K06 territory). Extracted the arity
dispatch into shared `harness-invoke-mock`. 5 pins in
`gate-C21-perform-mode-harness` — notably the **S10 probe**: `(map (fn (u)
(perform …)) '("a" "b" "c"))` keeps ALL elements through 3 suspensions on
the CEK path, confirming the element-drop class is serving-JIT-side, not
CEK. Caveat noted in the docstring: needs the runner's cek-* driver
bindings (absent on bare sx_server/MCP — the env-parity theme again).
290/0. Test-infra-only.
- 2026-07-04 — **C22/K104 throwing-mock fix + pins (item C.2)**. First
actual FIX of the loop — in scope because spec/harness.sx is W14-owned
test infrastructure (PLAN approach item 4 assigns "log IO before invoking
the mock" to W14). TDD: reproduced pre-fix (caught error, 0 log entries),
then restructured `make-interceptor` to append the entry BEFORE the mock
runs (`:result nil` while pending, `dict-set!` in place on return).
Verified: throwing mock leaves entry, happy path updates result, mixed
sequence counts all 3. Added suite `gate-C22-throwing-mock-logged`
(3 tests). Harness self-suite (15) + test-relate-picker (only other
harness consumer) green; 285/0 pins run. Tooling notes: replace/insert
tools take `new_source` (not `replacement`); find_all paths still
disagree with read_subtree/replace_node on define-library files —
sx_write_file remains the reliable route. Test-infra-only.
- 2026-07-04 — **K19 harness-parity pin (item C.1)**. Authored
`scripts/test-harness-parity.sh`: drives `mcp_tree.exe` `sx_eval` with
raw JSON-RPC over stdio and a fresh `sx_server.exe` over the epoch
protocol, running the finding's exact 12-probe battery (empty?/get/
split/equal?/contains?/keyword-name/char-code/parse-number) through both
and failing on ANY divergence. Errors normalized to their inner message
so identical failures compare equal (`keyword-name :kw` errors the same
way on both — keywords evaluate to strings before the call). Result:
12/12 parity — dc7aa709's 8-entry stopgap alignment holds; this pin keeps
it honest until the real fix (mcp_tree links sx_primitives) lands in the
hosts lane. Test-only.
- 2026-07-04 — **Section B: env-parity audit + ledger**. Probed a fresh
`sx_server` over the epoch protocol (`deps-check` + live eval). Confirmed
runner-only drift: `values`/`call-with-values` (run_tests.ml:1131/1140),
`contains-char?` (rt.ml:728 + rt.js:85), `trim-right` (**JS runner only**
— absent even from the OCaml runner), `sha3-256` (rt.ml:745 + rt.js:88).
Consequence verified live: `(canonical-serialize 42)` on the server →
`Undefined symbol: contains-char?` (content addressing broken for ANY
number outside the runners). **Worse than the finding**: BOTH runners'
`sha3-256` are FAKE stubs (OCaml uses `Hashtbl.hash`!) while production
has real `crypto-sha3-256` — every CID computed in tests differs from
production CIDs. Authored `scripts/test-env-parity.sh` as a bidirectional
ledger: MUST_HAVE regressions fail; a KNOWN_DRIFT binding *appearing*
also fails (forces ledger + consequence-pin update when W5/W7/W12 land
fixes). 7/7 green. Test-only.
- 2026-07-04 — **S4 error-page-cache pin (item A.7) — section A COMPLETE**.
Extended `scripts/test-protocol-gate.sh` with an HTTP-mode case: fresh
`sx_server.exe --http <random-port>` (timeout-bounded, own PID killed at
end), GET the same nonexistent path twice, assert BOTH requests re-render
(2 `[sx-http]` lines — pre-fix the 2nd was cache-served at 0.0005s) and
the `[cache] … error page, not cached` is_err gate line appears. Findings
from prototyping: standalone worktree renders ALL docs pages as soft error
pages (no content), so a positive "real page IS cached" control is not
assertable here — documented in the script; startup takes ~12-15s (poll
loop, 40s budget). 5/5 protocol-gate green + 267/0 sx pins. Test-only.
- 2026-07-04 — **C1/C1b command-channel pins (item A.6)**. These are
protocol-level, not .sx-suite pins: authored
`scripts/test-protocol-gate.sh` — each case spawns its OWN timeout-bounded
`sx_server.exe` (no shared process touched) and asserts three things: an
`(error N "Malformed command line: ...")` response is emitted, the
follow-up epoch still evaluates (process survived), and no `Fatal error`
escapes / exit is clean. Cases: C1 unterminated list (exact review repro),
C1 plain-garbage line, C1b non-ASCII byte (`café`), plus a well-formed
control session. 4/4 green. The script is deliberately structured to grow
into section E's fuzz suite (C3C7). Test-only.
- 2026-07-04 — **crit-2 non-vacuous pin (item A.5)**. The original bug's
signature — handler value becomes the WHOLE program result, discarding
every outer frame *including the covering test's own assert* — means a
plain `(assert= repro expected)` pin would pass vacuously on regression.
Added suite `gate-crit2-signal-return-kont` with a **side-effect sentinel**:
test 1 runs both repros (`("outer" 43 "end")` list shape + `raise-continuable`
→ 143) then `set!`s a top-level flag; test 2 independently asserts the flag
— if the continuation is ever dropped again, test 1 "passes" but test 2
fails loudly. Third test pins the exact shipped-test expr (51). Verified
both repro shapes live via sx_eval first. 267 passed / 0 failed. Test-only.
- 2026-07-03 — **K49 void-elements pin (item A.4) + regen-drift DISCOVERY**.
Corrected the checklist label first: K49 is "five void elements
unrenderable" (core.md:335), not the depth guard (that's K16, OPEN). Added
suite `gate-K49-void-elements-renderable` (3 tests): spec `HTML_TAGS`
contains all five; `(render-to-html '(base :href "x") (make-env))`
`<base href="x" />`; all five render self-closing. Runner-env gotchas:
`current-env`/`symbol` are not bound in run_tests — use `(make-env)` and
literal quoted forms. **Discovery:** the first draft pinned via the
runner's native `render-html` and FAILED — `hosts/ocaml/lib/sx_render.ml`
(generated) was never regenerated after dc7aa709's spec fix, so the native
render path still errors on the five tags. Recorded under Blocked; live
evidence for F13 (regen-diff gate). 264 passed / 0 failed. Test-only.
- 2026-07-03 — **K09/K11/K39 W5 special-form pins (item A.3)**. Three suites
added to `spec/tests/test-gate-pins.sx`: `gate-K09-longhand-unquote-splicing`
(R7RS longhand `(unquote-splicing X)` now splices, incl. empty-list case;
shorthand still works), `gate-K11-guard-reraise-forgeable` (a body/clause
value shaped like `(list '__guard-reraise__ X)` is returned as data, not
misread as a re-raise — sentinel is now gensym'd), `gate-K39-do-iife-head`
(`(do ((fn (x) x) 5) 99)` → 99, not a misparsed do-loop — exact core.md
repro). Gotchas hit and fixed: quasiquoted bare idents are *symbols* not
strings, and `assert=` compares with `=` (not `equal?`, which returns false
on these spliced lists). 261 passed / 0 failed under OCaml run_tests. Test-only.
- 2026-07-03 — **K20 contains?-dict pin (item A.2)**. Mapped K-codes by
core.md severity order (K17 append!, K18 expt, K19 harness-drift, K20
contains?-dict). Added suite `gate-K20-contains-dict` to
`spec/tests/test-gate-pins.sx` (4 tests): present dict key → true, missing
key → false, list membership unchanged, string substring unchanged. Repro
from core.md ("(contains? {:a 1} :a) threw `contains?: 2 args`"). 8/8 green
across both suites under OCaml run_tests. Test-only.
- 2026-07-03 — **K18 expt-overflow pin (item A.1)**. Bootstrapped this briefing
from PLAN.md §W14 (the referenced file did not exist yet). Added
`spec/tests/test-gate-pins.sx` with suite `gate-K18-expt-overflow` (4 tests):
small exponents stay exact (`2^0=1`, `2^10=1024`), `2^62 > 0` (no negative
63-bit wrap), `2^100 > 0` (no wrap-to-zero), `2^100` is a number (float
promotion). Verified 4/4 green under the OCaml run_tests kernel. Test-only.
## Blocked
- **K49 native path — sx_render.ml regen drift** (found 2026-07-03 while
pinning A.4): dc7aa709 fixed HTML_TAGS in `spec/render.sx` but never re-ran
`hosts/ocaml/bootstrap_render.py`, so the generated
`hosts/ocaml/lib/sx_render.ml` still carries a stale `html_tags_list`
without area/base/embed/param/track. The runner's native `render-html`
convenience (and any native fast-path render) therefore STILL throws
`Undefined symbol: base` — dc7aa709's "verified on the native binary" claim
did not cover this path. Fix = regen (hosts lane, semantics-adjacent — out
of scope for this test-only loop). This is a live instance of **F13**
(regen-diff CI gate, section-B/D territory): a regen-diff check would have
caught it at commit time. The K49 pin covers the spec side only; when the
regen lands, extend the suite with `render-html`-path assertions.

94
scripts/test-differential.sh Executable file
View File

@@ -0,0 +1,94 @@
#!/bin/bash
# test-differential.sh — W14/F8: cross-host differential battery.
#
# Evaluates every expression in spec/tests/differential-probes.txt on:
# A) the native server (sx_server.exe, epoch protocol) — its printer
# B) the SHIPPED browser kernel (eval_wasm_probes.js, guest sx-serialize)
# and diffs the outputs. The review's original 130-probe corpus was
# ephemeral (F-8); this is the committed replacement.
#
# KNOWN_DIVERGENT is the ledger of confirmed, still-open divergences —
# keyed by the probe EXPRESSION. Red on a NEW divergence (host drift) and
# red on a HEALED one (fix landed: delete the entry, locking in parity).
#
# Method note (finding refinement, 2026-07-04): comparing raw K.eval
# JS-boundary values shows float-display divergences (0.3 vs
# 0.30000000000000004) that DISAPPEAR under guest-level (sx-serialize …) —
# the F-1 float-display class is a JS-boundary artifact, not a kernel
# serialization divergence. This battery compares guest serialization.
set -uo pipefail
cd "$(dirname "$0")/.."
SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
WASM=shared/static/wasm/sx_browser.bc.wasm.js
PROBES=spec/tests/differential-probes.txt
[[ -x "$SERVER" ]] || { echo "SKIP: $SERVER not built" >&2; exit 2; }
[[ -f "$WASM" ]] || { echo "SKIP: $WASM missing" >&2; exit 2; }
# --- KNOWN_DIVERGENT ledger (verified live 2026-07-04) -------------------
# F-3/K53: bare sx_server's `apply` does NOT spread its argument list —
# (apply + (list 1 2 3)) errors "Expected number, got list"; (apply str l)
# returns the serialized list as one string. The WASM kernel spreads
# correctly. The test runner masks this with its own apply (F-7 class).
declare -A KNOWN_DIVERGENT
KNOWN_DIVERGENT['(apply + (list 1 2 3))']="F-3: native apply does not spread"
KNOWN_DIVERGENT['(apply max (list 1 5 2))']="F-3: native apply does not spread"
KNOWN_DIVERGENT['(apply str (list "a" "b"))']="F-3: native apply does not spread"
native=$(mktemp); wasm=$(mktemp)
python3 - "$SERVER" "$PROBES" > "$native" <<'PY'
import json, subprocess, sys
server, probefile = sys.argv[1], sys.argv[2]
probes = [l.strip() for l in open(probefile) if l.strip() and not l.startswith('#')]
inp = []
for i, p in enumerate(probes):
inp.append(f"(epoch {i+1})")
inp.append(f"(eval {json.dumps(p)})")
out = subprocess.run(["timeout", "120", server], input="\n".join(inp) + "\n",
capture_output=True, text=True).stdout
res, cur = {}, None
for l in out.splitlines():
if l.startswith("(ok-len "):
cur = int(l.split()[1]); res[cur] = None
elif l.startswith("(error "):
idx = int(l.split()[1]); res[idx] = "ERROR"; cur = None
elif cur is not None and res.get(cur) is None:
res[cur] = l; cur = None
for i, p in enumerate(probes):
print(f"PROBE {i+1} {res.get(i+1, '<none>')}")
PY
timeout 300 node hosts/ocaml/browser/eval_wasm_probes.js "$PROBES" > "$wasm" 2>/dev/null
pass=0; fail=0; i=0
while IFS= read -r expr; do
[[ -z "$expr" || "$expr" == \#* ]] && continue
i=$((i+1))
a=$(sed -n "s/^PROBE $i //p" "$native")
b=$(sed -n "s/^PROBE $i //p" "$wasm")
known="${KNOWN_DIVERGENT[$expr]:-}"
if [[ "$a" == "$b" ]]; then
if [[ -n "$known" ]]; then
echo "RED: $expr — KNOWN_DIVERGENT now AGREES ($known); delete from ledger"
fail=$((fail+1))
else
pass=$((pass+1))
fi
else
if [[ -n "$known" ]]; then
echo "KNOWN-DIVERGENT: $expr ($known)"
pass=$((pass+1))
else
echo "RED: $expr"
echo " native: $a"
echo " wasm: $b"
fail=$((fail+1))
fi
fi
done < <(grep -v '^\s*#' "$PROBES" | grep -v '^\s*$')
rm -f "$native" "$wasm"
echo
echo "differential: $i probes, $pass in agreement/ledgered, $fail red"
[[ $fail -eq 0 ]]

100
scripts/test-env-parity.sh Executable file
View File

@@ -0,0 +1,100 @@
#!/bin/bash
# test-env-parity.sh — W14 section-B ledger: runner env vs production env.
#
# The review (F7, K42, JS5, core.md "canonical.sx depends on test-runner-only
# helpers") found bindings that exist ONLY in the test runners, so suites
# pass against an environment production never provides. Rule (PLAN.md W14):
# "if the spec needs it, it's a kernel primitive; if not, the test can't
# have it."
#
# This script is a LEDGER, not a wish: it asserts today's confirmed drift
# stays exactly as recorded. Both directions fail loudly:
# - a MUST_HAVE going missing on the server -> regression, fix the kernel
# - a KNOWN_DRIFT binding appearing on the server -> the fix landed;
# move it to MUST_HAVE and update the consequence pins below.
#
# Confirmed inventory (2026-07-04, all verified live over the epoch protocol):
#
# binding OCaml runner JS runner fresh sx_server
# values real (rt.ml:1131) ? ABSENT
# call-with-values real (rt.ml:1140) ? ABSENT
# contains-char? real (rt.ml:728) real (:85) ABSENT
# trim-right ABSENT real (:87) ABSENT
# sha3-256 FAKE Hashtbl.hash FAKE stub ABSENT (real = crypto-sha3-256)
#
# Consequences (pinned in section 3):
# - (canonical-serialize 42) on a fresh server errors "Undefined symbol:
# contains-char?" -> content addressing broken for ANY number outside
# the test runners.
# - every CID computed inside run_tests uses a FAKE hash, so test CIDs
# never equal production CIDs (crypto-sha3-256 is real SHA3).
#
# Each probe spawns its OWN timeout-bounded sx_server.exe. No shared process.
set -uo pipefail
cd "$(dirname "$0")/.."
SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
if [[ ! -x "$SERVER" ]]; then
echo "SKIP: $SERVER not built (run sx_build target=ocaml first)" >&2
exit 2
fi
pass=0
fail=0
# deps_unresolved EXPR -> prints the (unresolved ...) list for EXPR on a fresh server
deps_unresolved() {
printf '(epoch 1)\n(deps-check "%s")\n' "$1" \
| timeout 60 "$SERVER" 2>/dev/null \
| grep -o ':unresolved ([^)]*)' || true
}
# --- Section 1: MUST_HAVE — spec-needed bindings production must provide ---
MUST_HAVE_EXPR='(list (equal? 1 1) (apply + (list 1 2)) (contains? {:a 1} :a) (crypto-sha3-256 \"x\") (split \"a-b\" \"-\"))'
unres=$(deps_unresolved "$MUST_HAVE_EXPR")
if [[ -z "$unres" || "$unres" == ':unresolved ()' ]]; then
echo "PASS: MUST_HAVE core bindings all resolve on fresh sx_server"
pass=$((pass+1))
else
echo "FAIL: MUST_HAVE binding missing on fresh sx_server: $unres"
fail=$((fail+1))
fi
# --- Section 2: KNOWN_DRIFT — runner-only bindings, asserted ABSENT -------
# If one of these starts resolving, its kernel fix landed: move it to
# MUST_HAVE above and update the consequence pin in section 3.
for name in values call-with-values contains-char? trim-right sha3-256; do
unres=$(deps_unresolved "($name)")
if grep -q -- "$name" <<<"$unres"; then
echo "PASS: KNOWN_DRIFT '$name' still absent on fresh sx_server (ledger accurate)"
pass=$((pass+1))
else
echo "FAIL: KNOWN_DRIFT '$name' now RESOLVES on fresh sx_server — fix landed?"
echo " Update this ledger: move '$name' to MUST_HAVE and revisit section 3."
fail=$((fail+1))
fi
done
# --- Section 3: consequence pin — canonical.sx on the production server ---
# Current reality: canonical-serialize of ANY number errors on a fresh
# server because canonical-number calls runner-only contains-char?.
out=$(printf '(epoch 1)\n(load "spec/canonical.sx")\n(epoch 2)\n(eval "(canonical-serialize 42)")\n' \
| timeout 60 "$SERVER" 2>&1)
if grep -q 'error 2 .*contains-char?' <<<"$out"; then
echo "PASS: consequence pin — canonical-serialize on numbers still broken on server (as recorded)"
pass=$((pass+1))
elif grep -q '^(ok 2 ' <<<"$out"; then
echo "FAIL: consequence pin — canonical-serialize 42 now WORKS on the server."
echo " The canonical-helpers fix landed: flip this pin to assert success"
echo " and pin the exact canonical form + CID stability."
fail=$((fail+1))
else
echo "FAIL: consequence pin — unexpected server output:"
sed 's/^/ /' <<<"$out"
fail=$((fail+1))
fi
echo
echo "env-parity: $pass passed, $fail failed"
[[ $fail -eq 0 ]]

107
scripts/test-harness-parity.sh Executable file
View File

@@ -0,0 +1,107 @@
#!/bin/bash
# test-harness-parity.sh — W14 section-C pin for K19 (harness honesty).
#
# K19 (review, core.md): the MCP tree server (mcp_tree.ml) carries a
# PARALLEL primitive table, and it drifted from the real runtime
# (sx_primitives.ml) — e.g. (get {:a 1} :a 99) returned nil in the harness
# but 1 in production, (split "a--b" "--") was char-class vs substring.
# CLAUDE.md mandates harness verification, so drift silently produces
# false findings/passes. dc7aa709 aligned 8 entries as a stopgap; the real
# fix (mcp_tree links sx_primitives directly) is hosts-lane work.
#
# This pin runs the finding's exact probe battery through BOTH environments
# — mcp_tree.exe sx_eval (JSON-RPC over stdio) and a fresh sx_server.exe
# (epoch protocol) — and fails on ANY divergence. Errors are compared by
# message, values by serialized form. Both subprocesses are fresh and
# timeout-bounded; no shared process is touched.
#
# Exit: 0 = full parity; 1 = drift (harness lies about the runtime again).
set -uo pipefail
cd "$(dirname "$0")/.."
MCP=hosts/ocaml/_build/default/bin/mcp_tree.exe
SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
for bin in "$MCP" "$SERVER"; do
if [[ ! -x "$bin" ]]; then
echo "SKIP: $bin not built (run sx_build target=ocaml first)" >&2
exit 2
fi
done
python3 - "$MCP" "$SERVER" <<'PYEOF'
import json, re, subprocess, sys
MCP, SERVER = sys.argv[1], sys.argv[2]
# K19 probe battery — the finding's confirmed drift cases + stopgap entries.
PROBES = [
'(empty? "")', '(empty? {})',
'(get {:a 1} :a 99)', '(get {:a 1} :zz 99)', '(get (list 10 20) 1)',
'(split "a--b" "--")', '(split "abc" "")',
'(equal? (list 1 2) (list 1 2))',
'(contains? {:a 1} :a)', '(keyword-name :kw)',
'(char-code "A")', '(parse-number "42")',
]
def norm_error(msg):
"""Extract the quoted inner error message so harness/server error
envelopes compare equal when the underlying failure is the same."""
m = re.search(r'Unhandled exception: \\?"(.*?)\\?"', msg)
if m:
return "<ERROR> " + m.group(1)
return "<ERROR> " + msg.strip()[:80]
# --- harness side: mcp_tree sx_eval over JSON-RPC ---
lines = [
json.dumps({"jsonrpc": "2.0", "id": 1, "method": "initialize",
"params": {"protocolVersion": "2024-11-05", "capabilities": {},
"clientInfo": {"name": "parity", "version": "0"}}}),
json.dumps({"jsonrpc": "2.0", "method": "notifications/initialized"}),
]
for i, p in enumerate(PROBES):
lines.append(json.dumps({"jsonrpc": "2.0", "id": 100 + i, "method": "tools/call",
"params": {"name": "sx_eval", "arguments": {"expr": p}}}))
out = subprocess.run(["timeout", "60", MCP], input="\n".join(lines) + "\n",
capture_output=True, text=True).stdout
harness = {}
for l in out.splitlines():
try:
j = json.loads(l)
except ValueError:
continue
if isinstance(j.get("id"), int) and j["id"] >= 100:
txt = j.get("result", {}).get("content", [{}])[0].get("text", "<none>").strip()
if txt.startswith("Error:") or j.get("result", {}).get("isError"):
txt = norm_error(txt)
harness[j["id"] - 100] = txt
# --- server side: fresh sx_server over the epoch protocol ---
inp = []
for i, p in enumerate(PROBES):
inp.append(f"(epoch {i + 1})")
inp.append(f"(eval {json.dumps(p)})")
sout = subprocess.run(["timeout", "60", SERVER], input="\n".join(inp) + "\n",
capture_output=True, text=True).stdout
server, cur = {}, None
for l in sout.splitlines():
if l.startswith("(ok-len "):
cur = int(l.split()[1]); server[cur - 1] = None
elif l.startswith("(error "):
idx = int(l.split()[1]); server[idx - 1] = norm_error(l); cur = None
elif cur is not None and server.get(cur - 1) is None:
server[cur - 1] = l.strip(); cur = None
fails = 0
for i, p in enumerate(PROBES):
h = harness.get(i, "<missing>")
s = server.get(i, "<missing>")
if h == s:
print(f"PASS: {p:40s} both -> {h!r}")
else:
print(f"FAIL: {p:40s} harness={h!r} server={s!r}")
fails += 1
print()
print(f"harness-parity: {len(PROBES) - fails} passed, {fails} failed")
sys.exit(1 if fails else 0)
PYEOF

233
scripts/test-protocol-gate.sh Executable file
View File

@@ -0,0 +1,233 @@
#!/bin/bash
# test-protocol-gate.sh — W14 pins for the epoch/command-channel protocol.
#
# Pins C1/C1b (review, plans/sx-review/hosts.md): a malformed or non-ASCII
# line on the top-level command channel used to raise an uncaught
# Sx_types.Parse_error and KILL the whole sx_server process (the shared
# channel used by bridges and conformance runners). Fixed in dc7aa709:
# the server now answers `(error N "Malformed command line: ...")` and
# keeps serving.
#
# Each case spawns its OWN timeout-bounded sx_server.exe subprocess —
# no shared/sibling process is ever touched. Designed to grow into the
# W14 section-E protocol fuzz suite (C3-C7).
#
# Usage: bash scripts/test-protocol-gate.sh
# Exit: 0 = all pins green; 1 = a pin failed (fix regressed).
set -uo pipefail
cd "$(dirname "$0")/.."
SERVER=hosts/ocaml/_build/default/bin/sx_server.exe
if [[ ! -x "$SERVER" ]]; then
echo "SKIP: $SERVER not built (run sx_build target=ocaml first)" >&2
exit 2
fi
pass=0
fail=0
# run_case NAME INPUT EXPECT_SENTINEL
# Feeds INPUT to a fresh server. Asserts:
# 1. an (error ... "Malformed command line: ...") response is emitted
# 2. the follow-up epoch still evaluates (EXPECT_SENTINEL in output)
# 3. the process exits cleanly (no Fatal error, exit 0 on stdin EOF)
run_case() {
local name="$1" input="$2" sentinel="$3"
local out rc
out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1)
rc=$?
local ok=1
if ! grep -q 'Malformed command line' <<<"$out"; then
echo "FAIL: $name — no malformed-line error response"; ok=0
fi
if ! grep -q "^${sentinel}\$" <<<"$out"; then
echo "FAIL: $name — follow-up epoch did not run (process died?)"; ok=0
fi
if grep -q 'Fatal error' <<<"$out"; then
echo "FAIL: $name — Fatal error escaped to the top level"; ok=0
fi
if [[ $rc -ne 0 ]]; then
echo "FAIL: $name — nonzero exit ($rc)"; ok=0
fi
if [[ $ok -eq 1 ]]; then
echo "PASS: $name"
pass=$((pass+1))
else
echo " --- output ---"; sed 's/^/ /' <<<"$out"; echo " --------------"
fail=$((fail+1))
fi
}
# C1: unterminated list on the command channel (exact review repro)
run_case "C1 unterminated list survives" \
'(epoch 2)\n(eval "(+ 1 2"\n(epoch 3)\n(eval "99")\n' \
'99'
# C1: plain-garbage line (second C1 repro shape)
run_case "C1 garbage line survives" \
'(epoch 1)\nnot an s-expr ]]] {{{\n(epoch 2)\n(eval "42")\n' \
'42'
# C1b: non-ASCII byte on the command channel (exact review repro; \xc3\xa9 = é)
run_case "C1b non-ASCII line survives" \
'(epoch 1)\n(eval (quote caf\xc3\xa9))\n(epoch 2)\n(eval "99")\n' \
'99'
# Control: a well-formed session still works end to end
ctrl=$(printf '(epoch 1)\n(eval "(+ 40 2)")\n' | timeout 60 "$SERVER" 2>&1)
if grep -q '^42$' <<<"$ctrl"; then
echo "PASS: control well-formed session"
pass=$((pass+1))
else
echo "FAIL: control well-formed session"; sed 's/^/ /' <<<"$ctrl"
fail=$((fail+1))
fi
# ---------------------------------------------------------------------------
# C3C7 protocol-quirk LEDGER (hosts.md, all OPEN server-side). These pin
# CURRENT behavior, verified live 2026-07-04 — they are documentation, not
# endorsement. When a server fix lands and a pin fails, update the ledger
# to assert the corrected behavior (bidirectional, like test-env-parity.sh).
# ---------------------------------------------------------------------------
# ledger_case NAME INPUT GREP_MUST GREP_MUST2
ledger_case() {
local name="$1" input="$2" must="$3" must2="${4:-}"
local out
out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1)
local ok=1
grep -q -- "$must" <<<"$out" || { echo "FAIL: $name — expected: $must"; ok=0; }
if [[ -n "$must2" ]]; then
grep -q -- "$must2" <<<"$out" || { echo "FAIL: $name — expected: $must2"; ok=0; }
fi
if grep -q 'Fatal error' <<<"$out"; then
echo "FAIL: $name — process died"; ok=0
fi
if [[ $ok -eq 1 ]]; then echo "PASS: $name"; pass=$((pass+1));
else echo " --- output ---"; sed 's/^/ /' <<<"$out"; fail=$((fail+1)); fi
}
# C3: stray (io-response ...) is answered as Unknown command (dead guard) —
# an EXTRA response the client didn't ask for; process keeps serving.
ledger_case "C3 ledger: stray io-response gets an extra error reply" \
'(epoch 1)\n(io-response 1 42)\n(eval "5")\n' \
'Unknown command: (io-response 1 42)' '^5$'
# C4: malformed (epoch) doesn't update the epoch — next reply tagged with
# the OLD epoch (0 here), i.e. stale from the client's viewpoint.
ledger_case "C4 ledger: malformed epoch marker leaves epoch stale" \
'(epoch)\n(eval "2")\n' \
'(ok-len 0 1)' '^2$'
# C5: no monotonic-epoch enforcement — a decreasing epoch is accepted.
ledger_case "C5 ledger: decreasing epoch accepted silently" \
'(epoch 9)\n(epoch 3)\n(eval "42")\n' \
'(ok-len 3 2)' '^42$'
# C6: two commands on one line -> one error, NEITHER executed.
ledger_case "C6 ledger: two commands on one line both dropped" \
'(epoch 1)\n(eval "1") (eval "2")\n(eval "3")\n' \
'Expected single command, got 2' '^3$'
# C7: vm-trace without the compiler loaded errors opaquely.
ledger_case "C7 ledger: vm-trace sans compiler is opaque Not-callable-nil" \
'(epoch 1)\n(vm-trace "(+ 1 2)")\n' \
'Not callable: nil'
# ---------------------------------------------------------------------------
# Fuzz-liveness property: after 60 deterministic hostile lines (unbalanced
# parens, control chars, unicode, long lines, stray io-responses, epoch
# mutations), the server must still answer a well-formed command and exit
# cleanly. Seeded PRNG — reproducible corpus.
# ---------------------------------------------------------------------------
fuzz=$(python3 - <<'PY'
import random
r = random.Random(1404)
lines = []
frag = ['(', ')', '((', '))', '(eval', '(epoch', 'io-response', '"', '\\',
'café', '\x01', '\x1b[2J', ':kw', '{', '}', '(+ 1', 'nil)', '#|', '|#']
for i in range(60):
kind = r.randrange(5)
if kind == 0:
lines.append(''.join(r.choice(frag) for _ in range(r.randrange(1, 8))))
elif kind == 1:
lines.append('(epoch ' + r.choice(['', 'foo', '-1', '999999999999999999999', ')']) + ')')
elif kind == 2:
lines.append('(io-response %d %s' % (r.randrange(99), r.choice([')', '', '42']) ))
elif kind == 3:
lines.append('x' * r.randrange(200, 2000))
else:
lines.append('(eval "' + r.choice(['(+ 1', '(list', '\\\\', '((((']) + '")')
print('\n'.join(lines))
PY
)
out=$(printf '%s\n(epoch 777)\n(eval "\\"alive\\"")\n' "$fuzz" | timeout 90 "$SERVER" 2>&1)
rc=$?
if grep -q '^"alive"$' <<<"$out" && ! grep -q 'Fatal error' <<<"$out" && [[ $rc -eq 0 ]]; then
echo "PASS: fuzz-liveness — server survives 60 hostile lines and still answers"
pass=$((pass+1))
else
echo "FAIL: fuzz-liveness (rc=$rc)"; tail -6 <<<"$out" | sed 's/^/ /'
fail=$((fail+1))
fi
# ---------------------------------------------------------------------------
# S4 (review, hosts.md): soft error pages must NOT be stored in the HTTP
# response cache. Pre-fix, a routing-failure page was cached as HTTP 200 and
# served byte-identically from cache to every later visitor (cold 2s → warm
# 0.0005s, ONE render line). Post-fix (dc7aa709), http_render_page returns
# (html, is_error) and cache insertion is gated on `not is_err` (the skip is
# logged as "[cache] <path> → error page, not cached").
#
# Pin: GET the same nonexistent path twice against a fresh --http server and
# assert BOTH requests re-render (two [sx-http] render lines) plus the
# is_err gate line appearing in the log. NB: in a standalone worktree all
# docs pages render as soft error pages (no content), so a positive
# "real page IS cached" control is not assertable here.
# ---------------------------------------------------------------------------
s4_case() {
local port=$((18000 + RANDOM % 2000))
local log; log=$(mktemp)
timeout 90 "$SERVER" --http "$port" >"$log" 2>&1 &
local srv=$!
local up=0
for _ in $(seq 1 40); do
if curl -s -o /dev/null "http://localhost:$port/" 2>/dev/null; then up=1; break; fi
sleep 1
done
if [[ $up -ne 1 ]]; then
echo "FAIL: S4 — http server did not come up on :$port"
kill "$srv" 2>/dev/null; rm -f "$log"
fail=$((fail+1)); return
fi
local miss="/sx/gate-pin-missing-$$-$RANDOM"
curl -s -o /dev/null "http://localhost:$port$miss"
curl -s -o /dev/null "http://localhost:$port$miss"
sleep 1
local renders
renders=$(grep -c "sx-http\] $miss " "$log")
local ok=1
if [[ "$renders" -ne 2 ]]; then
echo "FAIL: S4 — expected 2 renders of $miss (not cache-served), got $renders"
ok=0
fi
if ! grep -q 'error page, not cached' "$log"; then
echo "FAIL: S4 — is_err cache gate line absent from server log"
ok=0
fi
if [[ $ok -eq 1 ]]; then
echo "PASS: S4 soft error page not cached (both GETs re-rendered)"
pass=$((pass+1))
else
echo " --- log tail ---"; tail -12 "$log" | sed 's/^/ /'; echo " ---------------"
fail=$((fail+1))
fi
kill "$srv" 2>/dev/null
rm -f "$log"
}
s4_case
echo
echo "protocol-gate: $pass passed, $fail failed"
[[ $fail -eq 0 ]]

61
scripts/test-suite-baseline.sh Executable file
View File

@@ -0,0 +1,61 @@
#!/bin/bash
# test-suite-baseline.sh — W14/F10: make FAIL mean something again.
#
# The review (conformance.md F-10): the OCaml suite is not green — a
# permanent ~274-failure band (in-progress hs-* + r7rs radix shadow) is
# normalized, so real regressions hide inside the red noise and nobody can
# tell a new failure from the band.
#
# This gate pins the band instead of ignoring it: the full suite's FAIL
# set is diffed against the checked-in baseline
# (spec/tests/known-failures.txt). Two red conditions, both loud:
# NEW failure -> a real regression: fix it (or, if intentional,
# justify + add to the baseline in the same commit)
# VANISHED failure -> something got fixed: delete it from the baseline
# so the win is locked in
# Neither touches the runner or the hs loops' scoreboards — the band still
# prints as FAIL lines for the teams working through it.
#
# Usage: bash scripts/test-suite-baseline.sh
# Runtime: full suite, ~515 min. Exit 0 = fail set identical to baseline.
set -uo pipefail
cd "$(dirname "$0")/.."
RUNNER=hosts/ocaml/_build/default/bin/run_tests.exe
BASELINE=spec/tests/known-failures.txt
[[ -x "$RUNNER" ]] || { echo "SKIP: $RUNNER not built" >&2; exit 2; }
[[ -f "$BASELINE" ]] || { echo "SKIP: $BASELINE missing" >&2; exit 2; }
log=$(mktemp)
timeout 3000 "$RUNNER" > "$log" 2>&1
rc=$?
if [[ $rc -ne 0 && $rc -ne 1 ]]; then
echo "RED: runner exited $rc (timeout/crash)"; tail -5 "$log"; rm -f "$log"; exit 1
fi
# Normalize: keep the stable test identity (suite > name), drop messages
# (error text may contain addresses/timings that churn).
current=$(mktemp)
grep '^ FAIL: ' "$log" | sed 's/^ FAIL: //; s/: .*$//' | sort -u > "$current"
new_failures=$(comm -13 <(sort -u "$BASELINE") "$current")
vanished=$(comm -23 <(sort -u "$BASELINE") "$current")
summary=$(grep '^Results:' "$log" | tail -1)
red=0
if [[ -n "$new_failures" ]]; then
echo "RED: NEW failures not in baseline:"
sed 's/^/ + /' <<<"$new_failures"
red=1
fi
if [[ -n "$vanished" ]]; then
echo "RED: baseline entries now PASSING (delete them from $BASELINE):"
sed 's/^/ - /' <<<"$vanished"
red=1
fi
if [[ $red -eq 0 ]]; then
echo "GREEN: fail set identical to baseline ($(wc -l < "$BASELINE") known failures)"
fi
echo "$summary"
rm -f "$log" "$current"
exit $red

82
scripts/test-wasm-corpus.sh Executable file
View File

@@ -0,0 +1,82 @@
#!/bin/bash
# test-wasm-corpus.sh — W14/F2: sweep the spec test corpus through the
# SHIPPED browser kernel (sx_browser.bc.wasm.js) headless in Node.
#
# The review (conformance.md F-2) found the shipped browser artifact never
# runs the corpus — F-1/F-3 native/WASM divergences existed undetected.
# Each file runs in its OWN node process via run_wasm_corpus.js (a hang is
# killed by per-file timeout without ending the sweep).
#
# The SKIP list documents files that structurally cannot run on the browser
# kernel (runner-only bindings, native-only machinery) — the F-5/F-6/F-10
# "one-host-gated" theme, recorded honestly per file with the reason.
# KNOWN_FAIL documents files that RUN but currently have failing tests on
# the shipped kernel (host divergence, F-1/F-3 class): they execute and
# report, but don't gate. Everything else must be GREEN — exit 1 otherwise;
# a KNOWN_FAIL going green also fails (ledger must be updated).
#
# Usage: bash scripts/test-wasm-corpus.sh [file.sx ...]
set -uo pipefail
cd "$(dirname "$0")/.."
RUNNER=hosts/ocaml/browser/run_wasm_corpus.js
KERNEL=shared/static/wasm/sx_browser.bc.wasm.js
[[ -f "$KERNEL" ]] || { echo "SKIP: $KERNEL missing (run sx-build-all first)" >&2; exit 2; }
# --- classification (empirical sweep 2026-07-04; see sx-gate-loop.md) ---
# Sweep baseline: 83 files, 80 fully green, 5192 passes, 0 test failures.
# The shipped kernel even provides the CEK driver bindings (make-env,
# cek-step-loop, ...) — broader than a bare sx_server.
declare -A SKIP KNOWN_FAIL
skip() { SKIP[$1]=$2; }
known() { KNOWN_FAIL[$1]=$2; }
# Partial load-errors: the kernel throws mid-file (opaque jsoo exception,
# message "undefined"); tests before the failing form pass and report.
known test-hash-table.sx "partial: 22 pass then load-error mid-file"
known test-r7rs.sx "partial: 87 pass then load-error mid-file"
known test-sets.sx "partial: 30 pass then load-error mid-file"
pass_total=0; fail_total=0; red=0; files=0
declare -a targets
if [[ $# -gt 0 ]]; then targets=("$@");
else for f in spec/tests/test-*.sx; do
[[ "$(basename "$f")" == "test-framework.sx" ]] && continue
targets+=("$f")
done; fi
for f in "${targets[@]}"; do
base=$(basename "$f")
if [[ -n "${SKIP[$base]:-}" ]]; then
echo "SKIP: $base${SKIP[$base]}"
continue
fi
files=$((files+1))
line=$(timeout 120 node "$RUNNER" "$f" 2>/dev/null | grep '^CORPUS-RESULT' || true)
if [[ -z "$line" ]]; then
echo "RED: $base — timeout or crash (no CORPUS-RESULT)"
red=$((red+1)); continue
fi
p=$(sed -n 's/.*pass=\([0-9]*\).*/\1/p' <<<"$line")
fl=$(sed -n 's/.*fail=\([0-9]*\).*/\1/p' <<<"$line")
st=$(sed -n 's/.*status=\([a-z-]*\).*/\1/p' <<<"$line")
pass_total=$((pass_total+p)); fail_total=$((fail_total+fl))
if [[ -n "${KNOWN_FAIL[$base]:-}" ]]; then
if [[ "$fl" -eq 0 && "$st" == "ok" ]]; then
echo "RED: $base — KNOWN_FAIL is now GREEN (${KNOWN_FAIL[$base]}); update the ledger"
red=$((red+1))
else
echo "KNOWN-FAIL: $base pass=$p fail=$fl ($( echo "${KNOWN_FAIL[$base]}" ))"
fi
continue
fi
if [[ "$st" != "ok" || "$fl" -ne 0 ]]; then
echo "RED: $base pass=$p fail=$fl status=$st"
red=$((red+1))
else
echo "OK: $base pass=$p"
fi
done
echo
echo "wasm-corpus: $files files run, $pass_total passed, $fail_total failed, $red red"
[[ $red -eq 0 ]]

View File

@@ -1,6 +1,7 @@
;; Assert condition is truthy, error with message
(define-library (sx harness)
(define-library
(sx harness)
(export
assert
assert=
@@ -12,6 +13,7 @@
harness-set!
make-interceptor
install-interceptors
harness-run-perform
io-calls
io-call-count
io-call-nth
@@ -24,71 +26,240 @@
assert-io-result
assert-state)
(begin
(define assert (fn (condition msg) (when (not condition) (error (or msg "Assertion failed")))))
;; Assert two values are equal
(define assert= (fn (actual expected msg) (when (not (= actual expected)) (error (or msg (str "Expected " expected ", got " actual))))))
;; Dict of mock IO operations for testing
(define default-platform {:current-user (fn () nil) :csrf-token (fn () "test-csrf-token") :app-url (fn (service &rest path) "/mock-app-url") :frag (fn (service comp &rest args) "") :sleep (fn (ms) nil) :local-storage-set (fn (key val) nil) :set-cookie (fn (name val &rest opts) nil) :url-for (fn (endpoint &rest args) "/mock-url") :create-element (fn (tag) nil) :request-path (fn () "/") :config (fn (key) nil) :set-attr (fn (el name val) nil) :set-text (fn (el text) nil) :remove-child (fn (parent child) nil) :fetch (fn (url &rest opts) {:status 200 :body "" :ok true}) :query (fn (service name &rest args) (list)) :add-class (fn (el cls) nil) :get-element (fn (id) nil) :now (fn () 0) :abort (fn (code) nil) :action (fn (service name &rest args) {:ok true}) :remove-class (fn (el cls) nil) :append-child (fn (parent child) nil) :request-arg (fn (name) nil) :emit-dom (fn (op &rest args) nil) :local-storage-get (fn (key) nil) :get-cookie (fn (name) nil)})
;; Create a test session with mock IO platform
(define make-harness :effects () (fn (&key platform) (let ((merged (if (nil? platform) default-platform (merge default-platform platform)))) {:log (list) :platform merged :state {:cookies {} :storage {} :dom nil}})))
;; Clear IO log and state for a new test
(define harness-reset! :effects () (fn (session) (dict-set! session "log" (list)) (dict-set! session "state" {:cookies {} :storage {} :dom nil}) session))
;; Append an IO call record to session log
(define harness-log :effects () (fn (session &key op) (let ((log (get session "log"))) (if (nil? op) log (filter (fn (entry) (= (get entry "op") op)) log)))))
;; Read state value from session store
(define harness-get :effects () (fn (session key) (get (get session "state") key)))
;; Write state value to session store
(define harness-set! :effects () (fn (session key value) (dict-set! (get session "state") key value) nil))
;; Wrap a mock fn to record calls in the IO log
(define make-interceptor :effects () (fn (session op-name mock-fn) (fn (&rest args) (let ((result (if (empty? args) (mock-fn) (if (= 1 (len args)) (mock-fn (first args)) (if (= 2 (len args)) (mock-fn (first args) (nth args 1)) (if (= 3 (len args)) (mock-fn (first args) (nth args 1) (nth args 2)) (apply mock-fn args)))))) (log (get session "log"))) (append! log {:args args :result result :op op-name}) result))))
;; Bind all interceptors into the eval environment
(define install-interceptors :effects () (fn (session env) (for-each (fn (key) (let ((mock-fn (get (get session "platform") key)) (interceptor (make-interceptor session key mock-fn))) (env-bind! env key interceptor))) (keys (get session "platform"))) env))
;; Query IO log: all calls, or filtered by op name
(define io-calls :effects () (fn (session op-name) (filter (fn (entry) (= (get entry "op") op-name)) (get session "log"))))
;; Count IO calls, optionally filtered by op name
(define io-call-count :effects () (fn (session op-name) (len (io-calls session op-name))))
;; Get the nth IO call record
(define io-call-nth :effects () (fn (session op-name n) (let ((calls (io-calls session op-name))) (if (< n (len calls)) (nth calls n) nil))))
;; Get args from the nth call to an operation
(define io-call-args :effects () (fn (session op-name n) (let ((call (io-call-nth session op-name n))) (if (nil? call) nil (get call "args")))))
;; Get return value from the nth call to an operation
(define io-call-result :effects () (fn (session op-name n) (let ((call (io-call-nth session op-name n))) (if (nil? call) nil (get call "result")))))
;; Assert an IO operation was called at least once
(define assert-io-called :effects () (fn (session op-name) (assert (> (io-call-count session op-name) 0) (str "Expected IO operation " op-name " to be called but it was not"))))
;; Assert an IO operation was never called
(define assert-no-io :effects () (fn (session op-name) (assert (= (io-call-count session op-name) 0) (str "Expected IO operation " op-name " not to be called but it was called " (io-call-count session op-name) " time(s)"))))
;; Assert exact call count for an operation
(define assert-io-count :effects () (fn (session op-name expected) (let ((actual (io-call-count session op-name))) (assert (= actual expected) (str "Expected " op-name " to be called " expected " time(s) but was called " actual " time(s)")))))
;; Assert args of the nth call match expected
(define assert-io-args :effects () (fn (session op-name n expected-args) (let ((actual (io-call-args session op-name n))) (assert (equal? actual expected-args) (str "Expected call " n " to " op-name " with args " (str expected-args) " but got " (str actual))))))
;; Assert result of the nth call matches expected
(define assert-io-result :effects () (fn (session op-name n expected) (let ((actual (io-call-result session op-name n))) (assert (equal? actual expected) (str "Expected call " n " to " op-name " to return " (str expected) " but got " (str actual))))))
;; Assert a state key has the expected value
(define assert-state :effects () (fn (session key expected) (let ((actual (harness-get session key))) (assert (equal? actual expected) (str "Expected state " key " to be " (str expected) " but got " (str actual))))))
)) ;; end define-library
(define
assert
(fn
(condition msg)
(when (not condition) (error (or msg "Assertion failed")))))
(define
assert=
(fn
(actual expected msg)
(when
(not (= actual expected))
(error (or msg (str "Expected " expected ", got " actual))))))
(define default-platform {:get-cookie (fn (name) nil) :local-storage-get (fn (key) nil) :emit-dom (fn (op &rest args) nil) :request-arg (fn (name) nil) :append-child (fn (parent child) nil) :remove-class (fn (el cls) nil) :action (fn (service name &rest args) {:ok true}) :abort (fn (code) nil) :now (fn () 0) :get-element (fn (id) nil) :add-class (fn (el cls) nil) :query (fn (service name &rest args) (list)) :fetch (fn (url &rest opts) {:ok true :body "" :status 200}) :remove-child (fn (parent child) nil) :set-text (fn (el text) nil) :set-attr (fn (el name val) nil) :config (fn (key) nil) :request-path (fn () "/") :create-element (fn (tag) nil) :url-for (fn (endpoint &rest args) "/mock-url") :set-cookie (fn (name val &rest opts) nil) :local-storage-set (fn (key val) nil) :sleep (fn (ms) nil) :frag (fn (service comp &rest args) "") :app-url (fn (service &rest path) "/mock-app-url") :csrf-token (fn () "test-csrf-token") :current-user (fn () nil)})
(define
make-harness
:effects ()
(fn
(&key platform)
(let
((merged (if (nil? platform) default-platform (merge default-platform platform))))
{:state {:dom nil :storage {} :cookies {}} :platform merged :log (list)})))
(define
harness-reset!
:effects ()
(fn
(session)
(dict-set! session "log" (list))
(dict-set! session "state" {:dom nil :storage {} :cookies {}})
session))
(define
harness-log
:effects ()
(fn
(session &key op)
(let
((log (get session "log")))
(if
(nil? op)
log
(filter (fn (entry) (= (get entry "op") op)) log)))))
(define
harness-get
:effects ()
(fn (session key) (get (get session "state") key)))
(define
harness-set!
:effects ()
(fn
(session key value)
(dict-set! (get session "state") key value)
nil))
(define
harness-invoke-mock
:effects ()
(fn
(mock-fn args)
(if
(empty? args)
(mock-fn)
(if
(= 1 (len args))
(mock-fn (first args))
(if
(= 2 (len args))
(mock-fn (first args) (nth args 1))
(if
(= 3 (len args))
(mock-fn
(first args)
(nth args 1)
(nth args 2))
(apply mock-fn args)))))))
(define
make-interceptor
:effects ()
(fn
(session op-name mock-fn)
(fn
(&rest args)
(let
((entry {:op op-name :result nil :args args}) (log (get session "log")))
(append! log entry)
(let
((result (harness-invoke-mock mock-fn args)))
(dict-set! entry "result" result)
result)))))
(define
install-interceptors
:effects ()
(fn
(session env)
(for-each
(fn
(key)
(let
((mock-fn (get (get session "platform") key))
(interceptor (make-interceptor session key mock-fn)))
(env-bind! env key interceptor)))
(keys (get session "platform")))
env))
(define
harness-run-perform
:effects ()
(fn
(session expr env)
(let
((drive (fn (self state) (if (cek-suspended? state) (let ((req (cek-io-request state))) (let ((op (get req "op")) (args (or (get req "args") (list)))) (let ((mock-fn (get (get session "platform") op))) (when (nil? mock-fn) (error (str "harness-run-perform: no mock for op " op))) (let ((entry {:op op :result nil :args args}) (log (get session "log"))) (append! log entry) (let ((result (harness-invoke-mock mock-fn args))) (dict-set! entry "result" result) (self self (cek-resume state result))))))) (cek-value state)))))
(drive drive (cek-step-loop (make-cek-state expr env (list)))))))
(define
io-calls
:effects ()
(fn
(session op-name)
(filter
(fn (entry) (= (get entry "op") op-name))
(get session "log"))))
(define
io-call-count
:effects ()
(fn (session op-name) (len (io-calls session op-name))))
(define
io-call-nth
:effects ()
(fn
(session op-name n)
(let
((calls (io-calls session op-name)))
(if (< n (len calls)) (nth calls n) nil))))
(define
io-call-args
:effects ()
(fn
(session op-name n)
(let
((call (io-call-nth session op-name n)))
(if (nil? call) nil (get call "args")))))
(define
io-call-result
:effects ()
(fn
(session op-name n)
(let
((call (io-call-nth session op-name n)))
(if (nil? call) nil (get call "result")))))
(define
assert-io-called
:effects ()
(fn
(session op-name)
(assert
(> (io-call-count session op-name) 0)
(str "Expected IO operation " op-name " to be called but it was not"))))
(define
assert-no-io
:effects ()
(fn
(session op-name)
(assert
(= (io-call-count session op-name) 0)
(str
"Expected IO operation "
op-name
" not to be called but it was called "
(io-call-count session op-name)
" time(s)"))))
(define
assert-io-count
:effects ()
(fn
(session op-name expected)
(let
((actual (io-call-count session op-name)))
(assert
(= actual expected)
(str
"Expected "
op-name
" to be called "
expected
" time(s) but was called "
actual
" time(s)")))))
(define
assert-io-args
:effects ()
(fn
(session op-name n expected-args)
(let
((actual (io-call-args session op-name n)))
(assert
(equal? actual expected-args)
(str
"Expected call "
n
" to "
op-name
" with args "
(str expected-args)
" but got "
(str actual))))))
(define
assert-io-result
:effects ()
(fn
(session op-name n expected)
(let
((actual (io-call-result session op-name n)))
(assert
(equal? actual expected)
(str
"Expected call "
n
" to "
op-name
" to return "
(str expected)
" but got "
(str actual))))))
(define
assert-state
:effects ()
(fn
(session key expected)
(let
((actual (harness-get session key)))
(assert
(equal? actual expected)
(str
"Expected state "
key
" to be "
(str expected)
" but got "
(str actual)))))))) ;; end define-library
;; Re-export to global namespace for backward compatibility
(import (sx harness))

View File

@@ -0,0 +1,65 @@
# W14/F8 differential probe corpus — one expression per line.
# Same expression evaluated on the native server (epoch protocol) and the
# shipped WASM kernel (K.eval); scripts/test-differential.sh diffs results.
# Classes drawn from review findings F-1 (integer arithmetic), F-3 (apply,
# dict key order), F-8 itemization, S-4 (float printing), K18/K53.
# integers & display (F-1)
(+ 1 2)
(- 10 3)
(* 6 7)
(/ 4 2)
(/ 1 2)
(/ 10 4)
(quotient 13 4)
(mod 10 3)
# float printing (S-4)
(+ 0.1 0.2)
(* 3 0.1)
(/ 1 3)
(str 0.3)
(str 1.5)
(str 2.0)
# overflow / expt (K18)
(expt 2 10)
(expt 2 62)
(expt 2 100)
(+ 9223372036854775807 1)
# apply (F-3)
(apply + (list 1 2 3))
(apply max (list 1 5 2))
(apply str (list "a" "b"))
# dict key order (F-3)
(keys {:b 2 :a 1 :c 3})
(str {:b 2 :a 1})
(vals {:b 2 :a 1})
# strings
(split "a,b,c" ",")
(split "a--b" "--")
(len "héllo")
(upcase "abc")
(str (char-code "A"))
(substring "hello" 1 3)
(join "-" (list "x" "y"))
# equality & comparison
(= 1 1.0)
(= (list 1 2) (list 1 2))
(equal? (list 1) (list 1))
(< 1 2 3)
# collections
(sort (list 3 1 2))
(range 3)
(reverse (list 1 2 3))
(nth (list 10 20 30) 1)
(contains? {:a 1} :a)
(get {:a 1} :zz 99)
# quasiquote / quote
(quasiquote (1 (unquote (+ 1 1)) 3))
(str (quote sym))
# conditionals & special forms
(if true 1 2)
(and 1 2 3)
(or nil false 7)
(do ((fn (x) x) 5) 99)
# error normalization (both sides should error)
(undefined-symbol-xyz)
(/ 1 0)

View File

@@ -0,0 +1,273 @@
hs-compat-asExpression > converts-a-complete-form-into-values
hs-compat-asExpression > converts-strings-into-fragments
hs-compat-asExpression > converts-value-as-json
hs-compat-blockLiteral > can-map-an-array
hs-compat-in > basic-no-query-return-values
hs-compat-typecheck > can-do-basic-non-string-typecheck-failure
hs-compat-typecheck > can-do-basic-string-non-null-typecheck
hs-compat-typecheck > can-do-basic-string-typecheck
hs-compat-typecheck > null-causes-null-safe-string-check-to-fail
hs-dev-asExpression > parses string as JSON to object
hs-dev-collectionExpressions > where binds after property access
hs-dev-comparisonOperator > I am between works
hs-dev-comparisonOperator > I am not between works
hs-dev-comparisonOperator > is still does equality when rhs variable exists
hs-dev-pick > can pick first n items
hs-dev-pick > can pick items using 'of' syntax
hs-dev-pick > can pick last n items
hs-dev-pick > can pick random item
hs-dev-pick > can pick random n items
hs-emit-classes > remove class from target
hs-emit-control-flow > tell rebinds me
hs-emit-def-behavior > def becomes define
hs-emit-dom-commands > hide sets display none
hs-emit-dom-commands > log passes through
hs-emit-dom-commands > show clears display
hs-emit-on > on every click
hs-extra-function-call > identity-call
hs-extra-lambda > array-map-block
hs-extra-lambda > arrow-true
hs-extra-typecheck > null-colon-string
hs-parse-assignment > put into
hs-parse-assignment > set property to string
hs-parse-basic-commands > add class to me
hs-parse-basic-commands > remove class from me
hs-parse-basic-commands > toggle between two classes
hs-parse-basic-commands > toggle class on me
hs-parse-conditional > if else end
hs-parse-conditional > if then end
hs-parse-conformance > increment @count → full AST
hs-parse-conformance > on click add .called → full AST
hs-parse-conformance > on click from #bar add .clicked → full AST
hs-parse-conformance > toggle between .foo and .bar → full AST
hs-parse-conformance > wait 100ms then add .done → full AST
hs-parse-events > on click add class
hs-parse-events > on click from target
hs-parse-every-modifier > on every click
hs-parse-expressions > attribute ref
hs-parse-expressions > style ref
hs-parse-send-trigger > trigger event on me
hs-parse-sequencing > wait then add
hs-parse-special-commands > decrement attribute
hs-parse-special-commands > hide
hs-parse-special-commands > increment attribute
hs-parse-special-commands > show target
hs-parse-unary > not expr
hs-runtime-e2e > source → SX shape
hs-runtime-make > make Map returns dict
hs-runtime-make > make Set returns list
hs-tokenize-arithmetic-ops > division operator
hs-tokenize-arithmetic-ops > mixed arithmetic
hs-tokenize-arithmetic-ops > modulo operator
hs-tokenize-arithmetic-ops > multiply operator
hs-tokenize-basics > keywords vs identifiers
hs-tokenize-basics > whitespace skipped
hs-tokenize-comments > line comment skipped
hs-tokenize-full-expressions > if true put "foo" into me.innerHTML else put "bar" into me.innerHTML end
hs-tokenize-full-expressions > increment @count then put it into me
hs-tokenize-full-expressions > on click add .called
hs-tokenize-full-expressions > on click[buttons==0] log event
hs-tokenize-full-expressions > on click from #bar add .clicked
hs-tokenize-full-expressions > on click send custom(foo:"fromBar") to #d2
hs-tokenize-full-expressions > put "Clicked" into my.innerHTML
hs-tokenize-full-expressions > set #d1.innerHTML to foo
hs-tokenize-full-expressions > toggle between .foo and .bar
hs-tokenize-full-expressions > wait 100ms then add .done
hs-upstream-add > can add a value to a set
hs-upstream-add > can add to an HTMLCollection
hs-upstream-add > can add to children
hs-upstream-add > can add to query in me
hs-upstream-add > supports async expressions in when clause
hs-upstream-append > append to undefined ignores the undefined
hs-upstream-append > can append a value to a DOM node
hs-upstream-append > can append a value to a set
hs-upstream-append > can append a value to I
hs-upstream-append > multiple appends work
hs-upstream-append > new DOM content added by append will be live
hs-upstream-askAnswer > confirm returns first choice on OK
hs-upstream-askAnswer > prompts and puts result in it
hs-upstream-call > call functions that return promises are waited on
hs-upstream-core/asyncError > rejected promise stops execution
hs-upstream-core/asyncError > rejected promise triggers catch block
hs-upstream-core/regressions > can invoke functions w/ numbers in name
hs-upstream-core/regressions > can pick detail fields out by name
hs-upstream-core/regressions > can refer to function in init blocks
hs-upstream-core/runtimeErrors > reports basic function invocation null errors properly
hs-upstream-core/runtimeErrors > reports basic function invocation null errors properly w/ of
hs-upstream-core/runtimeErrors > reports basic function invocation null errors properly w/ possessives
hs-upstream-core/runtimeErrors > reports null errors on add command properly
hs-upstream-core/runtimeErrors > reports null errors on decrement command properly
hs-upstream-core/runtimeErrors > reports null errors on default command properly
hs-upstream-core/runtimeErrors > reports null errors on hide command properly
hs-upstream-core/runtimeErrors > reports null errors on increment command properly
hs-upstream-core/runtimeErrors > reports null errors on measure command properly
hs-upstream-core/runtimeErrors > reports null errors on put command properly
hs-upstream-core/runtimeErrors > reports null errors on remove command properly
hs-upstream-core/runtimeErrors > reports null errors on send command properly
hs-upstream-core/runtimeErrors > reports null errors on sets properly
hs-upstream-core/runtimeErrors > reports null errors on settle command properly
hs-upstream-core/runtimeErrors > reports null errors on show command properly
hs-upstream-core/runtimeErrors > reports null errors on toggle command properly
hs-upstream-core/runtimeErrors > reports null errors on transition command properly
hs-upstream-core/runtimeErrors > reports null errors on trigger command properly
hs-upstream-core/runtime > has proper stack from event handler
hs-upstream-core/scoping > locally scoped variables don't clash with built-in variables
hs-upstream-empty > can empty a map
hs-upstream-empty > can empty an element
hs-upstream-empty > can empty a set
hs-upstream-empty > clear works on elements
hs-upstream-expressions/asExpression > can accept custom dynamic conversions
hs-upstream-expressions/asExpression > can use the a modifier if you like
hs-upstream-expressions/asExpression > collects duplicate text inputs into an array
hs-upstream-expressions/asExpression > converts a complete form into Values
hs-upstream-expressions/asExpression > converts a form element into Values
hs-upstream-expressions/asExpression > converts a form element into Values | FormEncoded
hs-upstream-expressions/asExpression > converts a form element into Values | JSONString
hs-upstream-expressions/asExpression > converts an element into HTML
hs-upstream-expressions/asExpression > converts a NodeList into HTML
hs-upstream-expressions/asExpression > converts array as Set
hs-upstream-expressions/asExpression > converts checkboxes into a Value correctly
hs-upstream-expressions/asExpression > converts multiple selects into a Value correctly
hs-upstream-expressions/asExpression > converts multiple selects with programmatically changed selections
hs-upstream-expressions/asExpression > converts object as Map
hs-upstream-expressions/asExpression > converts radio buttons into a Value correctly
hs-upstream-expressions/asExpression > converts value as Date
hs-upstream-expressions/asExpression > parses string as JSON to object
hs-upstream-expressions/asExpression > pipe operator chains conversions
hs-upstream-expressions/blockLiteral > basic block literals work
hs-upstream-expressions/blockLiteral > basic identity works
hs-upstream-expressions/blockLiteral > basic two arg identity works
hs-upstream-expressions/closest > closest does not consume a following where clause
hs-upstream-expressions/comparisonOperator > does not exist works
hs-upstream-expressions/cookies > basic clear cookie values work
hs-upstream-expressions/cookies > basic set cookie values work
hs-upstream-expressions/cookies > iterate cookies values work
hs-upstream-expressions/cookies > length is 0 when no cookies are set
hs-upstream-expressions/cookies > update cookie values work
hs-upstream-expressions/functionCalls > can access a property of a call's result
hs-upstream-expressions/functionCalls > can chain calls on the result of a call
hs-upstream-expressions/functionCalls > can invoke function on object
hs-upstream-expressions/functionCalls > can invoke function on object w/ async arg
hs-upstream-expressions/functionCalls > can invoke function on object w/ async root & arg
hs-upstream-expressions/functionCalls > can invoke global function
hs-upstream-expressions/functionCalls > can invoke global function w/ async arg
hs-upstream-expressions/functionCalls > can pass an array literal as an argument
hs-upstream-expressions/functionCalls > can pass an expression as an argument
hs-upstream-expressions/functionCalls > can pass an object literal as an argument
hs-upstream-expressions/functionCalls > can pass no arguments
hs-upstream-expressions/logicalOperator > and short-circuits when lhs promise resolves to false
hs-upstream-expressions/logicalOperator > should short circuit with and expression
hs-upstream-expressions/logicalOperator > should short circuit with or expression
hs-upstream-expressions/mathOperator > can use mixed expressions
hs-upstream-expressions/objectLiteral > expressions work in object literal field names
hs-upstream-expressions/propertyAccess > property access on function result
hs-upstream-expressions/some > some returns true for nonempty selector
hs-upstream-expressions/strings > string templates work w/ props
hs-upstream-expressions/strings > string templates work w/ props w/ braces
hs-upstream-expressions/symbol > resolves global context properly
hs-upstream-fetch > allows the event handler to change the fetch parameters
hs-upstream-fetch > as response does not throw on 404
hs-upstream-fetch > can catch an error that occurs when using fetch
hs-upstream-fetch > can do a simple fetch
hs-upstream-fetch > can do a simple fetch w/ a custom conversion
hs-upstream-fetch > can do a simple fetch w/ a naked URL
hs-upstream-fetch > can do a simple fetch w/ html
hs-upstream-fetch > can do a simple fetch w/ json
hs-upstream-fetch > can do a simple fetch w/ json using JSON syntax
hs-upstream-fetch > can do a simple fetch w/ json using Object syntax
hs-upstream-fetch > can do a simple fetch w/ json using Object syntax and an 'an' prefix
hs-upstream-fetch > can do a simple post
hs-upstream-fetch > can do a simple post alt syntax w/ curlies
hs-upstream-fetch > can do a simple post alt syntax without curlies
hs-upstream-fetch > can put response conversion after with
hs-upstream-fetch > can put response conversion before with
hs-upstream-fetch > do not throw passes through 404 response
hs-upstream-fetch > don't throw passes through 404 response
hs-upstream-fetch > Response can be converted to JSON via as JSON
hs-upstream-fetch > submits the fetch parameters to the event handler
hs-upstream-fetch > throws on non-2xx response by default
hs-upstream-fetch > triggers an event just before fetching
hs-upstream-hide > can hide element, with display:none by default
hs-upstream-hide > can hide element with display:none explicitly
hs-upstream-hide > can hide element with no target followed by command
hs-upstream-hide > can hide element with no target followed by then
hs-upstream-hide > can hide element with no target with a with
hs-upstream-hide > can hide element with opacity:0
hs-upstream-hide > can hide element with opacity style literal
hs-upstream-hide > can hide element, with visibility:hidden
hs-upstream-hide > can hide other elements
hs-upstream-if > if on new line does not join w/ else
hs-upstream-if > if properly supports nested if statements and end block
hs-upstream-js > can do both of the above
hs-upstream-js > can return values to _hyperscript
hs-upstream-js > handles rejected promises without hanging
hs-upstream-make > can make elements
hs-upstream-make > can make elements with id and classes
hs-upstream-make > can make named objects
hs-upstream-make > can make named objects w/ global scope
hs-upstream-make > can make named objects with arguments
hs-upstream-make > can make objects
hs-upstream-make > can make objects with arguments
hs-upstream-make > creates a div by default
hs-upstream-on > can catch exceptions thrown in hyperscript functions
hs-upstream-on > can catch exceptions thrown in js functions
hs-upstream-on > can ignore when target doesn't exist
hs-upstream-on > can pick detail fields out by name
hs-upstream-on > can pick event properties out by name
hs-upstream-on > listeners on other elements are removed when the registering element is removed
hs-upstream-on > multiple event handlers at a time are allowed to execute with the every keyword
hs-upstream-on > on intersection fires when the element is in the viewport
hs-upstream-on > rethrown exceptions trigger 'exception' event
hs-upstream-on > throttled at <time> drops events within the window
hs-upstream-put > waits on promises
hs-upstream-remove > can remove a value from a set
hs-upstream-repeat > can nest loops
hs-upstream-repeat > only executes the init expression once
hs-upstream-repeat > repeat forever works
hs-upstream-repeat > repeat forever works w/o keyword
hs-upstream-repeat > until keyword works
hs-upstream-repeat > while keyword works
hs-upstream-reset > can reset a textarea
hs-upstream-resize > fires when element is resized
hs-upstream-resize > on resize from window uses native window resize event
hs-upstream-resize > provides height in detail
hs-upstream-resize > works with from clause
hs-upstream-select > returns selected text
hs-upstream-send > can send events to any expression
hs-upstream-set > set waits on promises
hs-upstream-show > can filter over a set of elements using the its symbol
hs-upstream-socket > converts relative URL to ws:// on http pages
hs-upstream-socket > converts relative URL to wss:// on https pages
hs-upstream-socket > dispatchEvent sends JSON-encoded event over the socket
hs-upstream-socket > namespaced sockets work
hs-upstream-socket > on message as JSON handler decodes JSON payload
hs-upstream-socket > on message as JSON throws on non-JSON payload
hs-upstream-socket > on message handler fires on incoming text message
hs-upstream-socket > parses socket with absolute ws:// URL
hs-upstream-socket > rpc proxy blacklists then/catch/length/toJSON
hs-upstream-socket > rpc proxy default timeout rejects the promise
hs-upstream-socket > rpc proxy noTimeout avoids timeout rejection
hs-upstream-socket > rpc proxy reply with throw rejects the promise
hs-upstream-socket > rpc proxy sends a message and resolves the reply
hs-upstream-socket > rpc proxy timeout(n) rejects after a custom window
hs-upstream-socket > rpc reconnects after the underlying socket closes
hs-upstream-socket > with timeout parses and uses the configured timeout
hs-upstream-swap > can swap a variable with a property
hs-upstream-tell > works with an array
hs-upstream-tell > your symbol represents the thing being told
hs-upstream-toggle > can toggle display
hs-upstream-toggle > can toggle display on other elt
hs-upstream-toggle > can toggle display w/ my
hs-upstream-toggle > can toggle opacity
hs-upstream-toggle > can toggle opacity on other elt
hs-upstream-toggle > can toggle opacity w/ my
hs-upstream-toggle > can toggle until an event on another element
hs-upstream-toggle > can toggle visibility
hs-upstream-toggle > can toggle visibility on other elt
hs-upstream-toggle > can toggle visibility w/ my
hs-upstream-wait > can destructure properties in a wait
hs-upstream-wait > can wait on event
hs-upstream-wait > can wait on event on another element
hs-upstream-wait > waiting on an event sets 'it' to the event
hs-upstream-when > attribute observers are persistent (not recreated on re-run)
math > string->number

View File

@@ -1,3 +1,5 @@
(defsuite "chars"
;; Tests for character type (Phase 13)
;; Uses (make-char n) and (char-code "x") instead of #\x literals
;; (char literal parser syntax tested via sx-parse call)
@@ -183,3 +185,5 @@
(deftest
"char-ci>=? equal case-insensitive"
(assert= true (char-ci>=? (make-char 97) (make-char 65))))
)

View File

@@ -0,0 +1,308 @@
;; ==========================================================================
;; test-gate-pins.sx — W14 regression pins for the review's landed fixes
;;
;; The quick-wins batch (commit dc7aa709 + siblings) landed real semantics
;; fixes but shipped WITHOUT pinning tests, so a regression would pass
;; silently. This file pins each confirmed-and-fixed finding with a minimal
;; repro lifted from the review lane files (plans/sx-review/*.md). One suite
;; per finding.
;;
;; TEST-ONLY: no semantics edits. If a pin fails, the fix regressed — do NOT
;; relax the assertion; investigate the evaluator/primitive change.
;; NB: assert= uses `=` (not `equal?`); compare lists with `=`.
;; ==========================================================================
;; --------------------------------------------------------------------------
;; K18 [W7, high] expt silently wrapped at 63-bit int — now promotes to float
;; like +/*. Repro (core.md): (expt 2 62) -> -4611686018427387904 (wrapped);
;; (expt 2 100) -> 0. Fixed: both are positive floats.
;; --------------------------------------------------------------------------
(defsuite
"gate-K18-expt-overflow"
(deftest
"small integer exponents stay exact"
(do
(assert= (expt 2 0) 1)
(assert= (expt 2 10) 1024)))
(deftest
"expt 2^62 does not wrap to a negative int"
(assert (> (expt 2 62) 0)))
(deftest
"expt 2^100 does not wrap to zero"
(assert (> (expt 2 100) 0)))
(deftest
"expt 2^100 promotes to float"
(assert (number? (expt 2 100)))))
;; --------------------------------------------------------------------------
;; K20 [W7, high] contains? did not support dicts in the real runtime —
;; (contains? {:a 1} :a) threw "contains?: 2 args", contradicting its :doc
;; ("Dicts: key check"). Fixed: dict key membership works; lists/strings
;; unchanged. Repro (core.md).
;; --------------------------------------------------------------------------
(defsuite
"gate-K20-contains-dict"
(deftest
"contains? finds a present dict key"
(assert (contains? {:a 1 :b 2} :a)))
(deftest
"contains? reports a missing dict key as false"
(assert (not (contains? {:a 1 :b 2} :zz))))
(deftest
"contains? still works on list membership"
(do
(assert (contains? (list 10 20 30) 20))
(assert
(not (contains? (list 10 20 30) 99)))))
(deftest
"contains? still works on string substrings"
(assert (contains? "hello" "ell"))))
;; --------------------------------------------------------------------------
;; K09 [W5, high] R7RS longhand (unquote-splicing X) silently no-spliced —
;; only shorthand ,@/`splice-unquote` was recognized, so the longhand
;; serialized literally (zero-splice). Fixed: aliased to splice-unquote.
;; Repro (core.md).
;; --------------------------------------------------------------------------
(defsuite
"gate-K09-longhand-unquote-splicing"
(deftest
"longhand unquote-splicing splices a list"
(assert=
(quasiquote
(1
(unquote-splicing (list 2 3))
4))
(list 1 2 3 4)))
(deftest
"longhand unquote-splicing of an empty list contributes nothing"
(assert=
(quasiquote (0 (unquote-splicing (list)) 9))
(list 0 9)))
(deftest
"shorthand splice-unquote still works"
(assert=
(quasiquote (a (splice-unquote (list 2 3)) z))
(list (quote a) 2 3 (quote z)))))
;; --------------------------------------------------------------------------
;; K11 [W5, high] guard re-raise sentinel was a plain forgeable symbol — a
;; body/clause legitimately returning (list '__guard-reraise__ X) was
;; misread as a re-raise of X. Fixed: sentinel gensym'd per execution, so a
;; user value with that head is returned as data. Repro (core.md).
;; --------------------------------------------------------------------------
(defsuite
"gate-K11-guard-reraise-forgeable"
(deftest
"body value shaped like the sentinel is returned as data"
(assert=
(guard (e (true "caught")) (list (quote __guard-reraise__) "hi"))
(list (quote __guard-reraise__) "hi")))
(deftest
"clause returning the forged sentinel is not re-raised"
(assert=
(guard
(e (true (list (quote __guard-reraise__) "forged")))
(error "boom"))
(list (quote __guard-reraise__) "forged"))))
;; --------------------------------------------------------------------------
;; K39 [W5, med] `do` misparsed a first form whose head is a list (an IIFE)
;; as a Scheme do-loop binding spec. Repro (core.md): (do ((fn (x) x) 5) 99)
;; threw "first: expected list, got 5"; expected 99. Fixed: `do` is begin.
;; --------------------------------------------------------------------------
(defsuite
"gate-K39-do-iife-head"
(deftest
"do with an IIFE first form returns the last form (not a do-loop)"
(assert= (do ((fn (x) x) 5) 99) 99))
(deftest
"do with a single IIFE form returns its value"
(assert= (do ((fn () 42))) 42)))
;; --------------------------------------------------------------------------
;; K49 [W8, med] Five void elements (area base embed param track) were in
;; VOID_ELEMENTS but missing from HTML_TAGS — render fell through to
;; function-call dispatch: (render-to-html '(base :href "x")) threw
;; "Undefined symbol: base". dc7aa709 fixed the SPEC registry
;; (spec/render.sx). NB: the generated OCaml render library
;; (hosts/ocaml/lib/sx_render.ml, bootstrap_render.py output) still carries
;; a STALE html_tags_list without these five — the runner's native
;; `render-html` convenience therefore still errors. That regen drift is
;; W14 item F13 (regen-diff gate); this suite pins the spec side only.
;; --------------------------------------------------------------------------
(defsuite
"gate-K49-void-elements-renderable"
(deftest
"spec HTML_TAGS registry contains all five void elements"
(for-each
(fn
(t)
(assert (contains? HTML_TAGS t) (str "HTML_TAGS missing " t)))
(list "area" "base" "embed" "param" "track")))
(deftest
"spec render-to-html renders base self-closing with attr"
(assert-equal
"<base href=\"x\" />"
(render-to-html (quote (base :href "x")) (make-env))))
(deftest
"spec render-to-html renders all five as self-closing voids"
(for-each
(fn
(form)
(let
((html (render-to-html form (make-env))))
(assert
(string-contains? html "/>")
(str (first form) " not self-closing: " html))))
(list
(quote (area))
(quote (base))
(quote (embed))
(quote (param))
(quote (track))))))
;; --------------------------------------------------------------------------
;; crit-2 [W1, critical] signal-return frame stored the saved kont under :f
;; but the reader looked up "saved-kont" — the resume kont was always nil,
;; so the handler value became the WHOLE program's result and every frame
;; outside the signal site (including the covering test's own assert!) was
;; silently discarded. The shipped test "signal returns handler value to
;; call site" passed VACUOUSLY — the bug defeated its own test.
;;
;; A plain assert around the repro would inherit the same vacuity on
;; regression (the dropped continuation includes the assert frame). So this
;; pin uses a side-effect sentinel: test 1 runs the repro and then sets a
;; flag; test 2 independently asserts the flag was reached. If crit-2
;; regresses, test 1 still "passes" (vacuously) but test 2 FAILS.
;; Repro (core.md).
;; --------------------------------------------------------------------------
(define *gate-crit2-after-signal* false)
(define *gate-crit2-result* nil)
(define *gate-crit2-rc-result* nil)
(defsuite
"gate-crit2-signal-return-kont"
(deftest
"continuable signal resumes at the raise site"
(do
(set!
*gate-crit2-result*
(list
"outer"
(handler-bind
(((fn (c) true) (fn (c) 42)))
(+ 1 (signal-condition 5)))
"end"))
(set!
*gate-crit2-rc-result*
(handler-bind
(((fn (c) true) (fn (c) (+ c 100))))
(+ 1 (raise-continuable 42))))
(set! *gate-crit2-after-signal* true)
(assert= *gate-crit2-result* (list "outer" 43 "end"))
(assert= *gate-crit2-rc-result* 143)))
(deftest
"non-vacuity sentinel: the continuation after the signal actually ran"
(do
(assert
*gate-crit2-after-signal*
"continuation dropped — crit-2 regressed (previous test passed vacuously)")
(assert= *gate-crit2-result* (list "outer" 43 "end"))
(assert= *gate-crit2-rc-result* 143)))
(deftest
"handler value feeds the arithmetic frame, not the program result"
(assert=
(handler-bind
(((fn (c) true) (fn (c) (* c 10))))
(+ 1 (signal-condition 5)))
51)))
(defsuite
"gate-C22-throwing-mock-logged"
(deftest
"throwing mock still leaves an IO-log entry"
(let
((h (make-harness))
(f (make-interceptor h "fetch" (fn (url) (error "boom-io")))))
(let
((r (try-call (fn () (f "http://a")))))
(assert (not (get r "ok")) "mock error must propagate")
(assert-io-called h "fetch")
(assert-io-count h "fetch" 1)
(assert= (get (io-call-nth h "fetch" 0) "result") nil))))
(deftest
"successful mock entry gets its result updated in place"
(let
((h (make-harness))
(f (make-interceptor h "fetch" (fn (url) (str "got:" url)))))
(f "http://a")
(assert-io-count h "fetch" 1)
(assert-io-result h "fetch" 0 "got:http://a")
(assert-io-args h "fetch" 0 (list "http://a"))))
(deftest
"mixed throwing and successful calls are all counted"
(let
((h (make-harness))
(bomb (make-interceptor h "action" (fn (x) (error "nope"))))
(ok-f (make-interceptor h "action" (fn (x) "done"))))
(try-call (fn () (bomb 1)))
(ok-f 2)
(try-call (fn () (bomb 3)))
(assert-io-count h "action" 3)
(assert= (get (io-call-nth h "action" 1) "result") "done"))))
(defsuite
"gate-C21-perform-mode-harness"
(deftest
"single perform suspension is serviced by the mock"
(let
((h (make-harness :platform {:fetch (fn (u) (str "R:" u))})))
(assert=
(harness-run-perform h (quote (perform {:op "fetch" :args (list "a")})) (make-env))
"R:a")
(assert-io-count h "fetch" 1)
(assert-io-result h "fetch" 0 "R:a")))
(deftest
"perform result feeds the surrounding arithmetic frame"
(let
((h (make-harness :platform {:query (fn () 41)})))
(assert=
(harness-run-perform
h
(quote (+ 1 (perform {:op "query"})))
(make-env))
42)))
(deftest
"sequential performs each suspend and resume in order"
(let
((h (make-harness :platform {:fetch (fn (u) (str "R:" u))})))
(assert=
(harness-run-perform
h
(quote (list (perform {:op "fetch" :args (list "x")}) (perform {:op "fetch" :args (list "y")})))
(make-env))
(list "R:x" "R:y"))
(assert-io-count h "fetch" 2)
(assert-io-args h "fetch" 1 (list "y"))))
(deftest
"S10 probe: map over perform-suspending lambda keeps ALL elements"
(let
((h (make-harness :platform {:fetch (fn (u) (str "R:" u))})))
(assert=
(harness-run-perform
h
(quote (map (fn (u) (perform {:op "fetch" :args (list u)})) (list "a" "b" "c")))
(make-env))
(list "R:a" "R:b" "R:c"))
(assert-io-count h "fetch" 3)))
(deftest
"unmocked op raises a clear error instead of hanging"
(let
((h (make-harness :platform {})))
(let
((r (try-call (fn () (harness-run-perform h (quote (perform {:op "no-such-op"})) (make-env))))))
(assert (not (get r "ok")) "expected an error for unmocked op")
(assert
(contains? (get r "error") "no mock for op")
(get r "error"))))))

View File

@@ -180,11 +180,12 @@
(deftest "converts-foo-bar" (for-each run-hs-fixture (list {:src "1 as String" :expected "1"}))))
;; ── blockLiteral (4 fixtures) ──────────────────────────────
(defsuite "hs-compat-blockLiteral"
(deftest
"can-map-an-array"
(let
((r (eval-hs "['a', 'ab', 'abc'].map(\\ s -> s.length)")))
(assert= r (list 1 2 3) "map with block")))
(assert= r (list 1 2 3) "map with block"))))
;; ── boolean (2 fixtures) ──────────────────────────────
(defsuite
@@ -325,9 +326,10 @@
(for-each run-hs-fixture (list {:src "undefined does not exist" :expected true} {:src "null does not exist" :expected true}))))
;; ── cookies (9 fixtures) ──────────────────────────────
(defsuite "hs-compat-cookies"
(deftest
"update-cookie-values-work"
(for-each run-hs-fixture (list {:src "cookies.foo" :locals {:cookies {:foo "doh"}} :expected "doh"})))
(for-each run-hs-fixture (list {:src "cookies.foo" :locals {:cookies {:foo "doh"}} :expected "doh"}))))
;; ── in (4 fixtures) ──────────────────────────────
(defsuite
@@ -464,9 +466,10 @@
(assert= (len r) 0 "empty query result"))))
;; ── some (6 fixtures) ──────────────────────────────
(defsuite "hs-compat-some"
(deftest
"some-returns-true-for-nonempty-selector"
(for-each run-hs-fixture (list {:src "some [1]" :expected true})))
(for-each run-hs-fixture (list {:src "some [1]" :expected true}))))
;; ── stringPostfix (10 fixtures) ──────────────────────────────
(defsuite
@@ -695,13 +698,14 @@
(deftest "null-not-exist" (for-each run-hs-fixture (list {:src "null does not exist" :expected true})))
(deftest "undef-not-exist" (for-each run-hs-fixture (list {:src "undefined does not exist" :locals {:undefined nil} :expected true}))))
(defsuite "hs-compat-where"
(deftest
"where-with-property"
(let
((items (list {:age 15 :name "Alice"} {:age 30 :name "Bob"})))
(let
((r (eval-hs "items where its age > 20" {:locals {:items items}})))
(assert= (len r) 1 "one match"))))
(assert= (len r) 1 "one match")))))
(defsuite
"hs-0990-collection-ops"

View File

@@ -1,3 +1,5 @@
(defsuite "import-bind"
;; Tests for define-library, import, and the import suspension mechanism
;; ============================================================
@@ -190,3 +192,5 @@
(assert=
(cek-try (fn () inner-secret) (fn (e) "not-found"))
"not-found"))
)

View File

@@ -1,3 +1,5 @@
(defsuite "let-match"
;; Tests for let-match — CEK special form and bytecode compiler desugaring
;; let-match destructures a dict: (let-match {:key var} expr body...)
@@ -45,3 +47,5 @@
(let-match {:a a :b b} (dict :a (+ 1 2) :b (* 3 4))
(assert= a 3)
(assert= b 12)))
)

View File

@@ -1,7 +1,10 @@
;; Math completeness tests. C9 note: this file used NESTED deftests with no
;; defsuite, so every test reported with an empty suite label (" > sin").
;; Restructured to defsuite/deftest — labels are now "math > sin" etc.
(deftest
"math completeness"
(deftest
(defsuite
"math"
(defsuite
"trigonometry"
(deftest
"sin"
@@ -124,7 +127,10 @@
true
(< (abs (- (cos (acos 0.5)) 0.5)) 0.0001)
"cos(acos(x)) = x")
(assert= true (< (abs (- (exp (log 2)) 2)) 0.0001) "exp(log(x)) = x")
(assert=
true
(< (abs (- (exp (log 2)) 2)) 0.0001)
"exp(log(x)) = x")
(assert=
(* 12 18)
(* (gcd 12 18) (lcm 12 18))

View File

@@ -1,3 +1,5 @@
(defsuite "ports"
;; Phase 14 — String ports + eof-object
(deftest
@@ -230,3 +232,5 @@
(c3 (read-char in))
(when (not (eof-object? c3)) (write-char c3 out)))
(assert= "abc" (get-output-string out) "roundtrip via ports")))))
)

View File

@@ -0,0 +1,93 @@
;; ==========================================================================
;; test-adapter-dom-render.sx — C23 (W14): actual render-OUTPUT tests for
;; the DOM adapter. The pre-existing test-adapter-dom.sx asserts membership
;; predicates only ("if is a render form") — zero tests inspected what
;; render-to-dom actually builds, leaving the 1512-line adapter the
;; thinnest-tested relative to size (hosts.md C23).
;;
;; Runs against the OCaml runner's mock DOM (host-* primitives) through the
;; real (web adapter-dom) library, disk-resolved by import.
;;
;; Adapter output contract (probed 2026-07-04):
;; - text renders as a CHILD TEXT NODE (nodeType 3, textContent set);
;; the parent's own textContent prop stays "" in the mock
;; - control-flow forms (when/map/...) wrap their output in a FRAGMENT
;; child; `if` inlines the chosen branch directly
;; ==========================================================================
(import (web adapter-dom))
(defsuite
"adapter-dom-render-output"
(deftest
"simple element: tag, text child node"
(let
((el (render-to-dom (quote (div "hello")) (make-env))))
(assert= (dom-node-name el) "DIV")
(let
((kids (dom-get-prop el "children")))
(assert= (len kids) 1)
(assert= (dom-get-prop (first kids) "nodeType") 3)
(assert= (dom-text-content (first kids)) "hello"))))
(deftest
"class and id land on the element"
(let
((el (render-to-dom (quote (div :class "card" :id "main" "x")) (make-env))))
(assert= (dom-get-prop el "className") "card")
(assert
(or
(= (dom-get-prop el "id") "main")
(= (get (dom-get-prop el "attributes") "id") "main"))
"id not set as prop or attribute")))
(deftest
"nested children are appended in order"
(let
((el (render-to-dom (quote (ul (li "a") (li "b") (li "c"))) (make-env))))
(let
((kids (dom-get-prop el "children")))
(assert= (len kids) 3)
(assert= (dom-node-name (first kids)) "LI")
(assert=
(dom-text-content
(first (dom-get-prop (nth kids 2) "children")))
"c"))))
(deftest
"void element renders with no children"
(let
((el (render-to-dom (quote (input :type "text")) (make-env))))
(assert= (dom-node-name el) "INPUT")
(assert= (len (dom-get-prop el "children")) 0)))
(deftest
"when false yields an empty fragment"
(let
((el (render-to-dom (quote (div (when false (span "hidden")))) (make-env))))
(let
((kids (dom-get-prop el "children")))
(assert= (len kids) 1)
(assert= (dom-node-name (first kids)) "FRAGMENT")
(assert= (len (dom-get-prop (first kids) "children")) 0))))
(deftest
"when true renders the branch inside its fragment"
(let
((el (render-to-dom (quote (div (when true (span "shown")))) (make-env))))
(let
((frag (first (dom-get-prop el "children"))))
(assert= (dom-node-name frag) "FRAGMENT")
(assert=
(dom-node-name (first (dom-get-prop frag "children")))
"SPAN"))))
(deftest
"map render form produces one child per element inside its fragment"
(let
((el (render-to-dom (quote (ul (map (fn (x) (li x)) (list "1" "2" "3")))) (make-env))))
(let
((frag (first (dom-get-prop el "children"))))
(assert= (dom-node-name frag) "FRAGMENT")
(let
((items (dom-get-prop frag "children")))
(assert= (len items) 3)
(assert= (dom-node-name (first items)) "LI")))))
(deftest
"if render form inlines the chosen branch directly"
(let
((el (render-to-dom (quote (div (if true (b "yes") (i "no")))) (make-env))))
(assert= (dom-node-name (first (dom-get-prop el "children"))) "B"))))