diff --git a/.claude/scheduled_tasks.lock b/.claude/scheduled_tasks.lock
index 2077752d..958a0e62 100644
--- a/.claude/scheduled_tasks.lock
+++ b/.claude/scheduled_tasks.lock
@@ -1 +1 @@
-{"sessionId":"c4d97db1-361c-4a04-a99b-c838f9385469","pid":2426590,"procStart":"349789073","acquiredAt":1780789990975}
\ No newline at end of file
+{"sessionId":"d510140d-6197-40b7-9bd2-125ca304ad7a","pid":697492,"procStart":"125353752","acquiredAt":1783118024555}
\ No newline at end of file
diff --git a/hosts/ocaml/browser/eval_wasm_probes.js b/hosts/ocaml/browser/eval_wasm_probes.js
new file mode 100755
index 00000000..91034a49
--- /dev/null
+++ b/hosts/ocaml/browser/eval_wasm_probes.js
@@ -0,0 +1,68 @@
+#!/usr/bin/env node
+// eval_wasm_probes.js — W14/F8: evaluate a file of probe expressions (one
+// per line, '#'-comments allowed) on the SHIPPED browser kernel and print
+//   PROBE <n> <result>
+// per line, for diffing against the native server (scripts/test-differential.sh).
+// Boot stubs mirror test_wasm_native.js / run_wasm_corpus.js.
+
+const fs = require('fs');
+const path = require('path');
+
+const PROJECT_ROOT = path.resolve(__dirname, '../../..');
+const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
+
+const probeFile = process.argv[2];
+if (!probeFile) { console.error('usage: eval_wasm_probes.js <probe-file>'); process.exit(2); }
+// Minimal browser-global stubs: every DOM/storage/network call is a no-op or returns an empty value.
+global.window = global;
+global.document = {
+  createElement: () => ({ style: {}, setAttribute() {}, appendChild() {}, children: [] }),
+  createDocumentFragment: () => ({ appendChild() {}, children: [], childNodes: [] }),
+  head: { appendChild() {} }, body: { appendChild() {} },
+  querySelector: () => null, querySelectorAll: () => [],
+  createTextNode: s => ({ textContent: s }), addEventListener() {},
+  createComment: s => ({ textContent: s || '' }),
+  getElementsByTagName: () => [],
+};
+global.localStorage = { getItem: () => null, setItem() {}, removeItem() {} };
+global.CustomEvent = class { constructor(n, o) { this.type = n; this.detail = (o || {}).detail || {}; } };
+global.MutationObserver = class { observe() {} disconnect() {} };
+global.requestIdleCallback = fn => setTimeout(fn, 0);
+global.matchMedia = () => ({ matches: false });
+global.navigator = { serviceWorker: { register: () => Promise.resolve() } };
+global.location = { href: '', pathname: '/', hostname: 'localhost' };
+global.history = { pushState() {}, replaceState() {} };
+global.fetch = () => Promise.resolve({ ok: true, text: () => Promise.resolve('') });
+// main: load the kernel bundle, wait for globalThis.SxKernel, then print one PROBE line per probe.
+async function main() {
+  require(path.join(WASM_DIR, 'sx_browser.bc.wasm.js'));
+  const K = await new Promise((resolve, reject) => {
+    let tries = 0;
+    const poll = setInterval(() => { // 200 polls x 50ms = 10s budget for the kernel to appear
+      if (globalThis.SxKernel) { clearInterval(poll); resolve(globalThis.SxKernel); }
+      else if (++tries > 200) { clearInterval(poll); reject(new Error('SxKernel not found after 10s')); }
+    }, 50);
+  });
+
+  const lines = fs.readFileSync(probeFile, 'utf8').split('\n');
+  let n = 0; // counts evaluated probes only: blank and '#' lines are skipped before the increment
+  for (const raw of lines) {
+    const line = raw.trim();
+    if (!line || line.startsWith('#')) continue;
+    n++;
+    let out;
+    try {
+      // Serialize through the kernel's own printer so both hosts emit SX
+      // text (K.eval returns raw JS values otherwise — [object Object]).
+      const r = K.eval(`(sx-serialize ${line})`);
+      out = (typeof r === 'string') ? r : String(r);
+    } catch (e) {
+      out = 'ERROR'; // deliberate normalization: a thrown eval error becomes the bare ERROR token
+    }
+    // errors normalized: kernel returns "Error: ..." strings for eval errors
+    if (typeof out === 'string' && out.startsWith('Error')) out = 'ERROR';
+    console.log(`PROBE ${n} ${out.replace(/\n/g, '\\n')}`); // newlines escaped so each probe stays on one line
+  }
+}
+
+main().catch(e => { console.error('FATAL:', e.message); process.exit(1); });
diff --git a/hosts/ocaml/browser/run_wasm_corpus.js b/hosts/ocaml/browser/run_wasm_corpus.js
new file mode 100755
index 00000000..92c6141f
--- /dev/null
+++ b/hosts/ocaml/browser/run_wasm_corpus.js
@@ -0,0 +1,170 @@
+#!/usr/bin/env node
+// run_wasm_corpus.js — W14/F2: run spec-test files through the SHIPPED
+// browser kernel (sx_browser.bc.wasm.js), headless in Node.
+//
+// The review (conformance.md F-2) found no runner feeds spec/tests through
+// the shipped browser artifact — F-1/F-3 host divergences existed
+// undetected precisely because of that. This runs ONE test file per
+// invocation (process isolation: a hanging file is killed by the driver's
+// timeout without taking down the sweep) and prints a parseable summary:
+//   CORPUS-RESULT <file> pass=<n> fail=<n> status=ok|load-error
+//
+// Usage: node hosts/ocaml/browser/run_wasm_corpus.js spec/tests/test-eval.sx
+// Driver: scripts/test-wasm-corpus.sh (sweeps the corpus, applies skips).
+//
+// Boot stubs and module preload mirror test_wasm_native.js (the blessed
+// boot path for the shipped kernel).
+
+const fs = require('fs');
+const path = require('path');
+
+const PROJECT_ROOT = path.resolve(__dirname, '../../..');
+const WASM_DIR = path.join(PROJECT_ROOT, 'shared/static/wasm');
+
+const target = process.argv[2];
+if (!target) { console.error('usage: run_wasm_corpus.js <test-file.sx>'); process.exit(2); }
+
+// --- DOM stubs (as test_wasm_native.js): plain-object elements that record attributes and children ---
+function makeElement(tag) {
+  const el = {
+    tagName: tag, _attrs: {}, _children: [], style: {},
+    childNodes: [], children: [], textContent: '',
+    nodeType: 1,
+    setAttribute(k, v) { el._attrs[k] = String(v); },
+    getAttribute(k) { return el._attrs[k] || null; },
+    removeAttribute(k) { delete el._attrs[k]; },
+    appendChild(c) { el._children.push(c); el.childNodes.push(c); el.children.push(c); return c; },
+    insertBefore(c) { el._children.push(c); el.childNodes.push(c); el.children.push(c); return c; }, // stub: ignores the reference node, always appends
+    removeChild(c) { return c; }, // stub: returns the node without detaching it
+    replaceChild(n) { return n; }, // stub: returns the new node without swapping anything
+    cloneNode() { return makeElement(tag); }, // stub: fresh empty element; attributes and children are not copied
+    addEventListener() {}, removeEventListener() {}, dispatchEvent() {},
+    get innerHTML() {
+      return el._children.map(c => {
+        if (c._isText) return c.textContent || '';
+        if (c._isComment) return '<!--' + (c.textContent || '') + '-->'; // comment nodes serialize in HTML comment syntax
+        return c.outerHTML || '';
+      }).join('');
+    },
+    set innerHTML(v) { el._children = []; el.childNodes = []; el.children = []; }, // stub: clears children; v is not parsed
+    get outerHTML() {
+      let s = '<' + tag;
+      for (const k of Object.keys(el._attrs).sort()) s += ` ${k}="${el._attrs[k]}"`; // sorted keys give deterministic output
+      s += '>';
+      if (['br', 'hr', 'img', 'input', 'meta', 'link'].includes(tag)) return s; // void elements: no content, no end tag
+      return s + el.innerHTML + '</' + tag + '>';
+    },
+    dataset: new Proxy({}, { // dataset.fooBar reads/writes the data-foo-bar entry of _attrs
+      get(_, k) { return el._attrs['data-' + k.replace(/[A-Z]/g, c => '-' + c.toLowerCase())]; },
+      set(_, k, v) { el._attrs['data-' + k.replace(/[A-Z]/g, c => '-' + c.toLowerCase())] = v; return true; }
+    }),
+    querySelectorAll() { return []; },
+    querySelector() { return null; },
+  };
+  return el;
+}
+
+global.window = global;
+global.document = {
+  createElement: makeElement,
+  createDocumentFragment() { return makeElement('fragment'); },
+  head: makeElement('head'), body: makeElement('body'),
+  querySelector() { return null; }, querySelectorAll() { return []; },
+  createTextNode(s) { return { _isText: true, textContent: String(s), nodeType: 3 }; },
+  addEventListener() {},
+  createComment(s) { return { _isComment: true, textContent: s || '', nodeType: 8 }; },
+  getElementsByTagName() { return []; },
+};
+global.localStorage = { getItem() { return null; }, setItem() {}, removeItem() {} };
+global.CustomEvent = class { constructor(n, o) { this.type = n; this.detail = (o || {}).detail || {}; } };
+global.MutationObserver = class { observe() {} disconnect() {} };
+global.requestIdleCallback = fn => setTimeout(fn, 0);
+global.matchMedia = () => ({ matches: false });
+global.navigator = { serviceWorker: { register() { return Promise.resolve(); } } };
+global.location = { href: '', pathname: '/', hostname: 'localhost' };
+global.history = { pushState() {}, replaceState() {} };
+global.fetch = () => Promise.resolve({ ok: true, text() { return Promise.resolve(''); } });
+global.XMLHttpRequest = class { open() {} send() {} };
+// main: boot the kernel, register host FFI and test hooks, load the web stack, run the target, print CORPUS-RESULT.
+async function main() {
+  require(path.join(WASM_DIR, 'sx_browser.bc.wasm.js'));
+  const K = await new Promise((resolve, reject) => {
+    let tries = 0;
+    const poll = setInterval(() => { // 200 polls x 50ms = the 10s named in the error below
+      if (globalThis.SxKernel) { clearInterval(poll); resolve(globalThis.SxKernel); }
+      else if (++tries > 200) { clearInterval(poll); reject(new Error('SxKernel not found after 10s')); }
+    }, 50);
+  });
+
+  // --- 8 FFI host primitives (as test_wasm_native.js) ---
+  K.registerNative('host-global', args => (args[0] in globalThis) ? globalThis[args[0]] : null);
+  K.registerNative('host-get', args => {
+    const [obj, prop] = args;
+    if (obj == null) return null;
+    const v = obj[prop];
+    return v === undefined ? null : v; // undefined is mapped to null before it reaches the kernel
+  });
+  K.registerNative('host-set!', args => { if (args[0] != null) args[0][args[1]] = args[2]; return args[2]; });
+  K.registerNative('host-call', args => {
+    const [obj, method, ...rest] = args;
+    if (obj == null || typeof obj[method] !== 'function') return null;
+    const r = obj[method].apply(obj, rest);
+    return r === undefined ? null : r;
+  });
+  K.registerNative('host-new', args => new (Function.prototype.bind.apply(args[0], [null, ...args.slice(1)])));
+  K.registerNative('host-callback', args => function () { return K.callFn(args[0], Array.from(arguments)); });
+  K.registerNative('host-typeof', args => typeof args[0]);
+  K.registerNative('host-await', args => args[0]); // no awaiting here: the value is handed back unchanged
+
+  K.eval('(define SX_VERSION "wasm-corpus-1.0")');
+  K.eval('(define SX_ENGINE "ocaml-vm-wasm-corpus")');
+  K.eval('(define parse sx-parse)');
+  K.eval('(define serialize sx-serialize)');
+
+  // --- Web stack modules (source form; bytecode covered elsewhere) ---
+  const sxDir = path.join(WASM_DIR, 'sx');
+  const modules = [
+    'render', 'core-signals', 'signals', 'deps', 'router', 'page-helpers', 'freeze',
+    'bytecode', 'compiler', 'vm', 'dom', 'browser',
+    'adapter-html', 'adapter-sx', 'adapter-dom',
+    'boot-helpers', 'hypersx',
+    'harness', 'harness-reactive', 'harness-web',
+    'engine', 'orchestration', 'boot',
+  ];
+  if (K.beginModuleLoad) K.beginModuleLoad(); // optional hook: only called when the kernel exposes it
+  for (const mod of modules) {
+    K.load(fs.readFileSync(path.join(sxDir, mod + '.sx'), 'utf8'));
+  }
+  if (K.endModuleLoad) K.endModuleLoad();
+
+  // --- Test framework hooks ---
+  let pass = 0, fail = 0;
+  const suiteStack = [];
+  K.registerNative('report-pass', () => { pass++; return null; });
+  K.registerNative('report-fail', args => {
+    fail++;
+    const suitePath = suiteStack.join(' > ');
+    console.error(`FAIL: ${suitePath ? suitePath + ' > ' : ''}${args[0]}\n ${args[1]}`);
+    return null;
+  });
+  K.registerNative('push-suite', args => { suiteStack.push(args[0]); return null; });
+  K.registerNative('pop-suite', () => { suiteStack.pop(); return null; });
+  K.eval('(define test-allowed? (fn (name) true))');
+  K.eval('(define try-call (fn (thunk) (let ((result (cek-try thunk (fn (err) err)))) (if (and (= (type-of result) "string") (starts-with? result "Error")) {"ok" false "error" result} {"ok" true "error" nil}))))');
+
+  K.load(fs.readFileSync(path.join(PROJECT_ROOT, 'spec/tests/test-framework.sx'), 'utf8'));
+
+  // --- Run the target file ---
+  const rel = path.relative(PROJECT_ROOT, path.resolve(target));
+  let status = 'ok';
+  try {
+    K.load(fs.readFileSync(path.resolve(target), 'utf8'));
+  } catch (e) {
+    status = 'load-error'; // pass/fail counted before the throw are still reported below
+    console.error(`LOAD-ERROR: ${rel}: ${e.message}`);
+  }
+  console.log(`CORPUS-RESULT ${rel} pass=${pass} fail=${fail} status=${status}`);
+  process.exit(status !== 'ok' || fail > 0 ? 1 : 0); // non-zero on a load error or any failed test
+}
+
+main().catch(e => { console.error('FATAL:', e.message); process.exit(1); });
diff --git a/plans/agent-briefings/sx-gate-loop.md b/plans/agent-briefings/sx-gate-loop.md
index f449a75c..4d37d610 100644
--- a/plans/agent-briefings/sx-gate-loop.md
+++ b/plans/agent-briefings/sx-gate-loop.md
@@ -1,57 +1,348 @@
-# sx-gate loop — W14 test gate (first live test of git→gitea→agentic→tmux)
+
-**Forge agent:** `agents/ws-W14` in the `rose-ash/sx-review` forge (git-sx/gitea-sx/agentic-sx).
-**Goal (from the forge briefing):** make the verification infrastructure trustworthy — runner env
-== production env, a WASM corpus runner, harness honesty, and pinning tests for the fixes already
-landed. This is **W14** in `plans/sx-review/PLAN.md` (read that section — it lists the findings).
-**Findings:** C0b C9 C21 C22 C23 C3 C4 C5 C6 C7 F2 F3 F4 F5 F6 F7 F8 F9 F10 F11 F12 K19 K104.
+# W14 — Test gate & conformance infrastructure loop -## Why this workstream first -The review's prime directive: no semantic fix should merge before its pinning test + a working -gate exist, because the verification infra currently can't tell you whether a fix works. W14 -produces that infra. It changes **no language semantics**, so it cannot regress the 5762p/274f -baseline — the ideal first payload while we test the agentic launch technology. +Forge agent **ws-W14**. Role: build out **W14** from the SX review remediation plan +(`plans/sx-review/PLAN.md`, §"W14. Test gate & conformance infrastructure") — +*the enabler that makes every other fix verifiable*. One checklist item per fire. -## Hard guardrails (this is a monitored test loop) -- **Commit locally, do NOT push.** No `git push` at all. (This is a test; the maintainer reviews - before anything reaches origin.) -- **Stay in W14 scope** — tests, runners, harness, gate tooling. Do NOT edit `spec/*.sx`, - `hosts/ocaml/lib/*.ml`, or any language semantics. If a task tempts you toward semantics, skip it - and note why in the Progress log. -- **Never `pkill sx_server`** (shared binary). Bound every `sx_server`/build/test with `timeout`. -- You are on branch `loops/sx-gate` in worktree `/root/rose-ash-loops/sx-gate`. Build/test here only. -- If the OCaml build or full suite is involved, compare against the recorded baseline - **5762 passed / 274 failed** (fail set is the 273 hs-* + 1 r7rs radix; see PLAN W14/F10). +You are on branch `loops/sx-ws-w14`, worktree `/root/rose-ash-loops/sx-ws-w14`. -## One iteration per fire — pick the first unchecked `[ ]`, implement, test, commit (no push), -tick the box, prepend one dated line to the Progress log, then stop. 
+## Hard guardrails (read every fire) -- [ ] **Pin the dc7aa709 quick-wins batch.** Add regression tests (spec/tests/ or a new suite) that - lock in the fixes that currently have none: K09 `unquote-splicing` longhand splices; K11 guard - re-raise sentinel is unforgeable (`(guard (e (true (list 'quoted x))) ...)` returns the list); - K18 `(expt 2 100)` is a float not 0; K20 `(contains? {:a 1} :a)` is true; K39 `(do ((fn (x) x) 5) 99)` - → 99; K49 the five void elements render. (K02 is already non-vacuously covered.) Confirm they pass - on the current binary. -- [ ] **Pin C1/C1b/S4 at the host level** (a small OCaml or shell test): a malformed command line - returns an error response and the process survives; an error page is not cached. -- [ ] **WASM corpus runner (F2).** Stand up a Node harness that runs a curated spec/tests subset - against the shipped WASM kernel (seed: the conformance lane's `run_wasm.js` pattern, referenced in - PLAN). Curated subset, not the full 6k (js_of_ocaml is ~24s/test — see F18). Wire it as a script. -- [ ] **Harness honesty (C22/K104):** make `spec/harness.sx` log the IO call *before* invoking the - mock so a throwing mock is recorded. Add a test that a throwing mock leaves a log entry. -- [ ] **Runner-vs-prod env audit (F7/K42):** list every binding that exists only in `run_tests.ml` - but not the production kernel env (`values`/`call-with-values` are the known ones). Write the audit - to `plans/sx-review/runner-env-gap.md`. (Fixing them is later; the audit is the W14 task.) -- [ ] **Protocol fuzz suite (C3/C4/C5/C6):** a bounded test that feeds the epoch loop malformed - lines (`(epoch)`, `(epoch foo)`, stray `(io-response …)`, two-exprs-per-line) and asserts the - process never dies and responses stay correctly tagged. -- [ ] **hs-upstream skip-list (F10/F18):** make the native runner's 272 hs-* failures a skip-list so - a red FAIL column means something. Record the count moved. +- **TEST-ONLY.** No semantics edits. 
Do NOT touch `spec/evaluator.sx`, + `spec/primitives.sx`, `spec/parser.sx`, `spec/render.sx`, the OCaml kernel, + or any host runtime. W14 pins behavior with tests and productionizes the + *test/runner* surface; the actual fixes are other workstreams (W1–W12). + A pin that *fails* means the finding regressed — do NOT relax the assertion, + record it as a blocker. +- **NO PUSH.** Commit locally on `loops/sx-ws-w14` only. Never push; never touch + `main` or `architecture`. +- **`.sx` files: use `sx-tree` MCP tools only** (a hook blocks Read/Write/Edit + on `.sx`). `sx_write_file` takes params **`file`** and **`source`** (NOT + `content` — a wrong key yields a `yojson … got null` error and no write). + `.md`/`.sh`/`.ml` files: normal tools are fine. +- **Never `pkill`/`kill` `sx_server`** — sibling loops share the binary. Bound + every run with `timeout` (e.g. `timeout 300 …`); if it hangs, let the timeout end it. +- **One item per fire, then stop.** No batching. + +## Per-iteration procedure + +1. Pick the first unchecked `[ ]` in the checklist. +2. Implement (test file or runner/harness change), lifting minimal repros from + the review lane files (`plans/sx-review/{core,hosts,conformance}.md`) — they + are a ready-made corpus of confirmed repros. +3. Build + run the affected tests: + `sx_build` (target ocaml) then + `timeout 300 ./hosts/ocaml/_build/default/bin/run_tests.exe <test-file>` + to run a single file. New `spec/tests/test-*.sx` files are auto-discovered. +4. Confirm green (a pin must PASS on current HEAD — the fix already landed). +5. Commit locally: `git add -A && git commit` with a `W14:` prefix. +6. Tick the box, prepend one dated line to the Progress log, stop. + +## Checklist + +### A. Test-debt pins — dc7aa709's landed fixes shipped without regression tests +Pin each confirmed-and-fixed finding with a minimal repro. Add suites to +`spec/tests/test-gate-pins.sx` (one `defsuite` per finding). 
+ +- [x] K18 [W7] — `expt` overflow now float-promotes (no 63-bit wrap) +- [x] K20 [W7] — `contains?` now supports dict key membership +- [x] K09/K11/K39 [W5] — longhand `unquote-splicing`, guard sentinel gensym, `do` IIFE-head +- [x] K49 [W8] — five void elements (area/base/embed/param/track) renderable + (spec side; native regen drift → see Blocked). NB: the depth/cycle guard + is K16 [W8], still OPEN — not a W14 pin target until its fix lands +- [x] crit-2 [W1] — signal-return kont pinned NON-VACUOUSLY (side-effect + sentinel across two tests; a plain assert would inherit the vacuity) +- [x] C1/C1b [W3] — command-channel crash guards pinned + (`scripts/test-protocol-gate.sh`, seed for section E's fuzz suite) +- [x] S4 [hosts] — soft error pages not cached (HTTP-mode pin in + `scripts/test-protocol-gate.sh`; NB S4 lives in hosts.md, not + conformance — "housekeeping" was a mislabel from F-15's tag) + +### B. Runner/production env unification +- [x] Audit runner-only bindings — inventory + bidirectional ledger in + `scripts/test-env-parity.sh` (KNOWN_DRIFT: values, call-with-values, + contains-char?, trim-right, sha3-256; consequence pin: + canonical-serialize broken on server; BOTH runners' sha3-256 are FAKE + stubs → test CIDs ≠ production CIDs) + +### C. Harness honesty +- [x] K19 — harness/runtime parity pinned (`scripts/test-harness-parity.sh`: + drives mcp_tree sx_eval over JSON-RPC vs fresh sx_server over epoch, + 12-probe battery from the finding, errors compared by message) +- [x] C22/K104 — FIXED harness (spec/harness.sx make-interceptor: log entry + appended before the mock runs, :result updated via dict-set!) + 3 pins +- [x] C21 — BUILT `harness-run-perform` (spec/harness.sx): drives real CEK + suspend/resume, services performs from session mocks, C22-style + logging; 5 pins incl. the S10 map-over-perform probe (CEK keeps all + elements — the drop class is serving-JIT-side). 
Runner-only (needs + cek-* driver bindings) +- [x] C23 — adapter-dom render-output tests + (`web/tests/test-adapter-dom-render.sx`, 8 tests vs runner mock DOM; + follow-up depth still open: boolean attrs, on-*/bind/ref/key, + reactive attrs, hydration cursor) + +### D. WASM corpus runner +- [x] F2 — BUILT `hosts/ocaml/browser/run_wasm_corpus.js` (one file per + node process, shipped-kernel boot per test_wasm_native.js) + + `scripts/test-wasm-corpus.sh` sweep driver with SKIP/KNOWN_FAIL + ledger. Baseline: 83 files, 80 fully green, 5192 passes, 0 test + failures; 3 partial load-errors (hash-table/r7rs/sets, opaque jsoo + exception mid-file). Full sweep ~13 min — wiring into + sx-build-all.sh left as maintainer call (gate definition D3) + +### E. Epoch-loop protocol fuzz + skip-list +- [x] C3/C4/C5/C6/C7 — protocol-quirk ledger (pins current behavior, + bidirectional) + seeded 60-line fuzz-liveness property in + `scripts/test-protocol-gate.sh` (11/11) +- [x] F10 — expected-failures BASELINE GATE instead of a skip-list + (`scripts/test-suite-baseline.sh` + `spec/tests/known-failures.txt`, + 273 pinned: 271 hs-* + 2 empty-suite-label entries → C9 evidence). + New failure OR vanished failure = red; hs loops' scoreboards untouched +- [x] C9 — empty suite labels ELIMINATED: 6 files had suite-less top-level + deftests (chars 43, import-bind 14, ports 12, let-match 8, math + nested-deftests, 4 hs strays) — wrapped/restructured into defsuites; + baseline identities updated in the same commit + +### F. Differential battery +- [x] F8 — cross-host differential battery: `spec/tests/differential-probes.txt` + (49 probes) × native server vs shipped WASM kernel via + `scripts/test-differential.sh` + `eval_wasm_probes.js`. 46 agree, + 3 ledgered KNOWN_DIVERGENT (F-3: bare-server `apply` does not spread — + runner masks it, F-7 class). 
Refinement: the F-1 float-display + divergence is a K.eval JS-boundary artifact — guest `sx-serialize` + output AGREES across hosts + +**CHECKLIST COMPLETE 2026-07-04** — all W14 items delivered. Open handoffs: +sx_render.ml regen drift (Blocked, hosts lane), adapter-dom depth tests, +3 WASM load-error bisects (hash-table/r7rs/sets), CI wiring of the four +gate scripts (D3 maintainer decision). ## Progress log (newest first) - -- (none yet — first fire will add the first entry) -## Recording back to the forge -After each commit, note the sha here; the maintainer (or a later step) records it as a -`test`-kind commit on `agents/ws-W14` in the forge so the program stays the system of record. +- 2026-07-04 — **F8 differential battery — CHECKLIST COMPLETE**. Committed + replacement for the review's ephemeral 130-probe corpus: + `spec/tests/differential-probes.txt` (49 probes across F-1 int/float + display, K18 overflow, F-3 apply + dict order, S-4 float printing, + strings/collections/special forms/error cases) evaluated on the native + server (epoch protocol) and the shipped WASM kernel + (`eval_wasm_probes.js`, guest `sx-serialize`), diffed by + `scripts/test-differential.sh` with a KNOWN_DIVERGENT ledger (heal → + red → delete entry). Result: 46/49 agree; 3 divergences, all one root + cause — **bare sx_server's `apply` does not spread its arg list** + ((apply + (list 1 2 3)) → "Expected number, got list"; WASM spreads + correctly; the test runner masks it with its own apply — F-7 class). + Finding refinement: F-1's float-display divergence (0.3 vs 0.3000…4) is + purely a K.eval JS-boundary artifact — guest-serialized output agrees. + W14 delivered: 7 pin suites (spec/tests/test-gate-pins.sx, 29 tests), + 4 gate scripts (protocol-gate 11, env-parity 7, harness-parity 12, + wasm-corpus 83-file, suite-baseline 273-pin, differential 49-probe), + 2 harness capabilities (C22 log-first, C21 perform-mode), C9 label + cleanup, adapter-dom render coverage. 
Test-only throughout. +- 2026-07-04 — **C9 empty-suite labels (item E.3) — section E COMPLETE**. + The sweep found the defect much wider than the finding: SIX files carried + suite-less top-level deftests (test-chars 43, test-import-bind 14, + test-ports 12, test-let-match 8, test-math as deftest-nested-in-deftest, + test-hyperscript-conformance 4 strays between suites). Fixes: file-level + defsuite wraps (validated via sx_validate after mechanical wrap), + test-math restructured deftest→defsuite (labels now "math > sin"), + hs strays wrapped in section-comment-named suites (hs-compat- + blockLiteral/cookies/some/where). The two baseline-visible identities + renamed in known-failures.txt in the SAME commit. Full-gate validated + GREEN (5798p/273f — −2 passes are the wrapper deftests that no longer + self-report; fail set byte-identical). Test-only. +- 2026-07-04 — **F10 baseline gate (item E.2)**. Deliberately NOT a + skip-list: skip-listing the hs red band in the runner would rewrite the + hs loops' scoreboards mid-flight. Instead + `scripts/test-suite-baseline.sh` diffs the full suite's FAIL set against + checked-in `spec/tests/known-failures.txt` (273 entries: 271 hs-* + 2 + with EMPTY suite labels — live C9 evidence, `can-map-an-array` "map with + block" and `string->number` 2-arg, the "r7rs radix shadow"). Red on a + NEW failure (regression) and red on a VANISHED failure (fix landed — + delete from baseline, locking in the win). Identity = "suite > name" + with error text stripped (messages churn). Current suite: 5800p/273f + (up 38 passes from dc7aa709's 5762 — sections A–D added pins). Validated + end-to-end: GREEN, exit 0, ~12 min runtime. Test-only. +- 2026-07-04 — **C3–C7 protocol fuzz suite (item E.1)**. 
All five findings + are still OPEN server-side (sx_server.ml fixes are host-runtime work), + so the suite pins CURRENT behavior as a bidirectional ledger — verified + each live first: C3 stray io-response → extra Unknown-command reply + (dead 13-vs-14-char guard); C4 malformed (epoch) → error reply + stale + epoch tag (envelope changed since the finding: dc7aa709's guard now + answers rather than ignores); C5 decreasing epoch accepted; C6 two + commands one line → one error, neither runs; C7 vm-trace sans compiler → + opaque "Not callable: nil". Plus a real fuzz property: 60 + deterministically-seeded hostile lines (unbalanced parens, control chars, + unicode, 2KB lines, stray io-responses, epoch mutations) then a + well-formed command — server must still answer and exit cleanly. + protocol-gate now 11/11. When a server fix lands, the matching ledger + pin fails loudly → update to assert the corrected behavior. Test-only. +- 2026-07-04 — **F2 WASM corpus runner (section D COMPLETE)**. The review's + headline conformance gap: no runner ever fed spec/tests through the + SHIPPED browser artifact (F-1/F-3 divergences existed undetected). Built + `run_wasm_corpus.js` (boots sx_browser.bc.wasm.js headless in Node with + the test_wasm_native.js stub block, loads the 23 web-stack modules, + registers framework hooks, runs ONE file per process → parseable + `CORPUS-RESULT` line; process isolation means a hung file can't kill the + sweep) + `scripts/test-wasm-corpus.sh` (sweep driver, SKIP/KNOWN_FAIL + ledger with green-flip detection). **Empirical baseline: 83 files, 80 + fully green, 5192 passes, ZERO test failures on the shipped kernel** — + including test-gate-pins (29/29) and test-letrec-resume (the kernel + provides cek-* driver bindings, broader than bare sx_server). 3 partial + load-errors (test-hash-table 22p, test-r7rs 87p, test-sets 30p — opaque + jsoo exception mid-file, diagnosing which form = follow-up). 
Full sweep + ~13 min; CI wiring deferred to the D3 gate-definition decision. Test-only. +- 2026-07-04 — **C23 adapter-dom render-output tests (item C.4) — section C + COMPLETE**. Key discovery: the "browser-only" exclusion of adapter-dom + testing is FALSE for render output — `(import (web adapter-dom))` + disk-resolves in the OCaml runner and `render-to-dom` works against its + mock DOM (dom-* → host-* → mock elements). New + `web/tests/test-adapter-dom-render.sx` (8 tests): tag/text-child-node, + class+id, ordered children, void element, when-false empty FRAGMENT, + when-true branch-in-fragment, map N-children-in-fragment, if inlines + branch. Probed the adapter's output contract first (text = nodeType-3 + child; control flow = FRAGMENT wrapper; if inlines). Auto-included in + default runs (not on the exclusion list) — first render-output coverage + of the 1512-line adapter in the standard gate. Follow-up depth (boolean + attrs, on-*/bind/ref/key, reactive, hydration) noted on the checklist. + 254/0 standalone. Test-only. +- 2026-07-04 — **C21 perform-mode harness (item C.3)**. Added + `harness-run-perform` to spec/harness.sx (exported): drives + `make-cek-state`/`cek-step-loop`, services each + `(perform {:op X :args L})` suspension from the session's platform mocks + (entry logged before invocation, C22-consistent), `cek-resume`s with the + mock value, loops to terminal. Self-recursion via the `(self self …)` + pattern (avoids letrec-injection K06 territory). Extracted the arity + dispatch into shared `harness-invoke-mock`. 5 pins in + `gate-C21-perform-mode-harness` — notably the **S10 probe**: `(map (fn (u) + (perform …)) '("a" "b" "c"))` keeps ALL elements through 3 suspensions on + the CEK path, confirming the element-drop class is serving-JIT-side, not + CEK. Caveat noted in the docstring: needs the runner's cek-* driver + bindings (absent on bare sx_server/MCP — the env-parity theme again). + 290/0. Test-infra-only. 
+- 2026-07-04 — **C22/K104 throwing-mock fix + pins (item C.2)**. First + actual FIX of the loop — in scope because spec/harness.sx is W14-owned + test infrastructure (PLAN approach item 4 assigns "log IO before invoking + the mock" to W14). TDD: reproduced pre-fix (caught error, 0 log entries), + then restructured `make-interceptor` to append the entry BEFORE the mock + runs (`:result nil` while pending, `dict-set!` in place on return). + Verified: throwing mock leaves entry, happy path updates result, mixed + sequence counts all 3. Added suite `gate-C22-throwing-mock-logged` + (3 tests). Harness self-suite (15) + test-relate-picker (only other + harness consumer) green; 285/0 pins run. Tooling notes: replace/insert + tools take `new_source` (not `replacement`); find_all paths still + disagree with read_subtree/replace_node on define-library files — + sx_write_file remains the reliable route. Test-infra-only. +- 2026-07-04 — **K19 harness-parity pin (item C.1)**. Authored + `scripts/test-harness-parity.sh`: drives `mcp_tree.exe` `sx_eval` with + raw JSON-RPC over stdio and a fresh `sx_server.exe` over the epoch + protocol, running the finding's exact 12-probe battery (empty?/get/ + split/equal?/contains?/keyword-name/char-code/parse-number) through both + and failing on ANY divergence. Errors normalized to their inner message + so identical failures compare equal (`keyword-name :kw` errors the same + way on both — keywords evaluate to strings before the call). Result: + 12/12 parity — dc7aa709's 8-entry stopgap alignment holds; this pin keeps + it honest until the real fix (mcp_tree links sx_primitives) lands in the + hosts lane. Test-only. +- 2026-07-04 — **Section B: env-parity audit + ledger**. Probed a fresh + `sx_server` over the epoch protocol (`deps-check` + live eval). 
Confirmed + runner-only drift: `values`/`call-with-values` (run_tests.ml:1131/1140), + `contains-char?` (rt.ml:728 + rt.js:85), `trim-right` (**JS runner only** + — absent even from the OCaml runner), `sha3-256` (rt.ml:745 + rt.js:88). + Consequence verified live: `(canonical-serialize 42)` on the server → + `Undefined symbol: contains-char?` (content addressing broken for ANY + number outside the runners). **Worse than the finding**: BOTH runners' + `sha3-256` are FAKE stubs (OCaml uses `Hashtbl.hash`!) while production + has real `crypto-sha3-256` — every CID computed in tests differs from + production CIDs. Authored `scripts/test-env-parity.sh` as a bidirectional + ledger: MUST_HAVE regressions fail; a KNOWN_DRIFT binding *appearing* + also fails (forces ledger + consequence-pin update when W5/W7/W12 land + fixes). 7/7 green. Test-only. +- 2026-07-04 — **S4 error-page-cache pin (item A.7) — section A COMPLETE**. + Extended `scripts/test-protocol-gate.sh` with an HTTP-mode case: fresh + `sx_server.exe --http <port>` (timeout-bounded, own PID killed at + end), GET the same nonexistent path twice, assert BOTH requests re-render + (2 `[sx-http]` lines — pre-fix the 2nd was cache-served at 0.0005s) and + the `[cache] … error page, not cached` is_err gate line appears. Findings + from prototyping: standalone worktree renders ALL docs pages as soft error + pages (no content), so a positive "real page IS cached" control is not + assertable here — documented in the script; startup takes ~12-15s (poll + loop, 40s budget). 5/5 protocol-gate green + 267/0 sx pins. Test-only. +- 2026-07-04 — **C1/C1b command-channel pins (item A.6)**. 
These are + protocol-level, not .sx-suite pins: authored + `scripts/test-protocol-gate.sh` — each case spawns its OWN timeout-bounded + `sx_server.exe` (no shared process touched) and asserts three things: an + `(error N "Malformed command line: ...")` response is emitted, the + follow-up epoch still evaluates (process survived), and no `Fatal error` + escapes / exit is clean. Cases: C1 unterminated list (exact review repro), + C1 plain-garbage line, C1b non-ASCII byte (`café`), plus a well-formed + control session. 4/4 green. The script is deliberately structured to grow + into section E's fuzz suite (C3–C7). Test-only. +- 2026-07-04 — **crit-2 non-vacuous pin (item A.5)**. The original bug's + signature — handler value becomes the WHOLE program result, discarding + every outer frame *including the covering test's own assert* — means a + plain `(assert= repro expected)` pin would pass vacuously on regression. + Added suite `gate-crit2-signal-return-kont` with a **side-effect sentinel**: + test 1 runs both repros (`("outer" 43 "end")` list shape + `raise-continuable` + → 143) then `set!`s a top-level flag; test 2 independently asserts the flag + — if the continuation is ever dropped again, test 1 "passes" but test 2 + fails loudly. Third test pins the exact shipped-test expr (51). Verified + both repro shapes live via sx_eval first. 267 passed / 0 failed. Test-only. +- 2026-07-03 — **K49 void-elements pin (item A.4) + regen-drift DISCOVERY**. + Corrected the checklist label first: K49 is "five void elements + unrenderable" (core.md:335), not the depth guard (that's K16, OPEN). Added + suite `gate-K49-void-elements-renderable` (3 tests): spec `HTML_TAGS` + contains all five; `(render-to-html '(base :href "x") (make-env))` → + a self-closing `<base>` tag; all five render self-closing. Runner-env gotchas: + `current-env`/`symbol` are not bound in run_tests — use `(make-env)` and + literal quoted forms. 
**Discovery:** the first draft pinned via the + runner's native `render-html` and FAILED — `hosts/ocaml/lib/sx_render.ml` + (generated) was never regenerated after dc7aa709's spec fix, so the native + render path still errors on the five tags. Recorded under Blocked; live + evidence for F13 (regen-diff gate). 264 passed / 0 failed. Test-only. +- 2026-07-03 — **K09/K11/K39 W5 special-form pins (item A.3)**. Three suites + added to `spec/tests/test-gate-pins.sx`: `gate-K09-longhand-unquote-splicing` + (R7RS longhand `(unquote-splicing X)` now splices, incl. empty-list case; + shorthand still works), `gate-K11-guard-reraise-forgeable` (a body/clause + value shaped like `(list '__guard-reraise__ X)` is returned as data, not + misread as a re-raise — sentinel is now gensym'd), `gate-K39-do-iife-head` + (`(do ((fn (x) x) 5) 99)` → 99, not a misparsed do-loop — exact core.md + repro). Gotchas hit and fixed: quasiquoted bare idents are *symbols* not + strings, and `assert=` compares with `=` (not `equal?`, which returns false + on these spliced lists). 261 passed / 0 failed under OCaml run_tests. Test-only. +- 2026-07-03 — **K20 contains?-dict pin (item A.2)**. Mapped K-codes by + core.md severity order (K17 append!, K18 expt, K19 harness-drift, K20 + contains?-dict). Added suite `gate-K20-contains-dict` to + `spec/tests/test-gate-pins.sx` (4 tests): present dict key → true, missing + key → false, list membership unchanged, string substring unchanged. Repro + from core.md ("(contains? {:a 1} :a) threw `contains?: 2 args`"). 8/8 green + across both suites under OCaml run_tests. Test-only. +- 2026-07-03 — **K18 expt-overflow pin (item A.1)**. Bootstrapped this briefing + from PLAN.md §W14 (the referenced file did not exist yet). Added + `spec/tests/test-gate-pins.sx` with suite `gate-K18-expt-overflow` (4 tests): + small exponents stay exact (`2^0=1`, `2^10=1024`), `2^62 > 0` (no negative + 63-bit wrap), `2^100 > 0` (no wrap-to-zero), `2^100` is a number (float + promotion). 
Verified 4/4 green under the OCaml run_tests kernel. Test-only. + +## Blocked +- **K49 native path — sx_render.ml regen drift** (found 2026-07-03 while + pinning A.4): dc7aa709 fixed HTML_TAGS in `spec/render.sx` but never re-ran + `hosts/ocaml/bootstrap_render.py`, so the generated + `hosts/ocaml/lib/sx_render.ml` still carries a stale `html_tags_list` + without area/base/embed/param/track. The runner's native `render-html` + convenience (and any native fast-path render) therefore STILL throws + `Undefined symbol: base` — dc7aa709's "verified on the native binary" claim + did not cover this path. Fix = regen (hosts lane, semantics-adjacent — out + of scope for this test-only loop). This is a live instance of **F13** + (regen-diff CI gate, section-B/D territory): a regen-diff check would have + caught it at commit time. The K49 pin covers the spec side only; when the + regen lands, extend the suite with `render-html`-path assertions. diff --git a/scripts/test-differential.sh b/scripts/test-differential.sh new file mode 100755 index 00000000..6fe366b3 --- /dev/null +++ b/scripts/test-differential.sh @@ -0,0 +1,94 @@ +#!/bin/bash +# test-differential.sh — W14/F8: cross-host differential battery. +# +# Evaluates every expression in spec/tests/differential-probes.txt on: +# A) the native server (sx_server.exe, epoch protocol) — its printer +# B) the SHIPPED browser kernel (eval_wasm_probes.js, guest sx-serialize) +# and diffs the outputs. The review's original 130-probe corpus was +# ephemeral (F-8); this is the committed replacement. +# +# KNOWN_DIVERGENT is the ledger of confirmed, still-open divergences — +# keyed by the probe EXPRESSION. Red on a NEW divergence (host drift) and +# red on a HEALED one (fix landed: delete the entry, locking in parity). 
+# +# Method note (finding refinement, 2026-07-04): comparing raw K.eval +# JS-boundary values shows float-display divergences (0.3 vs +# 0.30000000000000004) that DISAPPEAR under guest-level (sx-serialize …) — +# the F-1 float-display class is a JS-boundary artifact, not a kernel +# serialization divergence. This battery compares guest serialization. +set -uo pipefail +cd "$(dirname "$0")/.." + +SERVER=hosts/ocaml/_build/default/bin/sx_server.exe +WASM=shared/static/wasm/sx_browser.bc.wasm.js +PROBES=spec/tests/differential-probes.txt +[[ -x "$SERVER" ]] || { echo "SKIP: $SERVER not built" >&2; exit 2; } +[[ -f "$WASM" ]] || { echo "SKIP: $WASM missing" >&2; exit 2; } + +# --- KNOWN_DIVERGENT ledger (verified live 2026-07-04) ------------------- +# F-3/K53: bare sx_server's `apply` does NOT spread its argument list — +# (apply + (list 1 2 3)) errors "Expected number, got list"; (apply str l) +# returns the serialized list as one string. The WASM kernel spreads +# correctly. The test runner masks this with its own apply (F-7 class). 
+declare -A KNOWN_DIVERGENT +KNOWN_DIVERGENT['(apply + (list 1 2 3))']="F-3: native apply does not spread" +KNOWN_DIVERGENT['(apply max (list 1 5 2))']="F-3: native apply does not spread" +KNOWN_DIVERGENT['(apply str (list "a" "b"))']="F-3: native apply does not spread" + +native=$(mktemp); wasm=$(mktemp) + +python3 - "$SERVER" "$PROBES" > "$native" <<'PY' +import json, subprocess, sys +server, probefile = sys.argv[1], sys.argv[2] +probes = [l.strip() for l in open(probefile) if l.strip() and not l.startswith('#')] +inp = [] +for i, p in enumerate(probes): + inp.append(f"(epoch {i+1})") + inp.append(f"(eval {json.dumps(p)})") +out = subprocess.run(["timeout", "120", server], input="\n".join(inp) + "\n", + capture_output=True, text=True).stdout +res, cur = {}, None +for l in out.splitlines(): + if l.startswith("(ok-len "): + cur = int(l.split()[1]); res[cur] = None + elif l.startswith("(error "): + idx = int(l.split()[1]); res[idx] = "ERROR"; cur = None + elif cur is not None and res.get(cur) is None: + res[cur] = l; cur = None +for i, p in enumerate(probes): + print(f"PROBE {i+1} {res.get(i+1, '')}") +PY + +timeout 300 node hosts/ocaml/browser/eval_wasm_probes.js "$PROBES" > "$wasm" 2>/dev/null + +pass=0; fail=0; i=0 +while IFS= read -r expr; do + [[ -z "$expr" || "$expr" == \#* ]] && continue + i=$((i+1)) + a=$(sed -n "s/^PROBE $i //p" "$native") + b=$(sed -n "s/^PROBE $i //p" "$wasm") + known="${KNOWN_DIVERGENT[$expr]:-}" + if [[ "$a" == "$b" ]]; then + if [[ -n "$known" ]]; then + echo "RED: $expr — KNOWN_DIVERGENT now AGREES ($known); delete from ledger" + fail=$((fail+1)) + else + pass=$((pass+1)) + fi + else + if [[ -n "$known" ]]; then + echo "KNOWN-DIVERGENT: $expr ($known)" + pass=$((pass+1)) + else + echo "RED: $expr" + echo " native: $a" + echo " wasm: $b" + fail=$((fail+1)) + fi + fi +done < <(grep -v '^\s*#' "$PROBES" | grep -v '^\s*$') + +rm -f "$native" "$wasm" +echo +echo "differential: $i probes, $pass in agreement/ledgered, $fail red" +[[ $fail 
-eq 0 ]] diff --git a/scripts/test-env-parity.sh b/scripts/test-env-parity.sh new file mode 100755 index 00000000..3babf652 --- /dev/null +++ b/scripts/test-env-parity.sh @@ -0,0 +1,100 @@ +#!/bin/bash +# test-env-parity.sh — W14 section-B ledger: runner env vs production env. +# +# The review (F7, K42, JS5, core.md "canonical.sx depends on test-runner-only +# helpers") found bindings that exist ONLY in the test runners, so suites +# pass against an environment production never provides. Rule (PLAN.md W14): +# "if the spec needs it, it's a kernel primitive; if not, the test can't +# have it." +# +# This script is a LEDGER, not a wish: it asserts today's confirmed drift +# stays exactly as recorded. Both directions fail loudly: +# - a MUST_HAVE going missing on the server -> regression, fix the kernel +# - a KNOWN_DRIFT binding appearing on the server -> the fix landed; +# move it to MUST_HAVE and update the consequence pins below. +# +# Confirmed inventory (2026-07-04, all verified live over the epoch protocol): +# +# binding OCaml runner JS runner fresh sx_server +# values real (rt.ml:1131) ? ABSENT +# call-with-values real (rt.ml:1140) ? ABSENT +# contains-char? real (rt.ml:728) real (:85) ABSENT +# trim-right ABSENT real (:87) ABSENT +# sha3-256 FAKE Hashtbl.hash FAKE stub ABSENT (real = crypto-sha3-256) +# +# Consequences (pinned in section 3): +# - (canonical-serialize 42) on a fresh server errors "Undefined symbol: +# contains-char?" -> content addressing broken for ANY number outside +# the test runners. +# - every CID computed inside run_tests uses a FAKE hash, so test CIDs +# never equal production CIDs (crypto-sha3-256 is real SHA3). +# +# Each probe spawns its OWN timeout-bounded sx_server.exe. No shared process. +set -uo pipefail + +cd "$(dirname "$0")/.." +SERVER=hosts/ocaml/_build/default/bin/sx_server.exe + +if [[ ! 
-x "$SERVER" ]]; then + echo "SKIP: $SERVER not built (run sx_build target=ocaml first)" >&2 + exit 2 +fi + +pass=0 +fail=0 + +# deps_unresolved EXPR -> prints the (unresolved ...) list for EXPR on a fresh server +deps_unresolved() { + printf '(epoch 1)\n(deps-check "%s")\n' "$1" \ + | timeout 60 "$SERVER" 2>/dev/null \ + | grep -o ':unresolved ([^)]*)' || true +} + +# --- Section 1: MUST_HAVE — spec-needed bindings production must provide --- +MUST_HAVE_EXPR='(list (equal? 1 1) (apply + (list 1 2)) (contains? {:a 1} :a) (crypto-sha3-256 \"x\") (split \"a-b\" \"-\"))' +unres=$(deps_unresolved "$MUST_HAVE_EXPR") +if [[ -z "$unres" || "$unres" == ':unresolved ()' ]]; then + echo "PASS: MUST_HAVE core bindings all resolve on fresh sx_server" + pass=$((pass+1)) +else + echo "FAIL: MUST_HAVE binding missing on fresh sx_server: $unres" + fail=$((fail+1)) +fi + +# --- Section 2: KNOWN_DRIFT — runner-only bindings, asserted ABSENT ------- +# If one of these starts resolving, its kernel fix landed: move it to +# MUST_HAVE above and update the consequence pin in section 3. +for name in values call-with-values contains-char? trim-right sha3-256; do + unres=$(deps_unresolved "($name)") + if grep -q -- "$name" <<<"$unres"; then + echo "PASS: KNOWN_DRIFT '$name' still absent on fresh sx_server (ledger accurate)" + pass=$((pass+1)) + else + echo "FAIL: KNOWN_DRIFT '$name' now RESOLVES on fresh sx_server — fix landed?" + echo " Update this ledger: move '$name' to MUST_HAVE and revisit section 3." + fail=$((fail+1)) + fi +done + +# --- Section 3: consequence pin — canonical.sx on the production server --- +# Current reality: canonical-serialize of ANY number errors on a fresh +# server because canonical-number calls runner-only contains-char?. +out=$(printf '(epoch 1)\n(load "spec/canonical.sx")\n(epoch 2)\n(eval "(canonical-serialize 42)")\n' \ + | timeout 60 "$SERVER" 2>&1) +if grep -q 'error 2 .*contains-char?' 
<<<"$out"; then + echo "PASS: consequence pin — canonical-serialize on numbers still broken on server (as recorded)" + pass=$((pass+1)) +elif grep -q '^(ok 2 ' <<<"$out"; then + echo "FAIL: consequence pin — canonical-serialize 42 now WORKS on the server." + echo " The canonical-helpers fix landed: flip this pin to assert success" + echo " and pin the exact canonical form + CID stability." + fail=$((fail+1)) +else + echo "FAIL: consequence pin — unexpected server output:" + sed 's/^/ /' <<<"$out" + fail=$((fail+1)) +fi + +echo +echo "env-parity: $pass passed, $fail failed" +[[ $fail -eq 0 ]] diff --git a/scripts/test-harness-parity.sh b/scripts/test-harness-parity.sh new file mode 100755 index 00000000..1192ac3c --- /dev/null +++ b/scripts/test-harness-parity.sh @@ -0,0 +1,107 @@ +#!/bin/bash +# test-harness-parity.sh — W14 section-C pin for K19 (harness honesty). +# +# K19 (review, core.md): the MCP tree server (mcp_tree.ml) carries a +# PARALLEL primitive table, and it drifted from the real runtime +# (sx_primitives.ml) — e.g. (get {:a 1} :a 99) returned nil in the harness +# but 1 in production, (split "a--b" "--") was char-class vs substring. +# CLAUDE.md mandates harness verification, so drift silently produces +# false findings/passes. dc7aa709 aligned 8 entries as a stopgap; the real +# fix (mcp_tree links sx_primitives directly) is hosts-lane work. +# +# This pin runs the finding's exact probe battery through BOTH environments +# — mcp_tree.exe sx_eval (JSON-RPC over stdio) and a fresh sx_server.exe +# (epoch protocol) — and fails on ANY divergence. Errors are compared by +# message, values by serialized form. Both subprocesses are fresh and +# timeout-bounded; no shared process is touched. +# +# Exit: 0 = full parity; 1 = drift (harness lies about the runtime again). +set -uo pipefail +cd "$(dirname "$0")/.." + +MCP=hosts/ocaml/_build/default/bin/mcp_tree.exe +SERVER=hosts/ocaml/_build/default/bin/sx_server.exe +for bin in "$MCP" "$SERVER"; do + if [[ ! 
-x "$bin" ]]; then + echo "SKIP: $bin not built (run sx_build target=ocaml first)" >&2 + exit 2 + fi +done + +python3 - "$MCP" "$SERVER" <<'PYEOF' +import json, re, subprocess, sys + +MCP, SERVER = sys.argv[1], sys.argv[2] + +# K19 probe battery — the finding's confirmed drift cases + stopgap entries. +PROBES = [ + '(empty? "")', '(empty? {})', + '(get {:a 1} :a 99)', '(get {:a 1} :zz 99)', '(get (list 10 20) 1)', + '(split "a--b" "--")', '(split "abc" "")', + '(equal? (list 1 2) (list 1 2))', + '(contains? {:a 1} :a)', '(keyword-name :kw)', + '(char-code "A")', '(parse-number "42")', +] + +def norm_error(msg): + """Extract the quoted inner error message so harness/server error + envelopes compare equal when the underlying failure is the same.""" + m = re.search(r'Unhandled exception: \\?"(.*?)\\?"', msg) + if m: + return " " + m.group(1) + return " " + msg.strip()[:80] + +# --- harness side: mcp_tree sx_eval over JSON-RPC --- +lines = [ + json.dumps({"jsonrpc": "2.0", "id": 1, "method": "initialize", + "params": {"protocolVersion": "2024-11-05", "capabilities": {}, + "clientInfo": {"name": "parity", "version": "0"}}}), + json.dumps({"jsonrpc": "2.0", "method": "notifications/initialized"}), +] +for i, p in enumerate(PROBES): + lines.append(json.dumps({"jsonrpc": "2.0", "id": 100 + i, "method": "tools/call", + "params": {"name": "sx_eval", "arguments": {"expr": p}}})) +out = subprocess.run(["timeout", "60", MCP], input="\n".join(lines) + "\n", + capture_output=True, text=True).stdout +harness = {} +for l in out.splitlines(): + try: + j = json.loads(l) + except ValueError: + continue + if isinstance(j.get("id"), int) and j["id"] >= 100: + txt = j.get("result", {}).get("content", [{}])[0].get("text", "").strip() + if txt.startswith("Error:") or j.get("result", {}).get("isError"): + txt = norm_error(txt) + harness[j["id"] - 100] = txt + +# --- server side: fresh sx_server over the epoch protocol --- +inp = [] +for i, p in enumerate(PROBES): + inp.append(f"(epoch {i + 
1})") + inp.append(f"(eval {json.dumps(p)})") +sout = subprocess.run(["timeout", "60", SERVER], input="\n".join(inp) + "\n", + capture_output=True, text=True).stdout +server, cur = {}, None +for l in sout.splitlines(): + if l.startswith("(ok-len "): + cur = int(l.split()[1]); server[cur - 1] = None + elif l.startswith("(error "): + idx = int(l.split()[1]); server[idx - 1] = norm_error(l); cur = None + elif cur is not None and server.get(cur - 1) is None: + server[cur - 1] = l.strip(); cur = None + +fails = 0 +for i, p in enumerate(PROBES): + h = harness.get(i, "") + s = server.get(i, "") + if h == s: + print(f"PASS: {p:40s} both -> {h!r}") + else: + print(f"FAIL: {p:40s} harness={h!r} server={s!r}") + fails += 1 + +print() +print(f"harness-parity: {len(PROBES) - fails} passed, {fails} failed") +sys.exit(1 if fails else 0) +PYEOF diff --git a/scripts/test-protocol-gate.sh b/scripts/test-protocol-gate.sh new file mode 100755 index 00000000..12eb8cbf --- /dev/null +++ b/scripts/test-protocol-gate.sh @@ -0,0 +1,233 @@ +#!/bin/bash +# test-protocol-gate.sh — W14 pins for the epoch/command-channel protocol. +# +# Pins C1/C1b (review, plans/sx-review/hosts.md): a malformed or non-ASCII +# line on the top-level command channel used to raise an uncaught +# Sx_types.Parse_error and KILL the whole sx_server process (the shared +# channel used by bridges and conformance runners). Fixed in dc7aa709: +# the server now answers `(error N "Malformed command line: ...")` and +# keeps serving. +# +# Each case spawns its OWN timeout-bounded sx_server.exe subprocess — +# no shared/sibling process is ever touched. Designed to grow into the +# W14 section-E protocol fuzz suite (C3-C7). +# +# Usage: bash scripts/test-protocol-gate.sh +# Exit: 0 = all pins green; 1 = a pin failed (fix regressed). +set -uo pipefail + +cd "$(dirname "$0")/.." +SERVER=hosts/ocaml/_build/default/bin/sx_server.exe + +if [[ ! 
-x "$SERVER" ]]; then + echo "SKIP: $SERVER not built (run sx_build target=ocaml first)" >&2 + exit 2 +fi + +pass=0 +fail=0 + +# run_case NAME INPUT EXPECT_SENTINEL +# Feeds INPUT to a fresh server. Asserts: +# 1. an (error ... "Malformed command line: ...") response is emitted +# 2. the follow-up epoch still evaluates (EXPECT_SENTINEL in output) +# 3. the process exits cleanly (no Fatal error, exit 0 on stdin EOF) +run_case() { + local name="$1" input="$2" sentinel="$3" + local out rc + out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1) + rc=$? + local ok=1 + if ! grep -q 'Malformed command line' <<<"$out"; then + echo "FAIL: $name — no malformed-line error response"; ok=0 + fi + if ! grep -q "^${sentinel}\$" <<<"$out"; then + echo "FAIL: $name — follow-up epoch did not run (process died?)"; ok=0 + fi + if grep -q 'Fatal error' <<<"$out"; then + echo "FAIL: $name — Fatal error escaped to the top level"; ok=0 + fi + if [[ $rc -ne 0 ]]; then + echo "FAIL: $name — nonzero exit ($rc)"; ok=0 + fi + if [[ $ok -eq 1 ]]; then + echo "PASS: $name" + pass=$((pass+1)) + else + echo " --- output ---"; sed 's/^/ /' <<<"$out"; echo " --------------" + fail=$((fail+1)) + fi +} + +# C1: unterminated list on the command channel (exact review repro) +run_case "C1 unterminated list survives" \ + '(epoch 2)\n(eval "(+ 1 2"\n(epoch 3)\n(eval "99")\n' \ + '99' + +# C1: plain-garbage line (second C1 repro shape) +run_case "C1 garbage line survives" \ + '(epoch 1)\nnot an s-expr ]]] {{{\n(epoch 2)\n(eval "42")\n' \ + '42' + +# C1b: non-ASCII byte on the command channel (exact review repro; \xc3\xa9 = é) +run_case "C1b non-ASCII line survives" \ + '(epoch 1)\n(eval (quote caf\xc3\xa9))\n(epoch 2)\n(eval "99")\n' \ + '99' + +# Control: a well-formed session still works end to end +ctrl=$(printf '(epoch 1)\n(eval "(+ 40 2)")\n' | timeout 60 "$SERVER" 2>&1) +if grep -q '^42$' <<<"$ctrl"; then + echo "PASS: control well-formed session" + pass=$((pass+1)) +else + echo "FAIL: control 
well-formed session"; sed 's/^/ /' <<<"$ctrl" + fail=$((fail+1)) +fi + +# --------------------------------------------------------------------------- +# C3–C7 protocol-quirk LEDGER (hosts.md, all OPEN server-side). These pin +# CURRENT behavior, verified live 2026-07-04 — they are documentation, not +# endorsement. When a server fix lands and a pin fails, update the ledger +# to assert the corrected behavior (bidirectional, like test-env-parity.sh). +# --------------------------------------------------------------------------- + +# ledger_case NAME INPUT GREP_MUST GREP_MUST2 +ledger_case() { + local name="$1" input="$2" must="$3" must2="${4:-}" + local out + out=$(printf '%b' "$input" | timeout 60 "$SERVER" 2>&1) + local ok=1 + grep -q -- "$must" <<<"$out" || { echo "FAIL: $name — expected: $must"; ok=0; } + if [[ -n "$must2" ]]; then + grep -q -- "$must2" <<<"$out" || { echo "FAIL: $name — expected: $must2"; ok=0; } + fi + if grep -q 'Fatal error' <<<"$out"; then + echo "FAIL: $name — process died"; ok=0 + fi + if [[ $ok -eq 1 ]]; then echo "PASS: $name"; pass=$((pass+1)); + else echo " --- output ---"; sed 's/^/ /' <<<"$out"; fail=$((fail+1)); fi +} + +# C3: stray (io-response ...) is answered as Unknown command (dead guard) — +# an EXTRA response the client didn't ask for; process keeps serving. +ledger_case "C3 ledger: stray io-response gets an extra error reply" \ + '(epoch 1)\n(io-response 1 42)\n(eval "5")\n' \ + 'Unknown command: (io-response 1 42)' '^5$' + +# C4: malformed (epoch) doesn't update the epoch — next reply tagged with +# the OLD epoch (0 here), i.e. stale from the client's viewpoint. +ledger_case "C4 ledger: malformed epoch marker leaves epoch stale" \ + '(epoch)\n(eval "2")\n' \ + '(ok-len 0 1)' '^2$' + +# C5: no monotonic-epoch enforcement — a decreasing epoch is accepted. 
+ledger_case "C5 ledger: decreasing epoch accepted silently" \ + '(epoch 9)\n(epoch 3)\n(eval "42")\n' \ + '(ok-len 3 2)' '^42$' + +# C6: two commands on one line -> one error, NEITHER executed. +ledger_case "C6 ledger: two commands on one line both dropped" \ + '(epoch 1)\n(eval "1") (eval "2")\n(eval "3")\n' \ + 'Expected single command, got 2' '^3$' + +# C7: vm-trace without the compiler loaded errors opaquely. +ledger_case "C7 ledger: vm-trace sans compiler is opaque Not-callable-nil" \ + '(epoch 1)\n(vm-trace "(+ 1 2)")\n' \ + 'Not callable: nil' + +# --------------------------------------------------------------------------- +# Fuzz-liveness property: after 60 deterministic hostile lines (unbalanced +# parens, control chars, unicode, long lines, stray io-responses, epoch +# mutations), the server must still answer a well-formed command and exit +# cleanly. Seeded PRNG — reproducible corpus. +# --------------------------------------------------------------------------- +fuzz=$(python3 - <<'PY' +import random +r = random.Random(1404) +lines = [] +frag = ['(', ')', '((', '))', '(eval', '(epoch', 'io-response', '"', '\\', + 'café', '\x01', '\x1b[2J', ':kw', '{', '}', '(+ 1', 'nil)', '#|', '|#'] +for i in range(60): + kind = r.randrange(5) + if kind == 0: + lines.append(''.join(r.choice(frag) for _ in range(r.randrange(1, 8)))) + elif kind == 1: + lines.append('(epoch ' + r.choice(['', 'foo', '-1', '999999999999999999999', ')']) + ')') + elif kind == 2: + lines.append('(io-response %d %s' % (r.randrange(99), r.choice([')', '', '42']) )) + elif kind == 3: + lines.append('x' * r.randrange(200, 2000)) + else: + lines.append('(eval "' + r.choice(['(+ 1', '(list', '\\\\', '((((']) + '")') +print('\n'.join(lines)) +PY +) +out=$(printf '%s\n(epoch 777)\n(eval "\\"alive\\"")\n' "$fuzz" | timeout 90 "$SERVER" 2>&1) +rc=$? +if grep -q '^"alive"$' <<<"$out" && ! 
grep -q 'Fatal error' <<<"$out" && [[ $rc -eq 0 ]]; then + echo "PASS: fuzz-liveness — server survives 60 hostile lines and still answers" + pass=$((pass+1)) +else + echo "FAIL: fuzz-liveness (rc=$rc)"; tail -6 <<<"$out" | sed 's/^/ /' + fail=$((fail+1)) +fi + +# --------------------------------------------------------------------------- +# S4 (review, hosts.md): soft error pages must NOT be stored in the HTTP +# response cache. Pre-fix, a routing-failure page was cached as HTTP 200 and +# served byte-identically from cache to every later visitor (cold 2s → warm +# 0.0005s, ONE render line). Post-fix (dc7aa709), http_render_page returns +# (html, is_error) and cache insertion is gated on `not is_err` (the skip is +# logged as "[cache] → error page, not cached"). +# +# Pin: GET the same nonexistent path twice against a fresh --http server and +# assert BOTH requests re-render (two [sx-http] render lines) plus the +# is_err gate line appearing in the log. NB: in a standalone worktree all +# docs pages render as soft error pages (no content), so a positive +# "real page IS cached" control is not assertable here. +# --------------------------------------------------------------------------- +s4_case() { + local port=$((18000 + RANDOM % 2000)) + local log; log=$(mktemp) + timeout 90 "$SERVER" --http "$port" >"$log" 2>&1 & + local srv=$! 
+ local up=0 + for _ in $(seq 1 40); do + if curl -s -o /dev/null "http://localhost:$port/" 2>/dev/null; then up=1; break; fi + sleep 1 + done + if [[ $up -ne 1 ]]; then + echo "FAIL: S4 — http server did not come up on :$port" + kill "$srv" 2>/dev/null; rm -f "$log" + fail=$((fail+1)); return + fi + local miss="/sx/gate-pin-missing-$$-$RANDOM" + curl -s -o /dev/null "http://localhost:$port$miss" + curl -s -o /dev/null "http://localhost:$port$miss" + sleep 1 + local renders + renders=$(grep -c "sx-http\] $miss " "$log") + local ok=1 + if [[ "$renders" -ne 2 ]]; then + echo "FAIL: S4 — expected 2 renders of $miss (not cache-served), got $renders" + ok=0 + fi + if ! grep -q 'error page, not cached' "$log"; then + echo "FAIL: S4 — is_err cache gate line absent from server log" + ok=0 + fi + if [[ $ok -eq 1 ]]; then + echo "PASS: S4 soft error page not cached (both GETs re-rendered)" + pass=$((pass+1)) + else + echo " --- log tail ---"; tail -12 "$log" | sed 's/^/ /'; echo " ---------------" + fail=$((fail+1)) + fi + kill "$srv" 2>/dev/null + rm -f "$log" +} +s4_case + +echo +echo "protocol-gate: $pass passed, $fail failed" +[[ $fail -eq 0 ]] diff --git a/scripts/test-suite-baseline.sh b/scripts/test-suite-baseline.sh new file mode 100755 index 00000000..4c51583d --- /dev/null +++ b/scripts/test-suite-baseline.sh @@ -0,0 +1,61 @@ +#!/bin/bash +# test-suite-baseline.sh — W14/F10: make FAIL mean something again. +# +# The review (conformance.md F-10): the OCaml suite is not green — a +# permanent ~274-failure band (in-progress hs-* + r7rs radix shadow) is +# normalized, so real regressions hide inside the red noise and nobody can +# tell a new failure from the band. +# +# This gate pins the band instead of ignoring it: the full suite's FAIL +# set is diffed against the checked-in baseline +# (spec/tests/known-failures.txt). 
Two red conditions, both loud: +# NEW failure -> a real regression: fix it (or, if intentional, +# justify + add to the baseline in the same commit) +# VANISHED failure -> something got fixed: delete it from the baseline +# so the win is locked in +# Neither touches the runner or the hs loops' scoreboards — the band still +# prints as FAIL lines for the teams working through it. +# +# Usage: bash scripts/test-suite-baseline.sh +# Runtime: full suite, ~5–15 min. Exit 0 = fail set identical to baseline. +set -uo pipefail +cd "$(dirname "$0")/.." + +RUNNER=hosts/ocaml/_build/default/bin/run_tests.exe +BASELINE=spec/tests/known-failures.txt +[[ -x "$RUNNER" ]] || { echo "SKIP: $RUNNER not built" >&2; exit 2; } +[[ -f "$BASELINE" ]] || { echo "SKIP: $BASELINE missing" >&2; exit 2; } + +log=$(mktemp) +timeout 3000 "$RUNNER" > "$log" 2>&1 +rc=$? +if [[ $rc -ne 0 && $rc -ne 1 ]]; then + echo "RED: runner exited $rc (timeout/crash)"; tail -5 "$log"; rm -f "$log"; exit 1 +fi + +# Normalize: keep the stable test identity (suite > name), drop messages +# (error text may contain addresses/timings that churn). 
+current=$(mktemp) +grep '^ FAIL: ' "$log" | sed 's/^ FAIL: //; s/: .*$//' | sort -u > "$current" + +new_failures=$(comm -13 <(sort -u "$BASELINE") "$current") +vanished=$(comm -23 <(sort -u "$BASELINE") "$current") + +summary=$(grep '^Results:' "$log" | tail -1) +red=0 +if [[ -n "$new_failures" ]]; then + echo "RED: NEW failures not in baseline:" + sed 's/^/ + /' <<<"$new_failures" + red=1 +fi +if [[ -n "$vanished" ]]; then + echo "RED: baseline entries now PASSING (delete them from $BASELINE):" + sed 's/^/ - /' <<<"$vanished" + red=1 +fi +if [[ $red -eq 0 ]]; then + echo "GREEN: fail set identical to baseline ($(wc -l < "$BASELINE") known failures)" +fi +echo "$summary" +rm -f "$log" "$current" +exit $red diff --git a/scripts/test-wasm-corpus.sh b/scripts/test-wasm-corpus.sh new file mode 100755 index 00000000..dd36ae47 --- /dev/null +++ b/scripts/test-wasm-corpus.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# test-wasm-corpus.sh — W14/F2: sweep the spec test corpus through the +# SHIPPED browser kernel (sx_browser.bc.wasm.js) headless in Node. +# +# The review (conformance.md F-2) found the shipped browser artifact never +# runs the corpus — F-1/F-3 native/WASM divergences existed undetected. +# Each file runs in its OWN node process via run_wasm_corpus.js (a hang is +# killed by per-file timeout without ending the sweep). +# +# The SKIP list documents files that structurally cannot run on the browser +# kernel (runner-only bindings, native-only machinery) — the F-5/F-6/F-10 +# "one-host-gated" theme, recorded honestly per file with the reason. +# KNOWN_FAIL documents files that RUN but currently have failing tests on +# the shipped kernel (host divergence, F-1/F-3 class): they execute and +# report, but don't gate. Everything else must be GREEN — exit 1 otherwise; +# a KNOWN_FAIL going green also fails (ledger must be updated). +# +# Usage: bash scripts/test-wasm-corpus.sh [file.sx ...] +set -uo pipefail +cd "$(dirname "$0")/.." 
+ +RUNNER=hosts/ocaml/browser/run_wasm_corpus.js +KERNEL=shared/static/wasm/sx_browser.bc.wasm.js +[[ -f "$KERNEL" ]] || { echo "SKIP: $KERNEL missing (run sx-build-all first)" >&2; exit 2; } + +# --- classification (empirical sweep 2026-07-04; see sx-gate-loop.md) --- +# Sweep baseline: 83 files, 80 fully green, 5192 passes, 0 test failures. +# The shipped kernel even provides the CEK driver bindings (make-env, +# cek-step-loop, ...) — broader than a bare sx_server. +declare -A SKIP KNOWN_FAIL +skip() { SKIP[$1]=$2; } +known() { KNOWN_FAIL[$1]=$2; } +# Partial load-errors: the kernel throws mid-file (opaque jsoo exception, +# message "undefined"); tests before the failing form pass and report. +known test-hash-table.sx "partial: 22 pass then load-error mid-file" +known test-r7rs.sx "partial: 87 pass then load-error mid-file" +known test-sets.sx "partial: 30 pass then load-error mid-file" + +pass_total=0; fail_total=0; red=0; files=0 +declare -a targets +if [[ $# -gt 0 ]]; then targets=("$@"); +else for f in spec/tests/test-*.sx; do + [[ "$(basename "$f")" == "test-framework.sx" ]] && continue + targets+=("$f") +done; fi + +for f in "${targets[@]}"; do + base=$(basename "$f") + if [[ -n "${SKIP[$base]:-}" ]]; then + echo "SKIP: $base — ${SKIP[$base]}" + continue + fi + files=$((files+1)) + line=$(timeout 120 node "$RUNNER" "$f" 2>/dev/null | grep '^CORPUS-RESULT' || true) + if [[ -z "$line" ]]; then + echo "RED: $base — timeout or crash (no CORPUS-RESULT)" + red=$((red+1)); continue + fi + p=$(sed -n 's/.*pass=\([0-9]*\).*/\1/p' <<<"$line") + fl=$(sed -n 's/.*fail=\([0-9]*\).*/\1/p' <<<"$line") + st=$(sed -n 's/.*status=\([a-z-]*\).*/\1/p' <<<"$line") + pass_total=$((pass_total+p)); fail_total=$((fail_total+fl)) + if [[ -n "${KNOWN_FAIL[$base]:-}" ]]; then + if [[ "$fl" -eq 0 && "$st" == "ok" ]]; then + echo "RED: $base — KNOWN_FAIL is now GREEN (${KNOWN_FAIL[$base]}); update the ledger" + red=$((red+1)) + else + echo "KNOWN-FAIL: $base pass=$p fail=$fl ($( echo 
"${KNOWN_FAIL[$base]}" ))" + fi + continue + fi + if [[ "$st" != "ok" || "$fl" -ne 0 ]]; then + echo "RED: $base pass=$p fail=$fl status=$st" + red=$((red+1)) + else + echo "OK: $base pass=$p" + fi +done + +echo +echo "wasm-corpus: $files files run, $pass_total passed, $fail_total failed, $red red" +[[ $red -eq 0 ]] diff --git a/spec/harness.sx b/spec/harness.sx index 1f1ae91d..2e1201de 100644 --- a/spec/harness.sx +++ b/spec/harness.sx @@ -1,6 +1,7 @@ ;; Assert condition is truthy, error with message -(define-library (sx harness) +(define-library + (sx harness) (export assert assert= @@ -12,6 +13,7 @@ harness-set! make-interceptor install-interceptors + harness-run-perform io-calls io-call-count io-call-nth @@ -24,71 +26,240 @@ assert-io-result assert-state) (begin - -(define assert (fn (condition msg) (when (not condition) (error (or msg "Assertion failed"))))) - -;; Assert two values are equal -(define assert= (fn (actual expected msg) (when (not (= actual expected)) (error (or msg (str "Expected " expected ", got " actual)))))) - -;; Dict of mock IO operations for testing -(define default-platform {:current-user (fn () nil) :csrf-token (fn () "test-csrf-token") :app-url (fn (service &rest path) "/mock-app-url") :frag (fn (service comp &rest args) "") :sleep (fn (ms) nil) :local-storage-set (fn (key val) nil) :set-cookie (fn (name val &rest opts) nil) :url-for (fn (endpoint &rest args) "/mock-url") :create-element (fn (tag) nil) :request-path (fn () "/") :config (fn (key) nil) :set-attr (fn (el name val) nil) :set-text (fn (el text) nil) :remove-child (fn (parent child) nil) :fetch (fn (url &rest opts) {:status 200 :body "" :ok true}) :query (fn (service name &rest args) (list)) :add-class (fn (el cls) nil) :get-element (fn (id) nil) :now (fn () 0) :abort (fn (code) nil) :action (fn (service name &rest args) {:ok true}) :remove-class (fn (el cls) nil) :append-child (fn (parent child) nil) :request-arg (fn (name) nil) :emit-dom (fn (op &rest args) nil) 
:local-storage-get (fn (key) nil) :get-cookie (fn (name) nil)}) - -;; Create a test session with mock IO platform -(define make-harness :effects () (fn (&key platform) (let ((merged (if (nil? platform) default-platform (merge default-platform platform)))) {:log (list) :platform merged :state {:cookies {} :storage {} :dom nil}}))) - -;; Clear IO log and state for a new test -(define harness-reset! :effects () (fn (session) (dict-set! session "log" (list)) (dict-set! session "state" {:cookies {} :storage {} :dom nil}) session)) - -;; Append an IO call record to session log -(define harness-log :effects () (fn (session &key op) (let ((log (get session "log"))) (if (nil? op) log (filter (fn (entry) (= (get entry "op") op)) log))))) - -;; Read state value from session store -(define harness-get :effects () (fn (session key) (get (get session "state") key))) - -;; Write state value to session store -(define harness-set! :effects () (fn (session key value) (dict-set! (get session "state") key value) nil)) - -;; Wrap a mock fn to record calls in the IO log -(define make-interceptor :effects () (fn (session op-name mock-fn) (fn (&rest args) (let ((result (if (empty? args) (mock-fn) (if (= 1 (len args)) (mock-fn (first args)) (if (= 2 (len args)) (mock-fn (first args) (nth args 1)) (if (= 3 (len args)) (mock-fn (first args) (nth args 1) (nth args 2)) (apply mock-fn args)))))) (log (get session "log"))) (append! log {:args args :result result :op op-name}) result)))) - -;; Bind all interceptors into the eval environment -(define install-interceptors :effects () (fn (session env) (for-each (fn (key) (let ((mock-fn (get (get session "platform") key)) (interceptor (make-interceptor session key mock-fn))) (env-bind! 
env key interceptor))) (keys (get session "platform"))) env)) - -;; Query IO log: all calls, or filtered by op name -(define io-calls :effects () (fn (session op-name) (filter (fn (entry) (= (get entry "op") op-name)) (get session "log")))) - -;; Count IO calls, optionally filtered by op name -(define io-call-count :effects () (fn (session op-name) (len (io-calls session op-name)))) - -;; Get the nth IO call record -(define io-call-nth :effects () (fn (session op-name n) (let ((calls (io-calls session op-name))) (if (< n (len calls)) (nth calls n) nil)))) - -;; Get args from the nth call to an operation -(define io-call-args :effects () (fn (session op-name n) (let ((call (io-call-nth session op-name n))) (if (nil? call) nil (get call "args"))))) - -;; Get return value from the nth call to an operation -(define io-call-result :effects () (fn (session op-name n) (let ((call (io-call-nth session op-name n))) (if (nil? call) nil (get call "result"))))) - -;; Assert an IO operation was called at least once -(define assert-io-called :effects () (fn (session op-name) (assert (> (io-call-count session op-name) 0) (str "Expected IO operation " op-name " to be called but it was not")))) - -;; Assert an IO operation was never called -(define assert-no-io :effects () (fn (session op-name) (assert (= (io-call-count session op-name) 0) (str "Expected IO operation " op-name " not to be called but it was called " (io-call-count session op-name) " time(s)")))) - -;; Assert exact call count for an operation -(define assert-io-count :effects () (fn (session op-name expected) (let ((actual (io-call-count session op-name))) (assert (= actual expected) (str "Expected " op-name " to be called " expected " time(s) but was called " actual " time(s)"))))) - -;; Assert args of the nth call match expected -(define assert-io-args :effects () (fn (session op-name n expected-args) (let ((actual (io-call-args session op-name n))) (assert (equal? 
actual expected-args) (str "Expected call " n " to " op-name " with args " (str expected-args) " but got " (str actual)))))) - -;; Assert result of the nth call matches expected -(define assert-io-result :effects () (fn (session op-name n expected) (let ((actual (io-call-result session op-name n))) (assert (equal? actual expected) (str "Expected call " n " to " op-name " to return " (str expected) " but got " (str actual)))))) - -;; Assert a state key has the expected value -(define assert-state :effects () (fn (session key expected) (let ((actual (harness-get session key))) (assert (equal? actual expected) (str "Expected state " key " to be " (str expected) " but got " (str actual)))))) - - -)) ;; end define-library + (define + assert + (fn + (condition msg) + (when (not condition) (error (or msg "Assertion failed"))))) + (define + assert= + (fn + (actual expected msg) + (when + (not (= actual expected)) + (error (or msg (str "Expected " expected ", got " actual)))))) + (define default-platform {:get-cookie (fn (name) nil) :local-storage-get (fn (key) nil) :emit-dom (fn (op &rest args) nil) :request-arg (fn (name) nil) :append-child (fn (parent child) nil) :remove-class (fn (el cls) nil) :action (fn (service name &rest args) {:ok true}) :abort (fn (code) nil) :now (fn () 0) :get-element (fn (id) nil) :add-class (fn (el cls) nil) :query (fn (service name &rest args) (list)) :fetch (fn (url &rest opts) {:ok true :body "" :status 200}) :remove-child (fn (parent child) nil) :set-text (fn (el text) nil) :set-attr (fn (el name val) nil) :config (fn (key) nil) :request-path (fn () "/") :create-element (fn (tag) nil) :url-for (fn (endpoint &rest args) "/mock-url") :set-cookie (fn (name val &rest opts) nil) :local-storage-set (fn (key val) nil) :sleep (fn (ms) nil) :frag (fn (service comp &rest args) "") :app-url (fn (service &rest path) "/mock-app-url") :csrf-token (fn () "test-csrf-token") :current-user (fn () nil)}) + (define + make-harness + :effects () + (fn + (&key 
platform) + (let + ((merged (if (nil? platform) default-platform (merge default-platform platform)))) + {:state {:dom nil :storage {} :cookies {}} :platform merged :log (list)}))) + (define + harness-reset! + :effects () + (fn + (session) + (dict-set! session "log" (list)) + (dict-set! session "state" {:dom nil :storage {} :cookies {}}) + session)) + (define + harness-log + :effects () + (fn + (session &key op) + (let + ((log (get session "log"))) + (if + (nil? op) + log + (filter (fn (entry) (= (get entry "op") op)) log))))) + (define + harness-get + :effects () + (fn (session key) (get (get session "state") key))) + (define + harness-set! + :effects () + (fn + (session key value) + (dict-set! (get session "state") key value) + nil)) + (define + harness-invoke-mock + :effects () + (fn + (mock-fn args) + (if + (empty? args) + (mock-fn) + (if + (= 1 (len args)) + (mock-fn (first args)) + (if + (= 2 (len args)) + (mock-fn (first args) (nth args 1)) + (if + (= 3 (len args)) + (mock-fn + (first args) + (nth args 1) + (nth args 2)) + (apply mock-fn args))))))) + (define + make-interceptor + :effects () + (fn + (session op-name mock-fn) + (fn + (&rest args) + (let + ((entry {:op op-name :result nil :args args}) (log (get session "log"))) + (append! log entry) + (let + ((result (harness-invoke-mock mock-fn args))) + (dict-set! entry "result" result) + result))))) + (define + install-interceptors + :effects () + (fn + (session env) + (for-each + (fn + (key) + (let + ((mock-fn (get (get session "platform") key)) + (interceptor (make-interceptor session key mock-fn))) + (env-bind! env key interceptor))) + (keys (get session "platform"))) + env)) + (define + harness-run-perform + :effects () + (fn + (session expr env) + (let + ((drive (fn (self state) (if (cek-suspended? state) (let ((req (cek-io-request state))) (let ((op (get req "op")) (args (or (get req "args") (list)))) (let ((mock-fn (get (get session "platform") op))) (when (nil? 
mock-fn) (error (str "harness-run-perform: no mock for op " op))) (let ((entry {:op op :result nil :args args}) (log (get session "log"))) (append! log entry) (let ((result (harness-invoke-mock mock-fn args))) (dict-set! entry "result" result) (self self (cek-resume state result))))))) (cek-value state))))) + (drive drive (cek-step-loop (make-cek-state expr env (list))))))) + (define + io-calls + :effects () + (fn + (session op-name) + (filter + (fn (entry) (= (get entry "op") op-name)) + (get session "log")))) + (define + io-call-count + :effects () + (fn (session op-name) (len (io-calls session op-name)))) + (define + io-call-nth + :effects () + (fn + (session op-name n) + (let + ((calls (io-calls session op-name))) + (if (< n (len calls)) (nth calls n) nil)))) + (define + io-call-args + :effects () + (fn + (session op-name n) + (let + ((call (io-call-nth session op-name n))) + (if (nil? call) nil (get call "args"))))) + (define + io-call-result + :effects () + (fn + (session op-name n) + (let + ((call (io-call-nth session op-name n))) + (if (nil? call) nil (get call "result"))))) + (define + assert-io-called + :effects () + (fn + (session op-name) + (assert + (> (io-call-count session op-name) 0) + (str "Expected IO operation " op-name " to be called but it was not")))) + (define + assert-no-io + :effects () + (fn + (session op-name) + (assert + (= (io-call-count session op-name) 0) + (str + "Expected IO operation " + op-name + " not to be called but it was called " + (io-call-count session op-name) + " time(s)")))) + (define + assert-io-count + :effects () + (fn + (session op-name expected) + (let + ((actual (io-call-count session op-name))) + (assert + (= actual expected) + (str + "Expected " + op-name + " to be called " + expected + " time(s) but was called " + actual + " time(s)"))))) + (define + assert-io-args + :effects () + (fn + (session op-name n expected-args) + (let + ((actual (io-call-args session op-name n))) + (assert + (equal? 
actual expected-args) + (str + "Expected call " + n + " to " + op-name + " with args " + (str expected-args) + " but got " + (str actual)))))) + (define + assert-io-result + :effects () + (fn + (session op-name n expected) + (let + ((actual (io-call-result session op-name n))) + (assert + (equal? actual expected) + (str + "Expected call " + n + " to " + op-name + " to return " + (str expected) + " but got " + (str actual)))))) + (define + assert-state + :effects () + (fn + (session key expected) + (let + ((actual (harness-get session key))) + (assert + (equal? actual expected) + (str + "Expected state " + key + " to be " + (str expected) + " but got " + (str actual)))))))) ;; end define-library ;; Re-export to global namespace for backward compatibility (import (sx harness)) diff --git a/spec/tests/differential-probes.txt b/spec/tests/differential-probes.txt new file mode 100644 index 00000000..45a90c9a --- /dev/null +++ b/spec/tests/differential-probes.txt @@ -0,0 +1,65 @@ +# W14/F8 differential probe corpus — one expression per line. +# Same expression evaluated on the native server (epoch protocol) and the +# shipped WASM kernel (K.eval); scripts/test-differential.sh diffs results. +# Classes drawn from review findings F-1 (integer arithmetic), F-3 (apply, +# dict key order), F-8 itemization, S-4 (float printing), K18/K53. 
+# integers & display (F-1) +(+ 1 2) +(- 10 3) +(* 6 7) +(/ 4 2) +(/ 1 2) +(/ 10 4) +(quotient 13 4) +(mod 10 3) +# float printing (S-4) +(+ 0.1 0.2) +(* 3 0.1) +(/ 1 3) +(str 0.3) +(str 1.5) +(str 2.0) +# overflow / expt (K18) +(expt 2 10) +(expt 2 62) +(expt 2 100) +(+ 9223372036854775807 1) +# apply (F-3) +(apply + (list 1 2 3)) +(apply max (list 1 5 2)) +(apply str (list "a" "b")) +# dict key order (F-3) +(keys {:b 2 :a 1 :c 3}) +(str {:b 2 :a 1}) +(vals {:b 2 :a 1}) +# strings +(split "a,b,c" ",") +(split "a--b" "--") +(len "héllo") +(upcase "abc") +(str (char-code "A")) +(substring "hello" 1 3) +(join "-" (list "x" "y")) +# equality & comparison +(= 1 1.0) +(= (list 1 2) (list 1 2)) +(equal? (list 1) (list 1)) +(< 1 2 3) +# collections +(sort (list 3 1 2)) +(range 3) +(reverse (list 1 2 3)) +(nth (list 10 20 30) 1) +(contains? {:a 1} :a) +(get {:a 1} :zz 99) +# quasiquote / quote +(quasiquote (1 (unquote (+ 1 1)) 3)) +(str (quote sym)) +# conditionals & special forms +(if true 1 2) +(and 1 2 3) +(or nil false 7) +(do ((fn (x) x) 5) 99) +# error normalization (both sides should error) +(undefined-symbol-xyz) +(/ 1 0) diff --git a/spec/tests/known-failures.txt b/spec/tests/known-failures.txt new file mode 100644 index 00000000..59bcc774 --- /dev/null +++ b/spec/tests/known-failures.txt @@ -0,0 +1,273 @@ +hs-compat-asExpression > converts-a-complete-form-into-values +hs-compat-asExpression > converts-strings-into-fragments +hs-compat-asExpression > converts-value-as-json +hs-compat-blockLiteral > can-map-an-array +hs-compat-in > basic-no-query-return-values +hs-compat-typecheck > can-do-basic-non-string-typecheck-failure +hs-compat-typecheck > can-do-basic-string-non-null-typecheck +hs-compat-typecheck > can-do-basic-string-typecheck +hs-compat-typecheck > null-causes-null-safe-string-check-to-fail +hs-dev-asExpression > parses string as JSON to object +hs-dev-collectionExpressions > where binds after property access +hs-dev-comparisonOperator > I am between 
works +hs-dev-comparisonOperator > I am not between works +hs-dev-comparisonOperator > is still does equality when rhs variable exists +hs-dev-pick > can pick first n items +hs-dev-pick > can pick items using 'of' syntax +hs-dev-pick > can pick last n items +hs-dev-pick > can pick random item +hs-dev-pick > can pick random n items +hs-emit-classes > remove class from target +hs-emit-control-flow > tell rebinds me +hs-emit-def-behavior > def becomes define +hs-emit-dom-commands > hide sets display none +hs-emit-dom-commands > log passes through +hs-emit-dom-commands > show clears display +hs-emit-on > on every click +hs-extra-function-call > identity-call +hs-extra-lambda > array-map-block +hs-extra-lambda > arrow-true +hs-extra-typecheck > null-colon-string +hs-parse-assignment > put into +hs-parse-assignment > set property to string +hs-parse-basic-commands > add class to me +hs-parse-basic-commands > remove class from me +hs-parse-basic-commands > toggle between two classes +hs-parse-basic-commands > toggle class on me +hs-parse-conditional > if else end +hs-parse-conditional > if then end +hs-parse-conformance > increment @count → full AST +hs-parse-conformance > on click add .called → full AST +hs-parse-conformance > on click from #bar add .clicked → full AST +hs-parse-conformance > toggle between .foo and .bar → full AST +hs-parse-conformance > wait 100ms then add .done → full AST +hs-parse-events > on click add class +hs-parse-events > on click from target +hs-parse-every-modifier > on every click +hs-parse-expressions > attribute ref +hs-parse-expressions > style ref +hs-parse-send-trigger > trigger event on me +hs-parse-sequencing > wait then add +hs-parse-special-commands > decrement attribute +hs-parse-special-commands > hide +hs-parse-special-commands > increment attribute +hs-parse-special-commands > show target +hs-parse-unary > not expr +hs-runtime-e2e > source → SX shape +hs-runtime-make > make Map returns dict +hs-runtime-make > make Set returns 
list +hs-tokenize-arithmetic-ops > division operator +hs-tokenize-arithmetic-ops > mixed arithmetic +hs-tokenize-arithmetic-ops > modulo operator +hs-tokenize-arithmetic-ops > multiply operator +hs-tokenize-basics > keywords vs identifiers +hs-tokenize-basics > whitespace skipped +hs-tokenize-comments > line comment skipped +hs-tokenize-full-expressions > if true put "foo" into me.innerHTML else put "bar" into me.innerHTML end +hs-tokenize-full-expressions > increment @count then put it into me +hs-tokenize-full-expressions > on click add .called +hs-tokenize-full-expressions > on click[buttons==0] log event +hs-tokenize-full-expressions > on click from #bar add .clicked +hs-tokenize-full-expressions > on click send custom(foo:"fromBar") to #d2 +hs-tokenize-full-expressions > put "Clicked" into my.innerHTML +hs-tokenize-full-expressions > set #d1.innerHTML to foo +hs-tokenize-full-expressions > toggle between .foo and .bar +hs-tokenize-full-expressions > wait 100ms then add .done +hs-upstream-add > can add a value to a set +hs-upstream-add > can add to an HTMLCollection +hs-upstream-add > can add to children +hs-upstream-add > can add to query in me +hs-upstream-add > supports async expressions in when clause +hs-upstream-append > append to undefined ignores the undefined +hs-upstream-append > can append a value to a DOM node +hs-upstream-append > can append a value to a set +hs-upstream-append > can append a value to I +hs-upstream-append > multiple appends work +hs-upstream-append > new DOM content added by append will be live +hs-upstream-askAnswer > confirm returns first choice on OK +hs-upstream-askAnswer > prompts and puts result in it +hs-upstream-call > call functions that return promises are waited on +hs-upstream-core/asyncError > rejected promise stops execution +hs-upstream-core/asyncError > rejected promise triggers catch block +hs-upstream-core/regressions > can invoke functions w/ numbers in name +hs-upstream-core/regressions > can pick detail fields 
out by name +hs-upstream-core/regressions > can refer to function in init blocks +hs-upstream-core/runtimeErrors > reports basic function invocation null errors properly +hs-upstream-core/runtimeErrors > reports basic function invocation null errors properly w/ of +hs-upstream-core/runtimeErrors > reports basic function invocation null errors properly w/ possessives +hs-upstream-core/runtimeErrors > reports null errors on add command properly +hs-upstream-core/runtimeErrors > reports null errors on decrement command properly +hs-upstream-core/runtimeErrors > reports null errors on default command properly +hs-upstream-core/runtimeErrors > reports null errors on hide command properly +hs-upstream-core/runtimeErrors > reports null errors on increment command properly +hs-upstream-core/runtimeErrors > reports null errors on measure command properly +hs-upstream-core/runtimeErrors > reports null errors on put command properly +hs-upstream-core/runtimeErrors > reports null errors on remove command properly +hs-upstream-core/runtimeErrors > reports null errors on send command properly +hs-upstream-core/runtimeErrors > reports null errors on sets properly +hs-upstream-core/runtimeErrors > reports null errors on settle command properly +hs-upstream-core/runtimeErrors > reports null errors on show command properly +hs-upstream-core/runtimeErrors > reports null errors on toggle command properly +hs-upstream-core/runtimeErrors > reports null errors on transition command properly +hs-upstream-core/runtimeErrors > reports null errors on trigger command properly +hs-upstream-core/runtime > has proper stack from event handler +hs-upstream-core/scoping > locally scoped variables don't clash with built-in variables +hs-upstream-empty > can empty a map +hs-upstream-empty > can empty an element +hs-upstream-empty > can empty a set +hs-upstream-empty > clear works on elements +hs-upstream-expressions/asExpression > can accept custom dynamic conversions 
+hs-upstream-expressions/asExpression > can use the a modifier if you like +hs-upstream-expressions/asExpression > collects duplicate text inputs into an array +hs-upstream-expressions/asExpression > converts a complete form into Values +hs-upstream-expressions/asExpression > converts a form element into Values +hs-upstream-expressions/asExpression > converts a form element into Values | FormEncoded +hs-upstream-expressions/asExpression > converts a form element into Values | JSONString +hs-upstream-expressions/asExpression > converts an element into HTML +hs-upstream-expressions/asExpression > converts a NodeList into HTML +hs-upstream-expressions/asExpression > converts array as Set +hs-upstream-expressions/asExpression > converts checkboxes into a Value correctly +hs-upstream-expressions/asExpression > converts multiple selects into a Value correctly +hs-upstream-expressions/asExpression > converts multiple selects with programmatically changed selections +hs-upstream-expressions/asExpression > converts object as Map +hs-upstream-expressions/asExpression > converts radio buttons into a Value correctly +hs-upstream-expressions/asExpression > converts value as Date +hs-upstream-expressions/asExpression > parses string as JSON to object +hs-upstream-expressions/asExpression > pipe operator chains conversions +hs-upstream-expressions/blockLiteral > basic block literals work +hs-upstream-expressions/blockLiteral > basic identity works +hs-upstream-expressions/blockLiteral > basic two arg identity works +hs-upstream-expressions/closest > closest does not consume a following where clause +hs-upstream-expressions/comparisonOperator > does not exist works +hs-upstream-expressions/cookies > basic clear cookie values work +hs-upstream-expressions/cookies > basic set cookie values work +hs-upstream-expressions/cookies > iterate cookies values work +hs-upstream-expressions/cookies > length is 0 when no cookies are set +hs-upstream-expressions/cookies > update cookie values 
work +hs-upstream-expressions/functionCalls > can access a property of a call's result +hs-upstream-expressions/functionCalls > can chain calls on the result of a call +hs-upstream-expressions/functionCalls > can invoke function on object +hs-upstream-expressions/functionCalls > can invoke function on object w/ async arg +hs-upstream-expressions/functionCalls > can invoke function on object w/ async root & arg +hs-upstream-expressions/functionCalls > can invoke global function +hs-upstream-expressions/functionCalls > can invoke global function w/ async arg +hs-upstream-expressions/functionCalls > can pass an array literal as an argument +hs-upstream-expressions/functionCalls > can pass an expression as an argument +hs-upstream-expressions/functionCalls > can pass an object literal as an argument +hs-upstream-expressions/functionCalls > can pass no arguments +hs-upstream-expressions/logicalOperator > and short-circuits when lhs promise resolves to false +hs-upstream-expressions/logicalOperator > should short circuit with and expression +hs-upstream-expressions/logicalOperator > should short circuit with or expression +hs-upstream-expressions/mathOperator > can use mixed expressions +hs-upstream-expressions/objectLiteral > expressions work in object literal field names +hs-upstream-expressions/propertyAccess > property access on function result +hs-upstream-expressions/some > some returns true for nonempty selector +hs-upstream-expressions/strings > string templates work w/ props +hs-upstream-expressions/strings > string templates work w/ props w/ braces +hs-upstream-expressions/symbol > resolves global context properly +hs-upstream-fetch > allows the event handler to change the fetch parameters +hs-upstream-fetch > as response does not throw on 404 +hs-upstream-fetch > can catch an error that occurs when using fetch +hs-upstream-fetch > can do a simple fetch +hs-upstream-fetch > can do a simple fetch w/ a custom conversion +hs-upstream-fetch > can do a simple fetch 
w/ a naked URL +hs-upstream-fetch > can do a simple fetch w/ html +hs-upstream-fetch > can do a simple fetch w/ json +hs-upstream-fetch > can do a simple fetch w/ json using JSON syntax +hs-upstream-fetch > can do a simple fetch w/ json using Object syntax +hs-upstream-fetch > can do a simple fetch w/ json using Object syntax and an 'an' prefix +hs-upstream-fetch > can do a simple post +hs-upstream-fetch > can do a simple post alt syntax w/ curlies +hs-upstream-fetch > can do a simple post alt syntax without curlies +hs-upstream-fetch > can put response conversion after with +hs-upstream-fetch > can put response conversion before with +hs-upstream-fetch > do not throw passes through 404 response +hs-upstream-fetch > don't throw passes through 404 response +hs-upstream-fetch > Response can be converted to JSON via as JSON +hs-upstream-fetch > submits the fetch parameters to the event handler +hs-upstream-fetch > throws on non-2xx response by default +hs-upstream-fetch > triggers an event just before fetching +hs-upstream-hide > can hide element, with display:none by default +hs-upstream-hide > can hide element with display:none explicitly +hs-upstream-hide > can hide element with no target followed by command +hs-upstream-hide > can hide element with no target followed by then +hs-upstream-hide > can hide element with no target with a with +hs-upstream-hide > can hide element with opacity:0 +hs-upstream-hide > can hide element with opacity style literal +hs-upstream-hide > can hide element, with visibility:hidden +hs-upstream-hide > can hide other elements +hs-upstream-if > if on new line does not join w/ else +hs-upstream-if > if properly supports nested if statements and end block +hs-upstream-js > can do both of the above +hs-upstream-js > can return values to _hyperscript +hs-upstream-js > handles rejected promises without hanging +hs-upstream-make > can make elements +hs-upstream-make > can make elements with id and classes +hs-upstream-make > can make named 
objects +hs-upstream-make > can make named objects w/ global scope +hs-upstream-make > can make named objects with arguments +hs-upstream-make > can make objects +hs-upstream-make > can make objects with arguments +hs-upstream-make > creates a div by default +hs-upstream-on > can catch exceptions thrown in hyperscript functions +hs-upstream-on > can catch exceptions thrown in js functions +hs-upstream-on > can ignore when target doesn't exist +hs-upstream-on > can pick detail fields out by name +hs-upstream-on > can pick event properties out by name +hs-upstream-on > listeners on other elements are removed when the registering element is removed +hs-upstream-on > multiple event handlers at a time are allowed to execute with the every keyword +hs-upstream-on > on intersection fires when the element is in the viewport +hs-upstream-on > rethrown exceptions trigger 'exception' event +hs-upstream-on > throttled at