fix: cek_run propagates IO suspension via _cek_io_suspend_hook
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m52s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 1m52s
When a `perform` fired inside a tree-walked eval_expr path — sf_letrec init
exprs / non-last body exprs, expand_macro body, qq_expand unquote,
sf_dynamic_wind / sf_scope / sf_provide bodies — cek_run raised
"IO suspension in non-IO context" and swallowed the suspension. The hook
that converts the CEK suspended state to VmSuspended (so the outer driver
sees it as a resumable suspension object) was defined in sx_vm.ml but
never invoked from cek_run.
Repro in Node.js (hosts/ocaml/browser/test_letrec_resume.js):
(letrec ((x (perform {:op "io"}))) "ok") ;; threw the error
(letrec ((x 1)) (perform {:op "io"}) "after") ;; threw the error
The originally reported browser symptom — "[sx] resume: Not callable: nil"
after hs-wait resumes inside a letrec — was the same root cause showing
through the JIT/VM resume path instead of as a top-level error.
Fix: cek_run and cek_run_iterative now check !_cek_io_suspend_hook and
invoke it when the loop terminates in a suspended state. The hook (set by
sx_vm.ml in the browser, by run_tests.ml in the test runner) converts the
suspension to VmSuspended / resolves IO synchronously. When the hook is
unset (pure-CEK harness), the legacy Eval_error is raised so misuse stays
visible.
Also patches:
- hosts/ocaml/bootstrap.py — regex-patches the transpiled cek_run on regen
so the fix survives a fresh `python3 hosts/ocaml/bootstrap.py` cycle.
- hosts/ocaml/browser/sx_browser.ml — api_eval / api_eval_vm / api_eval_expr
now catch VmSuspended and surface a clean error string (K.eval has no
driver to resume; callers who want resumption use callFn).
Tests:
- spec/tests/test-letrec-resume-treewalk.sx — 7 CEK-level regression tests
covering letrec init / non-last body, scope/provide bodies, sibling
fn-after-perform. All 7 fail in baseline ("IO suspension in non-IO
context"), all 7 pass with the fix.
- hosts/ocaml/browser/test_letrec_resume.js — 13 WASM kernel tests via
callFn driveSync, including the wait-boot pattern from the briefing.
All 13 pass.
Suite results: 4557 pass / 1338 fail (was 4550 / 1339); +7 new passes,
-1 flaky timeout (hs-upstream-if sieve), no regressions.
This commit is contained in:
@@ -82,7 +82,10 @@ let cek_run_iterative state =
|
||||
s := cek_step !s
|
||||
done;
|
||||
(match cek_suspended_p !s with
|
||||
| Bool true -> raise (Eval_error "IO suspension in non-IO context")
|
||||
| Bool true ->
|
||||
(match !_cek_io_suspend_hook with
|
||||
| Some hook -> hook !s
|
||||
| None -> raise (Eval_error "IO suspension in non-IO context"))
|
||||
| _ -> cek_value !s)
|
||||
with Eval_error msg ->
|
||||
_last_error_kont_ref := cek_kont !s;
|
||||
@@ -308,6 +311,23 @@ def compile_spec_to_ml(spec_dir: str | None = None) -> str:
|
||||
output
|
||||
)
|
||||
|
||||
# Patch transpiled cek_run to invoke _cek_io_suspend_hook on suspension
|
||||
# instead of unconditionally raising Eval_error. This is the fix for the
|
||||
# tree-walk eval_expr path: sf_letrec init exprs / non-last body exprs,
|
||||
# macro bodies, qq_expand, dynamic-wind / scope / provide bodies all use
|
||||
# `trampoline (eval_expr ...)` and were swallowing CEK suspensions as
|
||||
# "IO suspension in non-IO context" errors. With the hook, the suspension
|
||||
# propagates as VmSuspended to the outer driver (browser callFn / server
|
||||
# eval_expr_io). When the hook is unset (pure-CEK harness), the legacy
|
||||
# error is preserved as the fallback.
|
||||
output = re.sub(
|
||||
r'\(raise \(Eval_error \(value_to_str \(String "IO suspension in non-IO context"\)\)\)\)',
|
||||
'(match !_cek_io_suspend_hook with Some hook -> hook final | None -> '
|
||||
'(raise (Eval_error (value_to_str (String "IO suspension in non-IO context")))))',
|
||||
output,
|
||||
count=1,
|
||||
)
|
||||
|
||||
return output
|
||||
|
||||
|
||||
|
||||
@@ -344,6 +344,12 @@ let api_eval src_js =
|
||||
sync_env_to_vm ();
|
||||
return_via_side_channel (value_to_js result)
|
||||
with
|
||||
| Sx_vm.VmSuspended _ ->
|
||||
(* Top-level eval encountered an IO suspension propagated via the
|
||||
cek_run hook (perform inside letrec init / non-last body / macro /
|
||||
qq tree-walked path). K.eval doesn't drive resumption — surface as
|
||||
a clear error so the caller knows to use callFn instead. *)
|
||||
Js.Unsafe.inject (Js.string "Error: IO suspension in non-IO context (use callFn for IO-aware paths)")
|
||||
| Eval_error msg -> Js.Unsafe.inject (Js.string ("Error: " ^ msg))
|
||||
| Parse_error msg -> Js.Unsafe.inject (Js.string ("Parse error: " ^ msg))
|
||||
|
||||
@@ -371,6 +377,8 @@ let api_eval_vm src_js =
|
||||
) _vm_globals;
|
||||
return_via_side_channel (value_to_js result)
|
||||
with
|
||||
| Sx_vm.VmSuspended _ ->
|
||||
Js.Unsafe.inject (Js.string "Error: IO suspension in non-IO context (use callFn for IO-aware paths)")
|
||||
| Eval_error msg -> Js.Unsafe.inject (Js.string ("Error: " ^ msg))
|
||||
| Parse_error msg -> Js.Unsafe.inject (Js.string ("Parse error: " ^ msg))
|
||||
| Not_found -> Js.Unsafe.inject (Js.string "Error: compile-module not loaded")
|
||||
@@ -381,7 +389,10 @@ let api_eval_expr expr_js _env_js =
|
||||
let result = Sx_ref.eval_expr expr (Env global_env) in
|
||||
sync_env_to_vm ();
|
||||
return_via_side_channel (value_to_js result)
|
||||
with Eval_error msg ->
|
||||
with
|
||||
| Sx_vm.VmSuspended _ ->
|
||||
Js.Unsafe.inject (Js.string "Error: IO suspension in non-IO context (use callFn for IO-aware paths)")
|
||||
| Eval_error msg ->
|
||||
Js.Unsafe.inject (Js.string ("Error: " ^ msg))
|
||||
|
||||
let api_load src_js =
|
||||
|
||||
172
hosts/ocaml/browser/test_letrec_resume.js
Normal file
172
hosts/ocaml/browser/test_letrec_resume.js
Normal file
@@ -0,0 +1,172 @@
|
||||
#!/usr/bin/env node
|
||||
// Repro: letrec sibling bindings nil after perform/resume in browser kernel
|
||||
//
|
||||
// Bug: After a CEK IO suspension (perform / hs-wait) resumes in the
|
||||
// WASM browser kernel, calling a sibling letrec binding could return
|
||||
// nil, with the error surfaced as `[sx] resume: Not callable: nil`.
|
||||
//
|
||||
// Root cause: cek-run / cek_run_iterative raised
|
||||
// `"IO suspension in non-IO context"` when a tree-walked eval_expr
|
||||
// (sf_letrec init exprs / non-last body, macro body, qq unquote, scope
|
||||
// body, provide body, dynamic-wind) hit a perform. The CEK suspension
|
||||
// was created correctly but never propagated through the OCaml-side
|
||||
// _cek_io_suspend_hook, so the outer driver never saw VmSuspended.
|
||||
//
|
||||
// Fix: cek_run / cek_run_iterative now invoke _cek_io_suspend_hook on
|
||||
// suspension (raising VmSuspended for the outer driver). When the hook
|
||||
// is unset (pure-CEK harness), they fall back to the legacy error.
|
||||
//
|
||||
// This test exercises the WASM kernel through K.callFn — the path that
|
||||
// browser event handlers use. Suspension surfaces as a JS object with
|
||||
// {suspended, request, resume(result)} that the test drives synchronously.
|
||||
//
|
||||
// Companion: spec/tests/test-letrec-resume-treewalk.sx tests the
|
||||
// CEK-only path through the OCaml test runner.
|
||||
|
||||
const path = require('path');
|
||||
const fs = require('fs');
|
||||
|
||||
const KERNEL = path.join(__dirname, '..', '_build', 'default', 'browser', 'sx_browser.bc.js');
|
||||
if (!fs.existsSync(KERNEL)) {
|
||||
console.error('FATAL: missing ' + KERNEL + ' — run `dune build` from hosts/ocaml first');
|
||||
process.exit(2);
|
||||
}
|
||||
require(KERNEL);
|
||||
const K = globalThis.SxKernel;
|
||||
|
||||
let passed = 0, failed = 0;
|
||||
const failures = [];
|
||||
|
||||
function test(name, fn) {
|
||||
try {
|
||||
const r = fn();
|
||||
if (r === true) {
|
||||
passed++;
|
||||
console.log(' PASS: ' + name);
|
||||
} else {
|
||||
failed++;
|
||||
failures.push({ name, error: 'got ' + JSON.stringify(r) });
|
||||
console.log(' FAIL: ' + name + ' — got ' + JSON.stringify(r));
|
||||
}
|
||||
} catch (e) {
|
||||
failed++;
|
||||
failures.push({ name, error: e.message || String(e) });
|
||||
console.log(' FAIL: ' + name + ' — ' + (e.message || e));
|
||||
}
|
||||
}
|
||||
|
||||
function driveSync(result) {
|
||||
while (result && typeof result === 'object' && result.suspended) {
|
||||
result = result.resume(null);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
function callExpr(src) {
|
||||
K.eval('(define _t-fn (fn () ' + src + '))');
|
||||
const fn = K.eval('_t-fn');
|
||||
return driveSync(K.callFn(fn, []));
|
||||
}
|
||||
|
||||
console.log('\n=== letrec + perform/resume regression tests ===\n');
|
||||
|
||||
test('basic letrec without perform', () =>
|
||||
callExpr('(letrec ((f (fn () "ok"))) (f))') === 'ok');
|
||||
|
||||
test('callFn perform suspends and resumes with nil', () => {
|
||||
K.eval('(define _t-perform (fn () (perform {:op "io"})))');
|
||||
let r = K.callFn(K.eval('_t-perform'), []);
|
||||
if (!r || !r.suspended) return 'no suspension: ' + JSON.stringify(r);
|
||||
return r.resume(null) === null;
|
||||
});
|
||||
|
||||
test('letrec, single binding, perform/resume', () =>
|
||||
callExpr('(letrec ((f (fn () (perform {:op "io"})))) (f))') === null);
|
||||
|
||||
test('letrec, 2 bindings, body calls sibling after suspended call', () =>
|
||||
callExpr(`
|
||||
(letrec
|
||||
((wait-then (fn () (do (perform {:op "io"}) "wait-done")))
|
||||
(other-fn (fn () "other-result")))
|
||||
(do (wait-then) (other-fn)))`) === 'other-result');
|
||||
|
||||
test('letrec, suspending fn calls sibling after own perform', () =>
|
||||
callExpr(`
|
||||
(letrec
|
||||
((wait-and-call (fn () (do (perform {:op "io"}) (other-fn))))
|
||||
(other-fn (fn () "from-sibling")))
|
||||
(wait-and-call))`) === 'from-sibling');
|
||||
|
||||
test('letrec, fn references sibling value after perform/resume', () =>
|
||||
callExpr(`
|
||||
(letrec
|
||||
((shared "shared-state")
|
||||
(do-fn (fn () (do (perform {:op "io"}) shared))))
|
||||
(do-fn))`) === 'shared-state');
|
||||
|
||||
test('letrec, recursive self-call after perform (wait-boot pattern)', () => {
|
||||
K.eval('(define _wb-c 0)');
|
||||
K.eval('(set! _wb-c 0)');
|
||||
return callExpr(`
|
||||
(letrec ((wait-boot (fn ()
|
||||
(do (perform {:op "io"})
|
||||
(if (>= _wb-c 1)
|
||||
"done"
|
||||
(do (set! _wb-c (+ 1 _wb-c))
|
||||
(wait-boot)))))))
|
||||
(wait-boot))`) === 'done';
|
||||
});
|
||||
|
||||
test('top-level define + perform + sibling call after resume', () => {
|
||||
K.eval('(define do-suspend-x (fn () (do (perform {:op "io"}) (do-other-x))))');
|
||||
K.eval('(define do-other-x (fn () "ok-from-other"))');
|
||||
return callExpr('(do-suspend-x)') === 'ok-from-other';
|
||||
});
|
||||
|
||||
test('letrec, two performs (sequential) then sibling call', () =>
|
||||
callExpr(`
|
||||
(letrec
|
||||
((wait-twice (fn () (do (perform {:op "io1"}) (perform {:op "io2"}) (other))))
|
||||
(other (fn () "after-double")))
|
||||
(wait-twice))`) === 'after-double');
|
||||
|
||||
// === Tree-walk paths that previously raised "IO suspension in non-IO context" ===
|
||||
|
||||
test('letrec init expr with perform — suspension propagates (no error)', () => {
|
||||
let r;
|
||||
try { r = callExpr('(letrec ((x (perform {:op "io"}))) "ok")'); }
|
||||
catch (e) { return 'threw: ' + e.message; }
|
||||
return r === null || r === 'ok';
|
||||
});
|
||||
|
||||
test('letrec non-last body with perform — suspension propagates (no error)', () => {
|
||||
let r;
|
||||
try { r = callExpr('(letrec ((x 1)) (perform {:op "io"}) "after")'); }
|
||||
catch (e) { return 'threw: ' + e.message; }
|
||||
return r === null || r === 'after';
|
||||
});
|
||||
|
||||
test('macro body with perform — suspension propagates', () => {
|
||||
K.eval('(defmacro _m1 (form) (do (perform {:op "io"}) form))');
|
||||
let r;
|
||||
try { r = callExpr('(_m1 "macro-ok")'); }
|
||||
catch (e) { return 'threw: ' + e.message; }
|
||||
return r === 'macro-ok' || r === null;
|
||||
});
|
||||
|
||||
test('quasiquote unquote with perform — suspension propagates', () => {
|
||||
let r;
|
||||
try { r = callExpr('(let ((y "yyy")) `(a ,(do (perform {:op "io"}) y) c))'); }
|
||||
catch (e) { return 'threw: ' + e.message; }
|
||||
return r !== undefined;
|
||||
});
|
||||
|
||||
console.log('\n--- Results ---');
|
||||
console.log('passed: ' + passed);
|
||||
console.log('failed: ' + failed);
|
||||
if (failed > 0) {
|
||||
console.log('\nFailures:');
|
||||
failures.forEach(f => console.log(' - ' + f.name + ': ' + f.error));
|
||||
process.exit(1);
|
||||
}
|
||||
process.exit(0);
|
||||
@@ -614,7 +614,7 @@ and cek_step_loop state =
|
||||
|
||||
(* cek-run *)
|
||||
and cek_run state =
|
||||
(let final = (cek_step_loop (state)) in (if sx_truthy ((cek_suspended_p (final))) then (raise (Eval_error (value_to_str (String "IO suspension in non-IO context")))) else (cek_value (final))))
|
||||
(let final = (cek_step_loop (state)) in (if sx_truthy ((cek_suspended_p (final))) then (match !_cek_io_suspend_hook with Some hook -> hook final | None -> (raise (Eval_error (value_to_str (String "IO suspension in non-IO context"))))) else (cek_value (final))))
|
||||
|
||||
(* cek-resume *)
|
||||
and cek_resume suspended_state result' =
|
||||
@@ -1052,7 +1052,14 @@ let cek_run_iterative state =
|
||||
s := cek_step !s
|
||||
done;
|
||||
(match cek_suspended_p !s with
|
||||
| Bool true -> raise (Eval_error "IO suspension in non-IO context")
|
||||
| Bool true ->
|
||||
(* Propagate suspension via the OCaml-side hook so it converts to
|
||||
VmSuspended and flows to the outer driver (value_to_js / resume
|
||||
callback). Without the hook (pure CEK harness), keep the legacy
|
||||
error so test runners surface the misuse. *)
|
||||
(match !_cek_io_suspend_hook with
|
||||
| Some hook -> hook !s
|
||||
| None -> raise (Eval_error "IO suspension in non-IO context"))
|
||||
| _ -> cek_value !s)
|
||||
with Eval_error msg ->
|
||||
_last_error_kont_ref := cek_kont !s;
|
||||
|
||||
61
spec/tests/test-letrec-resume-treewalk.sx
Normal file
61
spec/tests/test-letrec-resume-treewalk.sx
Normal file
@@ -0,0 +1,61 @@
|
||||
;; Letrec + perform/resume — tree-walk paths regression
|
||||
;;
|
||||
;; Before the fix: cek-run raised "IO suspension in non-IO context" when
|
||||
;; a perform fired inside any tree-walked eval_expr path:
|
||||
;; - sf_letrec init exprs / non-last body exprs (the original bug)
|
||||
;; - sf_dynamic_wind, sf_scope, sf_provide bodies
|
||||
;; - expand_macro body, qq_expand unquote
|
||||
;;
|
||||
;; After the fix: cek-run invokes _cek_io_suspend_hook which (in this test
|
||||
;; runner) resolves the IO and returns the result, allowing evaluation to
|
||||
;; complete successfully. End-to-end: the expression returns a value
|
||||
;; instead of throwing the swallowed-suspension error.
|
||||
;;
|
||||
;; The test runner's hook resolves IO requests via resolve_io (mock).
|
||||
;; In production browser, the hook raises VmSuspended → JS resume callback.
|
||||
|
||||
(defsuite
|
||||
"letrec-resume-treewalk-init"
|
||||
(deftest
|
||||
"perform in letrec init expr — evaluates without error"
|
||||
(let
|
||||
((result (eval-expr-cek (quote (letrec ((x (perform {:op "request-arg" :args (list "k")}))) (or x "default"))) (make-env))))
|
||||
(assert (or (string? result) (nil? result)))))
|
||||
(deftest
|
||||
"perform in letrec non-last body — evaluates without error"
|
||||
(let
|
||||
((result (eval-expr-cek (quote (letrec ((x 42)) (perform {:op "request-arg" :args (list "k")}) (+ x 1))) (make-env))))
|
||||
(assert= result 43)))
|
||||
(deftest
|
||||
"letrec init perform result reachable — no nil sibling"
|
||||
(let
|
||||
((result (eval-expr-cek (quote (letrec ((x 99) (y (fn () x))) (perform {:op "request-arg" :args (list "k")}) (y))) (make-env))))
|
||||
(assert= result 99))))
|
||||
|
||||
|
||||
(defsuite
|
||||
"letrec-resume-treewalk-bodies"
|
||||
(deftest
|
||||
"perform inside scope body — evaluates without error"
|
||||
(let
|
||||
((result (eval-expr-cek (quote (scope "test-scope" :value 1 (perform {:op "request-arg" :args (list "k")}) "scope-done")) (make-env))))
|
||||
(assert= result "scope-done")))
|
||||
(deftest
|
||||
"perform inside provide body — evaluates without error"
|
||||
(let
|
||||
((result (eval-expr-cek (quote (provide "test-key" 7 (perform {:op "request-arg" :args (list "k")}) "provide-done")) (make-env))))
|
||||
(assert= result "provide-done"))))
|
||||
|
||||
|
||||
(defsuite
|
||||
"letrec-resume-treewalk-recursive"
|
||||
(deftest
|
||||
"letrec sibling fn called after perform/resume — sibling not nil"
|
||||
(let
|
||||
((result (eval-expr-cek (quote (letrec ((sib "from-sibling") (suspending (fn () (do (perform {:op "request-arg" :args (list "k")}) sib)))) (suspending))) (make-env))))
|
||||
(assert= result "from-sibling")))
|
||||
(deftest
|
||||
"letrec — suspending fn calls sibling fn after own perform"
|
||||
(let
|
||||
((result (eval-expr-cek (quote (letrec ((wait-and-call (fn () (do (perform {:op "request-arg" :args (list "k")}) (other)))) (other (fn () "called-after-resume"))) (wait-and-call))) (make-env))))
|
||||
(assert= result "called-after-resume"))))
|
||||
Reference in New Issue
Block a user