fix: cek_run propagates IO suspension via _cek_io_suspend_hook

When a `perform` fired inside a tree-walked eval_expr path — sf_letrec init exprs / non-last body exprs, expand_macro body, qq_expand unquote, sf_dynamic_wind / sf_scope / sf_provide bodies — cek_run raised "IO suspension in non-IO context" and swallowed the suspension. The hook that converts the CEK suspended state to VmSuspended (so the outer driver sees it as a resumable suspension object) was defined in sx_vm.ml but never invoked from cek_run.

Repro in Node.js (hosts/ocaml/browser/test_letrec_resume.js):

    (letrec ((x (perform {:op "io"}))) "ok")        ;; threw the error
    (letrec ((x 1)) (perform {:op "io"}) "after")   ;; threw the error

The originally reported browser symptom — "[sx] resume: Not callable: nil" after hs-wait resumes inside a letrec — was the same root cause showing through the JIT/VM resume path instead of as a top-level error.

Fix: cek_run and cek_run_iterative now check !_cek_io_suspend_hook and invoke it when the loop terminates in a suspended state. The hook (set by sx_vm.ml in the browser, by run_tests.ml in the test runner) converts the suspension to VmSuspended / resolves IO synchronously. When the hook is unset (pure-CEK harness), the legacy Eval_error is raised so misuse stays visible.

Also patches:
- hosts/ocaml/bootstrap.py — regex-patches the transpiled cek_run on regen so the fix survives a fresh `python3 hosts/ocaml/bootstrap.py` cycle.
- hosts/ocaml/browser/sx_browser.ml — api_eval / api_eval_vm / api_eval_expr now catch VmSuspended and surface a clean error string (K.eval has no driver to resume; callers who want resumption use callFn).

Tests:
- spec/tests/test-letrec-resume-treewalk.sx — 7 CEK-level regression tests covering letrec init / non-last body, scope/provide bodies, sibling fn-after-perform. All 7 fail in baseline ("IO suspension in non-IO context"), all 7 pass with the fix.
- hosts/ocaml/browser/test_letrec_resume.js — 13 WASM kernel tests via callFn driveSync, including the wait-boot pattern from the briefing. All 13 pass.

Suite results: 4557 pass / 1338 fail (was 4550 / 1339); +7 new passes, -1 flaky timeout (hs-upstream-if sieve), no regressions.
2026-05-07 10:13:48 +00:00
parent a5044cfc08
commit fc13acb805
5 changed files with 275 additions and 4 deletions
--- a/hosts/ocaml/browser/sx_browser.ml
+++ b/hosts/ocaml/browser/sx_browser.ml
@@ -344,6 +344,12 @@ let api_eval src_js =
    sync_env_to_vm ();
    return_via_side_channel (value_to_js result)
  with
+  | Sx_vm.VmSuspended _ ->
+    (* Top-level eval encountered an IO suspension propagated via the
+       cek_run hook (perform inside letrec init / non-last body / macro /
+       qq tree-walked path). K.eval doesn't drive resumption — surface as
+       a clear error so the caller knows to use callFn instead. *)
+    Js.Unsafe.inject (Js.string "Error: IO suspension in non-IO context (use callFn for IO-aware paths)")
  | Eval_error msg -> Js.Unsafe.inject (Js.string ("Error: " ^ msg))
  | Parse_error msg -> Js.Unsafe.inject (Js.string ("Parse error: " ^ msg))

@@ -371,6 +377,8 @@ let api_eval_vm src_js =
    ) _vm_globals;
    return_via_side_channel (value_to_js result)
  with
+  | Sx_vm.VmSuspended _ ->
+    Js.Unsafe.inject (Js.string "Error: IO suspension in non-IO context (use callFn for IO-aware paths)")
  | Eval_error msg -> Js.Unsafe.inject (Js.string ("Error: " ^ msg))
  | Parse_error msg -> Js.Unsafe.inject (Js.string ("Parse error: " ^ msg))
  | Not_found -> Js.Unsafe.inject (Js.string "Error: compile-module not loaded")
@@ -381,7 +389,10 @@ let api_eval_expr expr_js _env_js =
    let result = Sx_ref.eval_expr expr (Env global_env) in
    sync_env_to_vm ();
    return_via_side_channel (value_to_js result)
-  with Eval_error msg ->
+  with
+  | Sx_vm.VmSuspended _ ->
+    Js.Unsafe.inject (Js.string "Error: IO suspension in non-IO context (use callFn for IO-aware paths)")
+  | Eval_error msg ->
    Js.Unsafe.inject (Js.string ("Error: " ^ msg))

 let api_load src_js =