vm-ext: fix serving-JIT perform-in-HO-callback miscompile (host bug)
Root cause (found via bin/repro_jit_resume.ml, 9 surgical cases): when a `perform` (durable kv read) fires inside a native HO-primitive callback (map/filter/reduce/for-each/some/every?), the VmSuspended exception unwound through the primitive's native OCaml loop (List.map etc.), destroying the loop's iteration state. The remaining elements were dropped and the stack was left misaligned, so the NEXT CALL_PRIM (map/rest/drop) read the wrong args — "map: expected (fn list)", "rest: 1 list arg", "drop: list and number". This only triggers in the http-listen + cek_run_with_io serving path (epoch eval has no synchronous resolver, so conformance was 271/271). (A) lib/sx_vm.ml call_closure_reuse: when a callback suspends AND a synchronous IO resolver is installed (serving mode), resolve the callback's IO inline and run it to completion right there, returning its value to the native loop — so the loop is never unwound. Scoped to the resolver-set path; the CEK-driven path (flow/reactive/async tests) keeps its existing reuse_stack behaviour, so nothing else changes. reuse_stack is isolated across the nested resume. (A') lib/sx_vm.ml resume_vm: re-assert _active_vm := Some vm for the duration of the resumed run (mirrors call_closure). call_closure restored _active_vm to the caller when VmSuspended unwound, so HO callbacks during a resume could land on the wrong VM. Latent-bug fix. (B) bin/sx_server.ml register_jit_hook: the resolve_loop runs inside the VmSuspended handler, so a non-VmSuspended exception from resume_vm escaped to the http handler (→ 500). Catch it and fall back to CEK for THIS call (mark jit_failed, return None → interpreter re-runs it). The call self-heals on the first hit rather than requiring a retry. Defense-in-depth; with (A) it shouldn't trigger. Verification: repro 9/9 (incl. host shape: map[cb→interpreted-helper perform]→drop = (7 8); reduce; nested map). 
Standard + --full OCaml conformance unchanged at 4834/1110 (identical to baseline — the 1110 failures are pre-existing and environmental: host-call-fn/browser-platform symbols, rational display, tw/regex). Follow-up: the host loop should re-verify 271/271 in serving mode and then drop its (jit-exclude! "host/*" "dream-*" "dr/*") band-aid. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1216,7 +1216,23 @@ let register_jit_hook env =
|
||||
let rec resolve_loop req vm =
|
||||
let result = resolver req (Nil) in
|
||||
(try Some (Sx_vm.resume_vm vm result)
|
||||
with Sx_vm.VmSuspended (req2, vm2) -> resolve_loop req2 vm2)
|
||||
with
|
||||
| Sx_vm.VmSuspended (req2, vm2) -> resolve_loop req2 vm2
|
||||
| e ->
|
||||
(* (B) Resume raised mid-execution. resolve_loop runs inside
|
||||
the VmSuspended handler, so without catching here the
|
||||
error escapes to the http handler (→ 500). Recover THIS
|
||||
call on the CEK instead: mark jit_failed and return None
|
||||
so the interpreter re-runs it (idempotent for the host's
|
||||
durable reads). Self-heals on the first hit, not a retry. *)
|
||||
let fn_name = match l.l_name with Some n -> n | None -> "?" in
|
||||
if not (Hashtbl.mem _jit_warned fn_name) then begin
|
||||
Hashtbl.replace _jit_warned fn_name true;
|
||||
Printf.eprintf "[jit] %s resume fallback to CEK: %s\n%!"
|
||||
fn_name (Printexc.to_string e)
|
||||
end;
|
||||
l.l_compiled <- Some Sx_vm.jit_failed_sentinel;
|
||||
None)
|
||||
in
|
||||
resolve_loop request saved_vm
|
||||
| None -> Some (make_vm_suspend_marker request saved_vm))
|
||||
@@ -1249,7 +1265,16 @@ let register_jit_hook env =
|
||||
let rec resolve_loop req vm =
|
||||
let result = resolver req (Nil) in
|
||||
(try Some (Sx_vm.resume_vm vm result)
|
||||
with Sx_vm.VmSuspended (req2, vm2) -> resolve_loop req2 vm2)
|
||||
with
|
||||
| Sx_vm.VmSuspended (req2, vm2) -> resolve_loop req2 vm2
|
||||
| e ->
|
||||
(* (B) See note above — recover a failed resume on the
|
||||
CEK instead of escaping to the handler (→ 500). *)
|
||||
Printf.eprintf "[jit] %s resume fallback to CEK: %s\n%!"
|
||||
fn_name (Printexc.to_string e);
|
||||
Hashtbl.replace _jit_warned fn_name true;
|
||||
l.l_compiled <- Some Sx_vm.jit_failed_sentinel;
|
||||
None)
|
||||
in
|
||||
resolve_loop request saved_vm
|
||||
| None -> Some (make_vm_suspend_marker request saved_vm))
|
||||
|
||||
Reference in New Issue
Block a user