vm-ext: fix serving-JIT perform-in-HO-callback miscompile (host bug)
Root cause (found via bin/repro_jit_resume.ml, 9 surgical cases): when a `perform` (durable kv read) fires inside a native HO-primitive callback (map/filter/reduce/for-each/some/every?), the VmSuspended unwound through the primitive's native OCaml loop (List.map etc.), destroying the loop's iteration state. The remaining elements were dropped and the stack left misaligned, so the NEXT CALL_PRIM (map/rest/drop) read wrong args — "map: expected (fn list)", "rest: 1 list arg", "drop: list and number". Only triggers in the http-listen + cek_run_with_io serving path (epoch eval has no synchronous resolver, so conformance was 271/271). (A) lib/sx_vm.ml call_closure_reuse: when a callback suspends AND a synchronous IO resolver is installed (serving mode), resolve the callback's IO inline and run it to completion right there, returning its value to the native loop — so the loop is never unwound. Scoped to the resolver-set path; the CEK-driven path (flow/reactive/async tests) keeps its existing reuse_stack behaviour, so nothing else changes. reuse_stack is isolated across the nested resume. (A') lib/sx_vm.ml resume_vm: re-assert _active_vm := Some vm for the duration of the resumed run (mirrors call_closure). call_closure restored _active_vm to the caller when VmSuspended unwound, so HO callbacks during a resume could land on the wrong VM. Latent-bug fix. (B) bin/sx_server.ml register_jit_hook: the resolve_loop runs inside the VmSuspended handler, so a non-VmSuspended exception from resume_vm escaped to the http handler (→ 500). Catch it and fall back to CEK for THIS call (mark jit_failed, return None → interpreter re-runs it). Self-heals on the first hit, not a retry. Defense-in-depth; with (A) it shouldn't trigger. Verification: repro 9/9 (incl. host shape: map[cb→interpreted-helper perform]→drop = (7 8); reduce; nested map). Standard + --full OCaml conformance unchanged at 4834/1110 (baseline identical — the 1110 are pre-existing environmental: host-call-fn/browser-platform symbols, rational display, tw/regex). Host loop to re-verify 271/271 serving and drop its (jit-exclude! "host/*" "dream-*" "dr/*") band-aid. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
202
hosts/ocaml/bin/repro_jit_resume.ml
Normal file
202
hosts/ocaml/bin/repro_jit_resume.ml
Normal file
@@ -0,0 +1,202 @@
|
||||
(* Surgical repro for the serving-JIT OP_PERFORM/resume stack misalignment.
|
||||
Mirrors what register_jit_hook's resolve_loop does: call_closure, catch
|
||||
VmSuspended, resolve IO (return Nil), resume_vm — looping on re-suspend.
|
||||
No CEK evaluator needed for the direct/multi-frame/reuse paths. *)
|
||||
|
||||
open Sx_types
|
||||
|
||||
let req_dict () =
|
||||
let h = Hashtbl.create 1 in
|
||||
Hashtbl.replace h "op" (String "noop");
|
||||
Dict h
|
||||
|
||||
(* Mirror the serving hook's resolve loop exactly. *)
|
||||
let drive cl =
|
||||
let globals = cl.vm_closure_env |> ignore; cl.vm_env_ref in
|
||||
let rec resolve_loop req vm =
|
||||
let _ = req in
|
||||
(try Sx_vm.resume_vm vm Nil
|
||||
with Sx_vm.VmSuspended (r2, v2) -> resolve_loop r2 v2)
|
||||
in
|
||||
try Sx_vm.call_closure cl [] globals
|
||||
with Sx_vm.VmSuspended (req, vm) -> resolve_loop req vm
|
||||
|
||||
let mk_code ~locals ~bc ~consts = {
|
||||
vc_arity = 0; vc_rest_arity = -1; vc_locals = locals;
|
||||
vc_bytecode = Array.of_list bc;
|
||||
vc_constants = Array.of_list consts;
|
||||
vc_bytecode_list = None; vc_constants_list = None;
|
||||
}
|
||||
|
||||
let mk_cl ?(name="tf") ?(env=Hashtbl.create 64) code =
|
||||
{ vm_code = code; vm_upvalues = [||]; vm_name = Some name;
|
||||
vm_env_ref = env; vm_closure_env = None }
|
||||
|
||||
let report label v =
|
||||
Printf.printf "%-28s => %s\n%!" label (Sx_runtime.value_to_str v)
|
||||
|
||||
let run label f =
|
||||
(try report label (f ())
|
||||
with
|
||||
| Eval_error m -> Printf.printf "%-28s => ERROR: %s\n%!" label m
|
||||
| e -> Printf.printf "%-28s => EXN: %s\n%!" label (Printexc.to_string e))
|
||||
|
||||
(* opcodes *)
|
||||
let _const i = [1; i land 0xff; (i lsr 8) land 0xff]
|
||||
let _perform = [112]
|
||||
let _pop = [5]
|
||||
let _call_prim idx argc = [52; idx land 0xff; (idx lsr 8) land 0xff; argc]
|
||||
let _call argc = [48; argc]
|
||||
let _return = [50]
|
||||
|
||||
let () =
|
||||
(* Serving mode: a synchronous IO resolver is installed (mirrors
|
||||
sx_server's http setup). Our mock resolves every request to Nil. *)
|
||||
Sx_types._cek_io_resolver := Some (fun _req _ -> Nil);
|
||||
|
||||
(* Case 1: direct OP_PERFORM then a list prim in the SAME frame.
|
||||
(do (perform {..}) (rest (list 1 2 3))) => (2 3) *)
|
||||
run "1.direct perform→rest" (fun () ->
|
||||
let consts = [ req_dict (); List [Number 1.; Number 2.; Number 3.]; String "rest" ] in
|
||||
let bc = _const 0 @ _perform @ _pop @ _const 1 @ _call_prim 2 1 @ _return in
|
||||
drive (mk_cl (mk_code ~locals:0 ~bc ~consts)));
|
||||
|
||||
(* Case 2: direct perform then map (2-arg prim).
|
||||
(do (perform {..}) (map inc (list 1 2 3))) — needs a fn; use a NativeFn const *)
|
||||
run "2.direct perform→map" (fun () ->
|
||||
let inc = NativeFn ("inc1", function [Number n] -> Number (n +. 1.) | _ -> Nil) in
|
||||
let consts = [ req_dict (); inc; List [Number 1.; Number 2.; Number 3.]; String "map" ] in
|
||||
(* push fn, push list, CALL_PRIM map 2 *)
|
||||
let bc = _const 0 @ _perform @ _pop @ _const 1 @ _const 2 @ _call_prim 3 2 @ _return in
|
||||
drive (mk_cl (mk_code ~locals:0 ~bc ~consts)));
|
||||
|
||||
(* Case 3: multi-frame — outer calls a JIT'd helper that performs, THEN outer maps.
|
||||
helper: (do (perform {..}) 99)
|
||||
outer: (do (helper) (map inc (list 1 2 3))) *)
|
||||
run "3.multiframe perform→map" (fun () ->
|
||||
let env = Hashtbl.create 64 in
|
||||
let helper_code = mk_code ~locals:0
|
||||
~bc:(_const 0 @ _perform @ _pop @ _const 1 @ _return)
|
||||
~consts:[ req_dict (); Number 99. ] in
|
||||
let helper_cl = mk_cl ~name:"helper" ~env helper_code in
|
||||
let inc = NativeFn ("inc1", function [Number n] -> Number (n +. 1.) | _ -> Nil) in
|
||||
let consts = [ VmClosure helper_cl; inc; List [Number 1.; Number 2.; Number 3.]; String "map" ] in
|
||||
(* push helper-closure, CALL 0, POP its result, push inc, push list, CALL_PRIM map 2 *)
|
||||
let bc = _const 0 @ _call 0 @ _pop @ _const 1 @ _const 2 @ _call_prim 3 2 @ _return in
|
||||
drive (mk_cl ~name:"outer" ~env (mk_code ~locals:0 ~bc ~consts)));
|
||||
|
||||
(* Case 4: map whose CALLBACK performs (reuse_stack path), then a trailing prim.
|
||||
callback: (do (perform {..}) (inc e)) — but callback gets arg e in slot 0
|
||||
outer: (do (map cb (list 1 2 3)) (rest (list 7 8 9))) *)
|
||||
run "4.map-callback-perform" (fun () ->
|
||||
let env = Hashtbl.create 64 in
|
||||
(* callback arity 1: slot0 = e. body: (perform {..}); (inc e) ; return
|
||||
LOCAL_GET 0 then CALL_PRIM inc... use NativeFn inc via CALL_PRIM *)
|
||||
let cb_code = {
|
||||
vc_arity = 1; vc_rest_arity = -1; vc_locals = 1;
|
||||
vc_bytecode = Array.of_list (_const 0 @ _perform @ _pop
|
||||
@ [16;0] (* LOCAL_GET 0 *)
|
||||
@ _call_prim 1 1 @ _return);
|
||||
vc_constants = [| req_dict (); String "inc" |];
|
||||
vc_bytecode_list = None; vc_constants_list = None } in
|
||||
let cb_cl = mk_cl ~name:"cb" ~env cb_code in
|
||||
let consts = [ VmClosure cb_cl; List [Number 1.; Number 2.; Number 3.]; String "map";
|
||||
List [Number 7.; Number 8.; Number 9.]; String "rest" ] in
|
||||
(* push cb, push list, CALL_PRIM map 2, POP, push list2, CALL_PRIM rest 1, RETURN *)
|
||||
let bc = _const 0 @ _const 1 @ _call_prim 2 2 @ _pop @ _const 3 @ _call_prim 4 1 @ _return in
|
||||
drive (mk_cl ~name:"outer4" ~env (mk_code ~locals:0 ~bc ~consts)));
|
||||
|
||||
(* Case 5: THE HOST CASE — perform via an INTERPRETED helper (pending_cek path),
|
||||
then a list prim. helper is a Lambda (l_compiled = jit_failed) whose body
|
||||
performs; vm_call routes it through cek_call_or_suspend → pending_cek.
|
||||
helper: (perform {..}) [interpreted via CEK]
|
||||
outer: (do (helper) (rest (list 1 2 3))) => (2 3) *)
|
||||
run "5.pending_cek perform→rest" (fun () ->
|
||||
let env = Sx_types.make_env () in
|
||||
let helper = Lambda {
|
||||
l_params = []; l_body = List [Symbol "perform"; req_dict ()];
|
||||
l_closure = env; l_name = Some "kvread";
|
||||
l_compiled = Some Sx_vm.jit_failed_sentinel; l_call_count = 0;
|
||||
l_uid = Sx_types.next_lambda_uid () } in
|
||||
let consts = [ helper; List [Number 1.; Number 2.; Number 3.]; String "rest" ] in
|
||||
(* push helper, CALL 0, POP, push list, CALL_PRIM rest 1, RETURN *)
|
||||
let bc = _const 0 @ _call 0 @ _pop @ _const 1 @ _call_prim 2 1 @ _return in
|
||||
drive (mk_cl ~name:"outer5" ~env:(Hashtbl.create 64) (mk_code ~locals:0 ~bc ~consts)));
|
||||
|
||||
(* Case 6: pending_cek perform → MAP (2-arg), the exact host shape. *)
|
||||
run "6.pending_cek perform→map" (fun () ->
|
||||
let env = Sx_types.make_env () in
|
||||
let helper = Lambda {
|
||||
l_params = []; l_body = List [Symbol "perform"; req_dict ()];
|
||||
l_closure = env; l_name = Some "kvread";
|
||||
l_compiled = Some Sx_vm.jit_failed_sentinel; l_call_count = 0;
|
||||
l_uid = Sx_types.next_lambda_uid () } in
|
||||
let inc = NativeFn ("inc1", function [Number n] -> Number (n +. 1.) | _ -> Nil) in
|
||||
let consts = [ helper; inc; List [Number 1.; Number 2.; Number 3.]; String "map" ] in
|
||||
(* push helper, CALL 0, POP, push inc, push list, CALL_PRIM map 2, RETURN *)
|
||||
let bc = _const 0 @ _call 0 @ _pop @ _const 1 @ _const 2 @ _call_prim 3 2 @ _return in
|
||||
drive (mk_cl ~name:"outer6" ~env:(Hashtbl.create 64) (mk_code ~locals:0 ~bc ~consts)));
|
||||
|
||||
(* Case 7: THE HOST SHAPE — map whose callback calls an INTERPRETED helper
|
||||
that performs (kv read via persist helper inside a map), THEN a trailing
|
||||
prim. callback(e): (do (kvread) e) — kvread suspends via pending_cek.
|
||||
outer: (do (map cb (list 1 2 3)) (drop (list 5 6 7 8) 2)) => (7 8) *)
|
||||
run "7.HOST: map[cb→helper perform]→drop" (fun () ->
|
||||
let genv = Sx_types.make_env () in
|
||||
let helper = Lambda {
|
||||
l_params = []; l_body = List [Symbol "perform"; req_dict ()];
|
||||
l_closure = genv; l_name = Some "kvread";
|
||||
l_compiled = Some Sx_vm.jit_failed_sentinel; l_call_count = 0;
|
||||
l_uid = Sx_types.next_lambda_uid () } in
|
||||
let env = Hashtbl.create 64 in
|
||||
(* cb(e): push helper, CALL 0, POP, LOCAL_GET 0, RETURN *)
|
||||
let cb_code = {
|
||||
vc_arity = 1; vc_rest_arity = -1; vc_locals = 1;
|
||||
vc_bytecode = Array.of_list (_const 0 @ _call 0 @ _pop @ [16;0] @ _return);
|
||||
vc_constants = [| helper |]; vc_bytecode_list=None; vc_constants_list=None } in
|
||||
let cb_cl = mk_cl ~name:"cb7" ~env cb_code in
|
||||
let consts = [ VmClosure cb_cl; List [Number 1.; Number 2.; Number 3.]; String "map";
|
||||
List [Number 5.; Number 6.; Number 7.; Number 8.]; Number 2.; String "drop" ] in
|
||||
(* push cb, push list, CALL_PRIM map 2, POP, push list2, push 2, CALL_PRIM drop 2, RETURN *)
|
||||
let bc = _const 0 @ _const 1 @ _call_prim 2 2 @ _pop
|
||||
@ _const 3 @ _const 4 @ _call_prim 5 2 @ _return in
|
||||
drive (mk_cl ~name:"outer7" ~env (mk_code ~locals:0 ~bc ~consts)));
|
||||
|
||||
(* Case 8: reduce whose callback performs. (reduce + 0 (list 1 2 3)) with a
|
||||
perform in the reducer => 6 *)
|
||||
run "8.reduce[acc→perform]" (fun () ->
|
||||
let env = Hashtbl.create 64 in
|
||||
(* reducer(acc e): (do (perform {..}) (+ acc e)). slots: 0=acc 1=e *)
|
||||
let rd_code = {
|
||||
vc_arity = 2; vc_rest_arity = -1; vc_locals = 2;
|
||||
vc_bytecode = Array.of_list (_const 0 @ _perform @ _pop
|
||||
@ [16;0] @ [16;1] @ _call_prim 1 2 @ _return);
|
||||
vc_constants = [| req_dict (); String "+" |];
|
||||
vc_bytecode_list=None; vc_constants_list=None } in
|
||||
let rd_cl = mk_cl ~name:"rd" ~env rd_code in
|
||||
let consts = [ VmClosure rd_cl; Number 0.; List [Number 1.; Number 2.; Number 3.]; String "reduce" ] in
|
||||
(* push reducer, push 0, push list, CALL_PRIM reduce 3, RETURN *)
|
||||
let bc = _const 0 @ _const 1 @ _const 2 @ _call_prim 3 3 @ _return in
|
||||
drive (mk_cl ~name:"outer8" ~env (mk_code ~locals:0 ~bc ~consts)));
|
||||
|
||||
(* Case 9: nested map — outer map callback runs an inner map whose callback
|
||||
performs. outer over (list 1 2), inner over (list 10 20) performing.
|
||||
cb_outer(x): (map cb_inner (list 10 20)) ; cb_inner(y): (do (perform) y)
|
||||
=> ((10 20) (10 20)) *)
|
||||
run "9.nested map[inner→perform]" (fun () ->
|
||||
let env = Hashtbl.create 64 in
|
||||
let inner_code = {
|
||||
vc_arity = 1; vc_rest_arity = -1; vc_locals = 1;
|
||||
vc_bytecode = Array.of_list (_const 0 @ _perform @ _pop @ [16;0] @ _return);
|
||||
vc_constants = [| req_dict () |]; vc_bytecode_list=None; vc_constants_list=None } in
|
||||
let inner_cl = mk_cl ~name:"cbin" ~env inner_code in
|
||||
(* outer cb(x): push inner_cl, push (10 20), CALL_PRIM map 2, RETURN *)
|
||||
let outer_cb_code = {
|
||||
vc_arity = 1; vc_rest_arity = -1; vc_locals = 1;
|
||||
vc_bytecode = Array.of_list (_const 0 @ _const 1 @ _call_prim 2 2 @ _return);
|
||||
vc_constants = [| VmClosure inner_cl; List [Number 10.; Number 20.]; String "map" |];
|
||||
vc_bytecode_list=None; vc_constants_list=None } in
|
||||
let outer_cb_cl = mk_cl ~name:"cbout" ~env outer_cb_code in
|
||||
let consts = [ VmClosure outer_cb_cl; List [Number 1.; Number 2.]; String "map" ] in
|
||||
let bc = _const 0 @ _const 1 @ _call_prim 2 2 @ _return in
|
||||
drive (mk_cl ~name:"outer9" ~env (mk_code ~locals:0 ~bc ~consts)))
|
||||
Reference in New Issue
Block a user