Fix JIT compilation cascade + MCP robustness

Three interacting JIT bugs caused infinite loops and server hangs:

1. _jit_compiling cascade: the re-entrancy flag was local to each
   binary's hook. When vm_call triggered JIT compilation internally,
   compiler functions got JIT-compiled during compilation, creating
   infinite cascades. Fix: shared _jit_compiling flag in sx_vm.ml,
   set in jit_compile_lambda itself.

2. call_closure always created new VMs: every HO primitive callback
   (for-each, map, filter) allocated a fresh VM. With 43K+ calls
   during compilation, this was the direct cause of hangs. Fix:
   call_closure_reuse reuses the active VM by isolating frames and
   running re-entrantly. VmSuspended is handled by merging frames
   for proper IO resumption.

3. vm_call for compiled Lambdas: OP_CALL dispatching to a Lambda
   with cached bytecode created a new VM instead of pushing a frame
   on the current one. Fix: push_closure_frame directly.

Additional MCP server fixes:
- Hot-reload: auto-execv when binary on disk is newer (no manual restart needed)
- Robust JSON: to_int_safe/to_int_or handle null, string, int params
- sx_summarise depth now optional (default 2)
- Per-request error handling (malformed JSON doesn't crash server)
- sx_test uses pre-built binary (skips dune rebuild overhead)
- Timed module loading for startup diagnostics

sx_server.ml fixes:
- Uses shared _jit_compiling flag
- Marks lambdas as jit_failed_sentinel on compile failure (no retry spam)
- call_closure_reuse with VmSuspended frame merging for IO support

Known issue (worked around, not fixed): when the compiler itself runs as
compiled bytecode, deeply nested cond/case/let forms (e.g. tw-resolve-style)
cause it to loop. Workaround: the _jit_compiling guard prevents compiled
function execution during compilation, so compilation uses CEK (slower but
correct).
Test suite: 3127/3127 passed.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-07 21:59:31 +00:00
parent 75130876c7
commit 03278c640d
3 changed files with 193 additions and 124 deletions

View File

@@ -133,6 +133,11 @@ let vm_report_counters () =
Printf.eprintf "[vm-perf] insns=%d calls=%d cek_fallbacks=%d comp_jit=%d comp_cek=%d\n%!"
!_vm_insn_count !_vm_call_count !_vm_cek_count !_vm_comp_jit_count !_vm_comp_cek_count
(** Global flag: true while a JIT compilation is in progress.
Prevents the JIT hook from intercepting calls during compilation,
which would cause infinite cascades (compiling the compiler). *)
let _jit_compiling = ref false
(** Push a VM closure frame onto the current VM — no new VM allocation.
This is the fast path for intra-VM closure calls. *)
let push_closure_frame vm cl args =
@@ -228,9 +233,30 @@ let rec call_closure cl args globals =
(** Call a VmClosure on the active VM if one exists, otherwise create a new one.
This is the path used by HO primitives (map, filter, for-each, some) so
callbacks can access upvalues that reference the calling VM's state. *)
callbacks run on the same VM, avoiding per-call VM allocation overhead. *)
and call_closure_reuse cl args =
call_closure cl args cl.vm_env_ref
match !_active_vm with
| Some vm ->
let saved_sp = vm.sp in
push_closure_frame vm cl args;
let saved_frames = List.tl vm.frames in
vm.frames <- [List.hd vm.frames];
(try run vm
with
| VmSuspended _ as e ->
(* IO suspension: merge remaining callback frames with caller frames
so the VM can be properly resumed. When resumed, it finishes the
callback then returns to the caller's frames. *)
vm.frames <- vm.frames @ saved_frames;
raise e
| e ->
vm.frames <- saved_frames;
vm.sp <- saved_sp;
raise e);
vm.frames <- saved_frames;
pop vm
| None ->
call_closure cl args cl.vm_env_ref
(** Call a value as a function — dispatch by type.
VmClosure: pushes frame on current VM (fast intra-VM path).
@@ -247,25 +273,18 @@ and vm_call vm f args =
| Lambda l ->
(match l.l_compiled with
| Some cl when not (is_jit_failed cl) ->
(* Cached bytecode — run on VM using the closure's captured env,
not the caller's globals. Closure vars were merged at compile time. *)
(try push vm (call_closure cl args cl.vm_env_ref)
with _e ->
(* Fallback to CEK — suspension-aware *)
push vm (cek_call_or_suspend vm f (List args)))
(* Cached bytecode — push frame on current VM *)
push_closure_frame vm cl args
| Some _ ->
(* Compile failed — CEK, suspension-aware *)
push vm (cek_call_or_suspend vm f (List args))
| None ->
if l.l_name <> None
then begin
(* Pre-mark before compile attempt to prevent re-entrancy *)
l.l_compiled <- Some jit_failed_sentinel;
match !jit_compile_ref l vm.globals with
| Some cl ->
l.l_compiled <- Some cl;
(try push vm (call_closure cl args cl.vm_env_ref)
with _e -> push vm (cek_call_or_suspend vm f (List args)))
push_closure_frame vm cl args
| None ->
push vm (cek_call_or_suspend vm f (List args))
end
@@ -784,9 +803,14 @@ let execute_module_safe code globals =
record so subsequent calls go straight to the VM. *)
let jit_compile_lambda (l : lambda) globals =
let fn_name = match l.l_name with Some n -> n | None -> "<anon>" in
if !_jit_compiling then (
(* Already compiling — prevent cascade. The CEK will handle this call. *)
None
) else
try
_jit_compiling := true;
let compile_fn = try Hashtbl.find globals "compile"
with Not_found -> raise (Eval_error "JIT: compiler not loaded") in
with Not_found -> (_jit_compiling := false; raise (Eval_error "JIT: compiler not loaded")) in
(* Reconstruct the (fn (params) body) form so the compiler produces
a proper closure. l.l_body is the inner body; we need the full
function form with params so the compiled code binds them. *)
@@ -800,12 +824,7 @@ let jit_compile_lambda (l : lambda) globals =
let compile_env = Sx_types.env_extend (Sx_types.make_env ()) in
Hashtbl.iter (fun k v -> Hashtbl.replace compile_env.bindings (Sx_types.intern k) v) globals;
let result = Sx_ref.eval_expr (List [Symbol "compile"; quoted]) (Env compile_env) in
(* Closure vars are accessible via vm_closure_env (set on the VmClosure
at line ~617). OP_GLOBAL_GET falls back to vm_closure_env when vars
aren't in globals. No injection into the shared globals table —
that would break closure isolation for factory functions like
make-page-fn where multiple closures capture different values
for the same variable names. *)
_jit_compiling := false;
let effective_globals = globals in
(match result with
| Dict d when Hashtbl.mem d "bytecode" ->
@@ -821,21 +840,13 @@ let jit_compile_lambda (l : lambda) globals =
else begin
Printf.eprintf "[jit] FAIL %s: closure index %d out of bounds (pool=%d)\n%!"
fn_name idx (Array.length outer_code.vc_constants);
None
end
end else begin
(* Not a closure — constant expression, alias, or simple computation.
Execute the bytecode as a module to get the value, then wrap
as a NativeFn if it's callable (so the CEK can dispatch to it). *)
(try
let value = execute_module outer_code globals in
Printf.eprintf "[jit] RESOLVED %s: %s (bc[0]=%d)\n%!"
fn_name (type_of value) (if Array.length bc > 0 then bc.(0) else -1);
(* If the resolved value is a NativeFn, we can't wrap it as a
vm_closure — just let the CEK handle it directly. Return None
so the lambda falls through to CEK, which will find the
resolved value in the env on next lookup. *)
None
with _ ->
Printf.eprintf "[jit] SKIP %s: non-closure execution failed (bc[0]=%d, len=%d)\n%!"
@@ -846,12 +857,13 @@ let jit_compile_lambda (l : lambda) globals =
Printf.eprintf "[jit] FAIL %s: compiler returned %s\n%!" fn_name (type_of result);
None)
with e ->
_jit_compiling := false;
Printf.eprintf "[jit] FAIL %s: %s\n%!" fn_name (Printexc.to_string e);
None
(* Wire up forward references *)
let () = jit_compile_ref := jit_compile_lambda
let () = _vm_call_closure_ref := (fun cl args -> call_closure cl args cl.vm_env_ref)
let () = _vm_call_closure_ref := (fun cl args -> call_closure_reuse cl args)
(** {1 Debugging / introspection} *)