VM upvalue support: closures capture variables from enclosing scopes

Compiler (compiler.sx):
- Function scopes marked is-function=true; let scopes share parent frame
- scope-resolve only creates upvalue captures at function boundaries
- Let scope locals use parent's slot numbering (same frame)
- OP_CLOSURE emits upvalue descriptors: (is_local, index) per capture

VM (sx_vm.ml):
- upvalue_cell type: shared mutable reference to captured value
- OP_UPVALUE_GET/SET: read/write from closure's upvalue array
- OP_CLOSURE: reads upvalue descriptors, creates cells from
  enclosing frame's locals (is_local=1) or upvalues (is_local=0)
- vm_closure carries live env_ref (not snapshot)
- vm_call falls back to CEK for Lambda/Component/Island values

Verified: (let ((x 10)) (let ((add-x (fn (y) (+ x y)))) (add-x 5)))
  Compiles to: CONST 10, LOC_SET #0, CLOSURE [UV_GET#0 LOC_GET#0 CPRIM+ RET]
  with upvalue descriptor: is_local=1 index=0
  VM executes → 15 ✓

Auto-compile: 6/117 functions compile (up from 3). Disabled until
compiler handles all features — fallback can't reconstruct closure
scope for variables like nav-state bound in caller's let*.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-19 20:13:17 +00:00
parent a62b7c8a5e
commit 1bb40415a8
4 changed files with 135 additions and 64 deletions

View File

@@ -820,18 +820,21 @@ let dispatch env cmd =
match result with
| Dict d when Hashtbl.mem d "bytecode" ->
let code = Sx_vm.code_from_value result in
let globals_snapshot = Hashtbl.copy env.bindings in
Hashtbl.iter (fun k v ->
Hashtbl.replace globals_snapshot k v) lam.l_closure.bindings;
(* VM closure with CEK fallback on error *)
(* Live env reference — NOT a snapshot. Functions see
current bindings, including later-defined functions. *)
let live_env = env.bindings in
(* Original lambda for CEK fallback *)
let orig_lambda = Lambda lam in
let fn = NativeFn ("vm:" ^ name, fun args ->
try
Sx_vm.execute_closure
{ Sx_vm.code; name = lam.l_name } args globals_snapshot
with _ ->
(* Fall back to CEK machine *)
Sx_ref.cek_call orig_lambda (List args)) in
Sx_vm.call_closure
{ Sx_vm.code; upvalues = [||]; name = lam.l_name;
env_ref = live_env }
args live_env
with
| _ ->
(* Any VM error — fall back to CEK *)
Sx_ref.eval_expr (List (orig_lambda :: args)) (Env env)) in
Hashtbl.replace env.bindings name fn;
incr count
| _ -> incr failed

View File

@@ -17,17 +17,26 @@ type code = {
constants : value array;
}
(** Closure — code + captured upvalues. *)
(** Upvalue cell — shared mutable reference to a captured variable.
Open when the variable is still on the stack; closed when the
enclosing frame returns and the value is moved to the heap. *)
type upvalue_cell = {
mutable uv_value : value;
}
(** Closure — code + captured upvalues + live env reference. *)
type vm_closure = {
code : code;
upvalues : upvalue_cell array;
name : string option;
env_ref : (string, value) Hashtbl.t; (* live global env — NOT a snapshot *)
}
(** Call frame — one per function invocation. *)
type frame = {
closure : vm_closure;
closure : vm_closure;
mutable ip : int;
base : int; (* base index in value stack for locals *)
base : int; (* base index in value stack for locals *)
}
(** VM state. *)
@@ -35,13 +44,11 @@ type vm = {
mutable stack : value array;
mutable sp : int;
mutable frames : frame list;
globals : (string, value) Hashtbl.t;
globals : (string, value) Hashtbl.t; (* live reference to kernel env *)
}
let create globals =
let g = Hashtbl.create 256 in
Hashtbl.iter (fun k v -> Hashtbl.replace g k v) globals;
{ stack = Array.make 4096 Nil; sp = 0; frames = []; globals = g }
{ stack = Array.make 4096 Nil; sp = 0; frames = []; globals }
(** Stack ops — inlined for speed. *)
let push vm v =
@@ -74,13 +81,21 @@ let[@inline] read_i16 f =
let v = read_u16 f in
if v >= 32768 then v - 65536 else v
(** Wrap a VM closure as an SX value (NativeFn). *)
let closure_to_value cl =
NativeFn ("vm:" ^ (match cl.name with Some n -> n | None -> "anon"),
fun args -> raise (Eval_error ("VM_CLOSURE_CALL:" ^ String.concat "," (List.map Sx_runtime.value_to_str args))))
(* Placeholder — actual calls go through vm_call below *)
(** Main execution loop. *)
let rec run vm =
match vm.frames with
| [] -> failwith "VM: no frame"
| [] -> () (* no frame = done *)
| frame :: rest_frames ->
let bc = frame.closure.code.bytecode in
let consts = frame.closure.code.constants in
if frame.ip >= Array.length bc then () (* ran off end *)
else
let op = bc.(frame.ip) in
frame.ip <- frame.ip + 1;
match op with
@@ -104,6 +119,14 @@ let rec run vm =
let slot = read_u8 frame in
vm.stack.(frame.base + slot) <- peek vm;
run vm
| 18 (* OP_UPVALUE_GET *) ->
let idx = read_u8 frame in
push vm frame.closure.upvalues.(idx).uv_value;
run vm
| 19 (* OP_UPVALUE_SET *) ->
let idx = read_u8 frame in
frame.closure.upvalues.(idx).uv_value <- peek vm;
run vm
| 20 (* OP_GLOBAL_GET *) ->
let idx = read_u16 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
@@ -139,30 +162,62 @@ let rec run vm =
let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let f = pop vm in
vm_call vm f (Array.to_list (Array.of_list (List.rev (Array.to_list args))));
let args_list = List.rev (Array.to_list args) in
vm_call vm f args_list;
run vm
| 49 (* OP_TAIL_CALL *) ->
let argc = read_u8 frame in
let args = Array.init argc (fun _ -> pop vm) in
let f = pop vm in
let args_list = List.rev (Array.to_list args) in
(* Tail call: pop current frame, reuse stack space *)
vm.frames <- rest_frames;
vm.sp <- frame.base;
vm_call vm f (Array.to_list (Array.of_list (List.rev (Array.to_list args))));
vm_call vm f args_list;
run vm
| 50 (* OP_RETURN *) ->
let result = pop vm in
vm.frames <- rest_frames;
vm.sp <- frame.base;
push vm result
(* Return to caller — don't recurse *)
(* Return — don't recurse, let caller continue *)
| 51 (* OP_CLOSURE *) ->
let idx = read_u16 frame in
let code_val = consts.(idx) in
let code = code_from_value code_val in
(* Read upvalue descriptors from bytecode *)
let uv_count = match code_val with
| Dict d -> (match Hashtbl.find_opt d "upvalue-count" with
| Some (Number n) -> int_of_float n | _ -> 0)
| _ -> 0
in
let upvalues = Array.init uv_count (fun _ ->
let is_local = read_u8 frame in
let index = read_u8 frame in
if is_local = 1 then
(* Capture from enclosing frame's local slot *)
{ uv_value = vm.stack.(frame.base + index) }
else
(* Capture from enclosing frame's upvalue *)
{ uv_value = frame.closure.upvalues.(index).uv_value }
) in
let cl = { code; upvalues; name = None; env_ref = vm.globals } in
(* Wrap as NativeFn that calls back into the VM *)
let fn = NativeFn ("vm-closure", fun args ->
call_closure cl args vm.globals)
in
push vm fn;
run vm
| 52 (* OP_CALL_PRIM *) ->
let idx = read_u16 frame in
let argc = read_u8 frame in
let name = match consts.(idx) with String s -> s | _ -> "" in
let args = List.init argc (fun _ -> pop vm) |> List.rev in
let result = (match Sx_primitives.get_primitive name with
NativeFn (_, fn) -> fn args | _ -> Nil) in
let result =
(match Sx_primitives.get_primitive name with
| NativeFn (_, fn) -> fn args
| _ -> Nil)
in
push vm result;
run vm
@@ -200,28 +255,19 @@ let rec run vm =
Hashtbl.replace vm.globals name v;
run vm
(* ---- Closure ---- *)
| 51 (* OP_CLOSURE *) ->
let idx = read_u16 frame in
(* The constant pool entry is a code dict from the compiler *)
let code_val = consts.(idx) in
let code = code_from_value code_val in
let cl = { code; name = None } in
push vm (NativeFn ("vm-closure", fun args ->
execute_closure cl args vm.globals));
run vm
| opcode ->
(* Unknown opcode — fall back to CEK machine *)
raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d"
opcode (frame.ip - 1)))
(** Call a value as a function — dispatch by type. *)
and vm_call vm f args =
match f with
| NativeFn (_, fn) ->
| NativeFn (_name, fn) ->
let result = fn args in
push vm result
| Lambda _ ->
(* Call a CEK-defined lambda through the VM *)
| Lambda _ | Component _ | Island _ ->
(* Fall back to CEK machine for SX-defined functions *)
let result = Sx_ref.cek_call f (List args) in
push vm result
| _ ->
@@ -240,12 +286,9 @@ and code_from_value v =
| Some (List l | ListRef { contents = l }) -> Array.of_list l
| _ -> [||]
in
(* Recursively convert nested code objects in the pool *)
let constants = Array.map (fun entry ->
match entry with
| Dict ed when Hashtbl.mem ed "bytecode" ->
(* Nested code object — keep as Dict for lazy conversion *)
entry
| Dict ed when Hashtbl.mem ed "bytecode" -> entry (* nested code — convert lazily *)
| _ -> entry
) entries in
let arity = match Hashtbl.find_opt d "arity" with
@@ -254,8 +297,9 @@ and code_from_value v =
{ arity; locals = arity + 16; bytecode = bc_list; constants }
| _ -> { arity = 0; locals = 16; bytecode = [||]; constants = [||] }
(** Execute a closure with arguments. *)
and execute_closure cl args globals =
(** Execute a closure with arguments — creates a new VM frame.
The closure carries its upvalue cells for captured variables. *)
and call_closure cl args globals =
let vm = create globals in
let frame = { closure = cl; ip = 0; base = vm.sp } in
(* Push args as locals *)
@@ -268,10 +312,9 @@ and execute_closure cl args globals =
(** Execute a compiled module (top-level bytecode). *)
let execute_module code globals =
let cl = { code; name = Some "module" } in
let cl = { code; upvalues = [||]; name = Some "module"; env_ref = globals } in
let vm = create globals in
let frame = { closure = cl; ip = 0; base = 0 } in
(* Pad locals *)
for _ = 0 to code.locals - 1 do push vm Nil done;
vm.frames <- [frame];
run vm;