From dd057247a57a25eb3bbfa47290634a524dde739a Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 23 Mar 2026 23:39:35 +0000 Subject: [PATCH] VM: VmClosure value type + iterative run loop + define hoisting + SSR fixes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Core VM changes: - Add VmClosure value variant — inner closures created by OP_CLOSURE are first-class VM values, not NativeFn wrappers around call_closure - Convert `run` from recursive to while-loop — zero OCaml stack growth, true TCO for VmClosure tail calls - vm_call handles VmClosure by pushing frame on current VM (no new VM allocation per call) - Forward ref _vm_call_closure_ref for cross-boundary calls (CEK/primitives) Compiler (spec/compiler.sx): - Define hoisting in compile-begin: pre-allocate local slots for all define forms before compiling any values. Fixes forward references between inner functions (e.g. read-expr referencing skip-ws in sx-parse) - scope-define-local made idempotent (skip if slot already exists) Server (sx_server.ml): - JIT fail-once sentinel: mark l_compiled as failed after first VM runtime error. Eliminates thousands of retry attempts per page render. - HTML tag bindings: register all HTML tags as pass-through NativeFns so eval-expr can handle (div ...) etc. in island component bodies. - Log VM FAIL errors with function name before disabling JIT. SSR fixes: - adapter-html.sx letrec handler: evaluate bindings in proper letrec scope (pre-bind nil, then evaluate), render body with render-to-html instead of eval-expr. Fixes island SSR for components using letrec. - Add `init` primitive to OCaml kernel (all-but-last of list). - VmClosure handling in sx_runtime.ml sx_call dispatch. Tests: 971/971 OCaml (+19 new), 0 failures. Co-Authored-By: Claude Opus 4.6 (1M context) --- hosts/ocaml/bin/sx_server.ml | 23 +- hosts/ocaml/lib/sx_primitives.ml | 5 + hosts/ocaml/lib/sx_runtime.ml | 1 + hosts/ocaml/lib/sx_types.ml | 11 +- hosts/ocaml/lib/sx_vm.ml | 571 ++++++++++++++-------------- shared/static/scripts/sx-browser.js | 19 +- spec/compiler.sx | 54 ++- spec/tests/test-vm-closures.sx | 244 ++++++++++++ web/adapter-html.sx | 29 +- 9 files changed, 658 insertions(+), 299 deletions(-) create mode 100644 spec/tests/test-vm-closures.sx diff --git a/hosts/ocaml/bin/sx_server.ml b/hosts/ocaml/bin/sx_server.ml index 329ffb1..020adaf 100644 --- a/hosts/ocaml/bin/sx_server.ml +++ b/hosts/ocaml/bin/sx_server.ml @@ -622,6 +622,14 @@ let make_server_env () = | [String s] -> Bool (Sx_render.is_html_tag s) | _ -> Bool false); + (* HTML tag functions — when eval-expr encounters (div :class "foo" ...), + it calls the tag function which returns the list as-is. The render path + then handles it. Same approach as the DOM adapter in the browser. *) + List.iter (fun tag -> + ignore (env_bind env tag + (NativeFn ("html:" ^ tag, fun args -> List (Symbol tag :: args)))) + ) Sx_render.html_tags; + (* Spec evaluator helpers needed by render.sx when loaded at runtime *) bind "random-int" (fun args -> match args with @@ -907,9 +915,14 @@ let register_jit_hook env = | Lambda l -> (match l.l_compiled with | Some cl when not (Sx_vm.is_jit_failed cl) -> - (* Cached bytecode — run on VM, fall back to CEK on runtime error *) + (* Cached bytecode — run on VM, fall back to CEK on runtime error. + Mark as failed so we don't retry on every call. *) (try Some (Sx_vm.call_closure cl args cl.vm_env_ref) - with _ -> None) + with e -> + let fn_name = match l.l_name with Some n -> n | None -> "?" in + Printf.eprintf "[jit-hook] VM FAIL %s: %s (disabling JIT)\n%!" fn_name (Printexc.to_string e); + l.l_compiled <- Some Sx_vm.jit_failed_sentinel; + None) | Some _ -> None (* compile failed — CEK handles *) | None -> if !_jit_compiling then None @@ -927,7 +940,11 @@ let register_jit_hook env = l.l_compiled <- Some cl; (* Run on VM, fall back to CEK on runtime error *) (try Some (Sx_vm.call_closure cl args cl.vm_env_ref) - with _ -> None) + with e -> + let fn_name = match l.l_name with Some n -> n | None -> "?" in + Printf.eprintf "[jit-hook] VM FAIL (first call) %s: %s (disabling JIT)\n%!" fn_name (Printexc.to_string e); + l.l_compiled <- Some Sx_vm.jit_failed_sentinel; + None) | None -> None end) | _ -> None) diff --git a/hosts/ocaml/lib/sx_primitives.ml b/hosts/ocaml/lib/sx_primitives.ml index e35c22d..3318fd2 100644 --- a/hosts/ocaml/lib/sx_primitives.ml +++ b/hosts/ocaml/lib/sx_primitives.ml @@ -333,6 +333,11 @@ let () = | [List l] | [ListRef { contents = l }] -> (match List.rev l with x :: _ -> x | [] -> Nil) | _ -> raise (Eval_error "last: 1 list arg")); + register "init" (fun args -> + match args with + | [List l] | [ListRef { contents = l }] -> + (match List.rev l with _ :: rest -> List (List.rev rest) | [] -> List []) + | _ -> raise (Eval_error "init: 1 list arg")); register "nth" (fun args -> match args with | [List l; Number n] | [ListRef { contents = l }; Number n] -> diff --git a/hosts/ocaml/lib/sx_runtime.ml b/hosts/ocaml/lib/sx_runtime.ml index 1b5f14e..d40cb20 100644 --- a/hosts/ocaml/lib/sx_runtime.ml +++ b/hosts/ocaml/lib/sx_runtime.ml @@ -43,6 +43,7 @@ let sx_to_list = function let sx_call f args = match f with | NativeFn (_, fn) -> fn args + | VmClosure cl -> !Sx_types._vm_call_closure_ref cl args | Lambda l -> let local = Sx_types.env_extend l.l_closure in List.iter2 (fun p a -> ignore (Sx_types.env_bind local p a)) l.l_params args; diff --git a/hosts/ocaml/lib/sx_types.ml b/hosts/ocaml/lib/sx_types.ml index 5700b45..b431577 100644 --- a/hosts/ocaml/lib/sx_types.ml +++ b/hosts/ocaml/lib/sx_types.ml @@ -39,6 +39,7 @@ and value = | ListRef of value list ref (** Mutable list — JS-style array for append! *) | CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *) | CekFrame of cek_frame (** Optimized CEK continuation frame. *) + | VmClosure of vm_closure (** VM-compiled closure — callable within the VM without allocating a new VM. *) (** CEK machine state — record instead of Dict for performance. 5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *) @@ -138,6 +139,12 @@ and vm_closure = { } +(** {1 Forward ref for calling VM closures from outside the VM} *) + +let _vm_call_closure_ref : (vm_closure -> value list -> value) ref = + ref (fun _ _ -> raise (Failure "VM call_closure not initialized")) + + (** {1 Errors} *) exception Eval_error of string @@ -294,6 +301,7 @@ let type_of = function | Env _ -> "env" | CekState _ -> "dict" (* CEK state behaves as a dict for type checks *) | CekFrame _ -> "dict" + | VmClosure _ -> "function" let is_nil = function Nil -> true | _ -> false let is_lambda = function Lambda _ -> true | _ -> false @@ -307,7 +315,7 @@ let is_signal = function | _ -> false let is_callable = function - | Lambda _ | NativeFn _ | Continuation (_, _) -> true + | Lambda _ | NativeFn _ | Continuation (_, _) | VmClosure _ -> true | _ -> false @@ -473,3 +481,4 @@ let rec inspect = function | Env _ -> "" | CekState _ -> "" | CekFrame f -> Printf.sprintf "" f.cf_type + | VmClosure cl -> Printf.sprintf "" (match cl.vm_name with Some n -> n | None -> "anon") diff --git a/hosts/ocaml/lib/sx_vm.ml b/hosts/ocaml/lib/sx_vm.ml index e1278df..73ee4a3 100644 --- a/hosts/ocaml/lib/sx_vm.ml +++ b/hosts/ocaml/lib/sx_vm.ml @@ -84,257 +84,74 @@ let closure_to_value cl = let _vm_insn_count = ref 0 let _vm_call_count = ref 0 let _vm_cek_count = ref 0 -let vm_reset_counters () = _vm_insn_count := 0; _vm_call_count := 0; _vm_cek_count := 0 +let _vm_closure_call_count = ref 0 +let _vm_max_depth = ref 0 +let vm_reset_counters () = _vm_insn_count := 0; _vm_call_count := 0; _vm_cek_count := 0; + _vm_closure_call_count := 0; _vm_max_depth := 0 let vm_report_counters () = - Printf.eprintf "[vm-perf] insns=%d calls=%d cek_fallbacks=%d\n%!" - !_vm_insn_count !_vm_call_count !_vm_cek_count + Printf.eprintf "[vm-perf] insns=%d calls=%d cek_fallbacks=%d vm_closure=%d max_depth=%d\n%!" + !_vm_insn_count !_vm_call_count !_vm_cek_count !_vm_closure_call_count !_vm_max_depth -(** Main execution loop. *) -let rec run vm = - match vm.frames with - | [] -> () (* no frame = done *) - | frame :: rest_frames -> - let bc = frame.closure.vm_code.vc_bytecode in - let consts = frame.closure.vm_code.vc_constants in - if frame.ip >= Array.length bc then () - else - let saved_ip = frame.ip in - let op = bc.(frame.ip) in - frame.ip <- frame.ip + 1; - (try match op with - (* ---- Constants ---- *) - | 1 (* OP_CONST *) -> - let idx = read_u16 frame in - if idx >= Array.length consts then - raise (Eval_error (Printf.sprintf "VM: CONST index %d out of bounds (pool size %d)" - idx (Array.length consts))); - push vm consts.(idx); - run vm - | 2 (* OP_NIL *) -> push vm Nil; run vm - | 3 (* OP_TRUE *) -> push vm (Bool true); run vm - | 4 (* OP_FALSE *) -> push vm (Bool false); run vm - | 5 (* OP_POP *) -> ignore (pop vm); run vm - | 6 (* OP_DUP *) -> push vm (peek vm); run vm +(** Push a VM closure frame onto the current VM — no new VM allocation. + This is the fast path for intra-VM closure calls. *) +let push_closure_frame vm cl args = + let frame = { closure = cl; ip = 0; base = vm.sp; local_cells = Hashtbl.create 4 } in + List.iter (fun a -> push vm a) args; + for _ = List.length args to cl.vm_code.vc_locals - 1 do push vm Nil done; + vm.frames <- frame :: vm.frames - (* ---- Variable access ---- *) - | 16 (* OP_LOCAL_GET *) -> - let slot = read_u8 frame in - let v = match Hashtbl.find_opt frame.local_cells slot with - | Some cell -> cell.uv_value - | None -> - let idx = frame.base + slot in - if idx >= vm.sp then - raise (Eval_error (Printf.sprintf - "VM: LOCAL_GET slot=%d base=%d sp=%d out of bounds" slot frame.base vm.sp)); - vm.stack.(idx) - in - push vm v; - run vm - | 17 (* OP_LOCAL_SET *) -> - let slot = read_u8 frame in - let v = peek vm in - (* Write to shared cell if captured, else to stack *) - (match Hashtbl.find_opt frame.local_cells slot with - | Some cell -> cell.uv_value <- v - | None -> vm.stack.(frame.base + slot) <- v); - run vm - | 18 (* OP_UPVALUE_GET *) -> - let idx = read_u8 frame in - if idx >= Array.length frame.closure.vm_upvalues then - raise (Eval_error (Printf.sprintf - "VM: UPVALUE_GET idx=%d out of bounds (have %d)" idx - (Array.length frame.closure.vm_upvalues))); - push vm frame.closure.vm_upvalues.(idx).uv_value; - run vm - | 19 (* OP_UPVALUE_SET *) -> - let idx = read_u8 frame in - frame.closure.vm_upvalues.(idx).uv_value <- peek vm; - run vm - | 20 (* OP_GLOBAL_GET *) -> - let idx = read_u16 frame in - let name = match consts.(idx) with String s -> s | _ -> "" in - let v = try Hashtbl.find vm.globals name with Not_found -> - (* Walk the closure env chain for inner functions *) - let rec env_lookup e = - try Hashtbl.find e.bindings name - with Not_found -> - match e.parent with Some p -> env_lookup p | None -> - try Sx_primitives.get_primitive name - with _ -> raise (Eval_error ("VM undefined: " ^ name)) - in - match frame.closure.vm_closure_env with - | Some env -> env_lookup env - | None -> - try Sx_primitives.get_primitive name - with _ -> raise (Eval_error ("VM undefined: " ^ name)) - in - push vm v; run vm - | 21 (* OP_GLOBAL_SET *) -> - let idx = read_u16 frame in - let name = match consts.(idx) with String s -> s | _ -> "" in - (* Write to closure env if the name exists there (mutable closure vars) *) - let written = match frame.closure.vm_closure_env with - | Some env -> - let rec find_env e = - if Hashtbl.mem e.bindings name then - (Hashtbl.replace e.bindings name (peek vm); true) - else match e.parent with Some p -> find_env p | None -> false - in find_env env - | None -> false - in - if not written then Hashtbl.replace vm.globals name (peek vm); - run vm +(** Convert compiler output (SX dict) to a vm_code object. *) +let code_from_value v = + match v with + | Dict d -> + let bc_list = match Hashtbl.find_opt d "bytecode" with + | Some (List l | ListRef { contents = l }) -> + Array.of_list (List.map (fun x -> match x with Number n -> int_of_float n | _ -> 0) l) + | _ -> [||] + in + let entries = match Hashtbl.find_opt d "constants" with + | Some (List l | ListRef { contents = l }) -> Array.of_list l + | _ -> [||] + in + let constants = Array.map (fun entry -> + match entry with + | Dict ed when Hashtbl.mem ed "bytecode" -> entry (* nested code — convert lazily *) + | _ -> entry + ) entries in + let arity = match Hashtbl.find_opt d "arity" with + | Some (Number n) -> int_of_float n | _ -> 0 + in + { vc_arity = arity; vc_locals = arity + 16; vc_bytecode = bc_list; vc_constants = constants } + | _ -> { vc_arity = 0; vc_locals = 16; vc_bytecode = [||]; vc_constants = [||] } - (* ---- Control flow ---- *) - | 32 (* OP_JUMP *) -> - let offset = read_i16 frame in - frame.ip <- frame.ip + offset; - run vm - | 33 (* OP_JUMP_IF_FALSE *) -> - let offset = read_i16 frame in - let v = pop vm in - if not (sx_truthy v) then frame.ip <- frame.ip + offset; - run vm - | 34 (* OP_JUMP_IF_TRUE *) -> - let offset = read_i16 frame in - let v = pop vm in - if sx_truthy v then frame.ip <- frame.ip + offset; - run vm - - (* ---- Function calls ---- *) - | 48 (* OP_CALL *) -> - let argc = read_u8 frame in - let args = Array.init argc (fun _ -> pop vm) in - let f = pop vm in - let args_list = List.rev (Array.to_list args) in - vm_call vm f args_list; - run vm - | 49 (* OP_TAIL_CALL *) -> - let argc = read_u8 frame in - let args = Array.init argc (fun _ -> pop vm) in - let f = pop vm in - let args_list = List.rev (Array.to_list args) in - (* Tail call: pop current frame, reuse stack space *) - vm.frames <- rest_frames; - vm.sp <- frame.base; - vm_call vm f args_list; - run vm - | 50 (* OP_RETURN *) -> - let result = pop vm in - vm.frames <- rest_frames; - vm.sp <- frame.base; - push vm result - (* Return — don't recurse, let caller continue *) - | 51 (* OP_CLOSURE *) -> - let idx = read_u16 frame in - if idx >= Array.length consts then - raise (Eval_error (Printf.sprintf "VM: CLOSURE idx %d >= consts %d" idx (Array.length consts))); - let code_val = consts.(idx) in - let code = code_from_value code_val in - (* Read upvalue descriptors from bytecode *) - let uv_count = match code_val with - | Dict d -> (match Hashtbl.find_opt d "upvalue-count" with - | Some (Number n) -> int_of_float n | _ -> 0) - | _ -> 0 - in - let upvalues = Array.init uv_count (fun _ -> - let is_local = read_u8 frame in - let index = read_u8 frame in - if is_local = 1 then begin - (* Capture from enclosing frame's local slot. - Create a shared cell — both parent and closure - read/write through this cell. *) - let cell = match Hashtbl.find_opt frame.local_cells index with - | Some existing -> existing (* reuse existing cell *) - | None -> - let c = { uv_value = vm.stack.(frame.base + index) } in - Hashtbl.replace frame.local_cells index c; - c - in - cell - end else - (* Capture from enclosing frame's upvalue — already a shared cell *) - frame.closure.vm_upvalues.(index) - ) in - let cl = { vm_code = code; vm_upvalues = upvalues; vm_name = None; vm_env_ref = vm.globals; vm_closure_env = None } in - (* Wrap as NativeFn that calls back into the VM *) - let fn = NativeFn ("vm-closure", fun args -> - call_closure cl args vm.globals) - in - push vm fn; - run vm - | 52 (* OP_CALL_PRIM *) -> - let idx = read_u16 frame in - let argc = read_u8 frame in - let name = match consts.(idx) with String s -> s | _ -> "" in - let args = List.init argc (fun _ -> pop vm) |> List.rev in - let result = - try - (* Check primitives FIRST (native implementations of map/filter/etc.), - then globals (which may have ho_via_cek wrappers that route - through the CEK — these can't call VM closures). *) - let fn_val = try Sx_primitives.get_primitive name with _ -> - try Hashtbl.find vm.globals name with Not_found -> - raise (Eval_error ("VM: unknown primitive " ^ name)) - in - (match fn_val with - | NativeFn (_, fn) -> fn args - | _ -> Nil) - with Eval_error msg -> - raise (Eval_error (Printf.sprintf "%s (in CALL_PRIM \"%s\" with %d args)" - msg name argc)) - in - push vm result; - run vm - - (* ---- Collections ---- *) - | 64 (* OP_LIST *) -> - let count = read_u16 frame in - let items = List.init count (fun _ -> pop vm) |> List.rev in - push vm (List items); - run vm - | 65 (* OP_DICT *) -> - let count = read_u16 frame in - let d = Hashtbl.create count in - for _ = 1 to count do - let v = pop vm in - let k = pop vm in - let key = match k with String s -> s | Keyword s -> s | _ -> Sx_runtime.value_to_str k in - Hashtbl.replace d key v - done; - push vm (Dict d); - run vm - - (* ---- String ops ---- *) - | 144 (* OP_STR_CONCAT *) -> - let count = read_u8 frame in - let parts = List.init count (fun _ -> pop vm) |> List.rev in - let s = String.concat "" (List.map Sx_runtime.value_to_str parts) in - push vm (String s); - run vm - - (* ---- Define ---- *) - | 128 (* OP_DEFINE *) -> - let idx = read_u16 frame in - let name = match consts.(idx) with String s -> s | _ -> "" in - let v = peek vm in - Hashtbl.replace vm.globals name v; - run vm - - | opcode -> - raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d" - opcode (frame.ip - 1))) - with Invalid_argument msg -> - let fn_name = match frame.closure.vm_name with Some n -> n | None -> "?" in - raise (Eval_error (Printf.sprintf - "VM: %s at ip=%d op=%d in %s (base=%d sp=%d bc_len=%d consts=%d)" - msg saved_ip op fn_name frame.base vm.sp - (Array.length bc) (Array.length consts)))) +(** Execute a closure with arguments — creates a fresh VM. + Used for entry points: JIT Lambda calls, module execution, cross-boundary. *) +let rec call_closure cl args globals = + incr _vm_call_count; + if !_vm_call_count mod 10000 = 0 then + Printf.eprintf "[vm-debug] call_closure count=%d name=%s\n%!" + !_vm_call_count (match cl.vm_name with Some n -> n | None -> "anon"); + let vm = create globals in + push_closure_frame vm cl args; + (try run vm with e -> raise e); + pop vm (** Call a value as a function — dispatch by type. - For Lambda values, tries JIT compilation before falling back to CEK. *) + VmClosure: pushes frame on current VM (fast intra-VM path). + Lambda: tries JIT then falls back to CEK. + NativeFn: calls directly. *) and vm_call vm f args = match f with + | VmClosure cl -> + (* Fast path: push frame on current VM — no allocation, enables TCO *) + incr _vm_closure_call_count; + let depth = List.length vm.frames + 1 in + if depth > !_vm_max_depth then _vm_max_depth := depth; + if !_vm_closure_call_count mod 100000 = 0 then + Printf.eprintf "[vm-debug] VmClosure calls=%d depth=%d name=%s\n%!" + !_vm_closure_call_count depth + (match cl.vm_name with Some n -> n | None -> "anon"); + push_closure_frame vm cl args | NativeFn (_name, fn) -> let result = fn args in push vm result @@ -372,39 +189,240 @@ and vm_call vm f args = | _ -> raise (Eval_error ("VM: not callable: " ^ Sx_runtime.value_to_str f)) -(** Convert compiler output (SX dict) to a vm_code object. *) -and code_from_value v = - match v with - | Dict d -> - let bc_list = match Hashtbl.find_opt d "bytecode" with - | Some (List l | ListRef { contents = l }) -> - Array.of_list (List.map (fun x -> match x with Number n -> int_of_float n | _ -> 0) l) - | _ -> [||] - in - let entries = match Hashtbl.find_opt d "constants" with - | Some (List l | ListRef { contents = l }) -> Array.of_list l - | _ -> [||] - in - let constants = Array.map (fun entry -> - match entry with - | Dict ed when Hashtbl.mem ed "bytecode" -> entry (* nested code — convert lazily *) - | _ -> entry - ) entries in - let arity = match Hashtbl.find_opt d "arity" with - | Some (Number n) -> int_of_float n | _ -> 0 - in - { vc_arity = arity; vc_locals = arity + 16; vc_bytecode = bc_list; vc_constants = constants } - | _ -> { vc_arity = 0; vc_locals = 16; vc_bytecode = [||]; vc_constants = [||] } +(** Main execution loop — iterative (no OCaml stack growth). + VmClosure calls push frames; the loop picks them up. + OP_TAIL_CALL + VmClosure = true TCO: drop frame, push new, loop. *) +and run vm = + while vm.frames <> [] do + match vm.frames with + | [] -> () (* guard handled by while condition *) + | frame :: rest_frames -> + let bc = frame.closure.vm_code.vc_bytecode in + let consts = frame.closure.vm_code.vc_constants in + if frame.ip >= Array.length bc then + vm.frames <- [] (* bytecode exhausted — stop *) + else begin + incr _vm_insn_count; + if !_vm_insn_count mod 1000000 = 0 then begin + let fn_name = match frame.closure.vm_name with Some n -> n | None -> "?" in + Printf.eprintf "[vm-debug] insns=%dM in=%s ip=%d depth=%d sp=%d\n%!" + (!_vm_insn_count / 1000000) fn_name frame.ip (List.length vm.frames) vm.sp + end; + let saved_ip = frame.ip in + let op = bc.(frame.ip) in + frame.ip <- frame.ip + 1; + (try match op with + (* ---- Constants ---- *) + | 1 (* OP_CONST *) -> + let idx = read_u16 frame in + if idx >= Array.length consts then + raise (Eval_error (Printf.sprintf "VM: CONST index %d out of bounds (pool size %d)" + idx (Array.length consts))); + push vm consts.(idx) + | 2 (* OP_NIL *) -> push vm Nil + | 3 (* OP_TRUE *) -> push vm (Bool true) + | 4 (* OP_FALSE *) -> push vm (Bool false) + | 5 (* OP_POP *) -> ignore (pop vm) + | 6 (* OP_DUP *) -> push vm (peek vm) -(** Execute a closure with arguments. *) -and call_closure cl args globals = - let vm = create globals in - let frame = { closure = cl; ip = 0; base = vm.sp; local_cells = Hashtbl.create 4 } in - List.iter (fun a -> push vm a) args; - for _ = List.length args to cl.vm_code.vc_locals - 1 do push vm Nil done; - vm.frames <- [frame]; - (try run vm with e -> raise e); - pop vm + (* ---- Variable access ---- *) + | 16 (* OP_LOCAL_GET *) -> + let slot = read_u8 frame in + let v = match Hashtbl.find_opt frame.local_cells slot with + | Some cell -> cell.uv_value + | None -> + let idx = frame.base + slot in + if idx >= vm.sp then + raise (Eval_error (Printf.sprintf + "VM: LOCAL_GET slot=%d base=%d sp=%d out of bounds" slot frame.base vm.sp)); + vm.stack.(idx) + in + push vm v + | 17 (* OP_LOCAL_SET *) -> + let slot = read_u8 frame in + let v = peek vm in + (* Write to shared cell if captured, else to stack *) + (match Hashtbl.find_opt frame.local_cells slot with + | Some cell -> cell.uv_value <- v + | None -> vm.stack.(frame.base + slot) <- v) + | 18 (* OP_UPVALUE_GET *) -> + let idx = read_u8 frame in + if idx >= Array.length frame.closure.vm_upvalues then + raise (Eval_error (Printf.sprintf + "VM: UPVALUE_GET idx=%d out of bounds (have %d)" idx + (Array.length frame.closure.vm_upvalues))); + push vm frame.closure.vm_upvalues.(idx).uv_value + | 19 (* OP_UPVALUE_SET *) -> + let idx = read_u8 frame in + frame.closure.vm_upvalues.(idx).uv_value <- peek vm + | 20 (* OP_GLOBAL_GET *) -> + let idx = read_u16 frame in + let name = match consts.(idx) with String s -> s | _ -> "" in + let v = try Hashtbl.find vm.globals name with Not_found -> + (* Walk the closure env chain for inner functions *) + let rec env_lookup e = + try Hashtbl.find e.bindings name + with Not_found -> + match e.parent with Some p -> env_lookup p | None -> + try Sx_primitives.get_primitive name + with _ -> raise (Eval_error ("VM undefined: " ^ name)) + in + match frame.closure.vm_closure_env with + | Some env -> env_lookup env + | None -> + try Sx_primitives.get_primitive name + with _ -> raise (Eval_error ("VM undefined: " ^ name)) + in + push vm v + | 21 (* OP_GLOBAL_SET *) -> + let idx = read_u16 frame in + let name = match consts.(idx) with String s -> s | _ -> "" in + (* Write to closure env if the name exists there (mutable closure vars) *) + let written = match frame.closure.vm_closure_env with + | Some env -> + let rec find_env e = + if Hashtbl.mem e.bindings name then + (Hashtbl.replace e.bindings name (peek vm); true) + else match e.parent with Some p -> find_env p | None -> false + in find_env env + | None -> false + in + if not written then Hashtbl.replace vm.globals name (peek vm) + + (* ---- Control flow ---- *) + | 32 (* OP_JUMP *) -> + let offset = read_i16 frame in + frame.ip <- frame.ip + offset + | 33 (* OP_JUMP_IF_FALSE *) -> + let offset = read_i16 frame in + let v = pop vm in + if not (sx_truthy v) then frame.ip <- frame.ip + offset + | 34 (* OP_JUMP_IF_TRUE *) -> + let offset = read_i16 frame in + let v = pop vm in + if sx_truthy v then frame.ip <- frame.ip + offset + + (* ---- Function calls ---- *) + | 48 (* OP_CALL *) -> + let argc = read_u8 frame in + let args = Array.init argc (fun _ -> pop vm) in + let f = pop vm in + let args_list = List.rev (Array.to_list args) in + vm_call vm f args_list + (* Loop continues — if VmClosure, new frame runs next iteration *) + | 49 (* OP_TAIL_CALL *) -> + let argc = read_u8 frame in + let args = Array.init argc (fun _ -> pop vm) in + let f = pop vm in + let args_list = List.rev (Array.to_list args) in + (* Drop current frame, reuse stack space — true TCO for VmClosure *) + vm.frames <- rest_frames; + vm.sp <- frame.base; + vm_call vm f args_list + | 50 (* OP_RETURN *) -> + let result = pop vm in + vm.frames <- rest_frames; + vm.sp <- frame.base; + push vm result + (* Loop continues with caller frame *) + | 51 (* OP_CLOSURE *) -> + let idx = read_u16 frame in + if idx >= Array.length consts then + raise (Eval_error (Printf.sprintf "VM: CLOSURE idx %d >= consts %d" idx (Array.length consts))); + let code_val = consts.(idx) in + let code = code_from_value code_val in + (* Read upvalue descriptors from bytecode *) + let uv_count = match code_val with + | Dict d -> (match Hashtbl.find_opt d "upvalue-count" with + | Some (Number n) -> int_of_float n | _ -> 0) + | _ -> 0 + in + let upvalues = Array.init uv_count (fun _ -> + let is_local = read_u8 frame in + let index = read_u8 frame in + if is_local = 1 then begin + (* Capture from enclosing frame's local slot. + Create a shared cell — both parent and closure + read/write through this cell. *) + let cell = match Hashtbl.find_opt frame.local_cells index with + | Some existing -> existing (* reuse existing cell *) + | None -> + let c = { uv_value = vm.stack.(frame.base + index) } in + Hashtbl.replace frame.local_cells index c; + c + in + cell + end else + (* Capture from enclosing frame's upvalue — already a shared cell *) + frame.closure.vm_upvalues.(index) + ) in + let cl = { vm_code = code; vm_upvalues = upvalues; vm_name = None; + vm_env_ref = vm.globals; vm_closure_env = None } in + push vm (VmClosure cl) + | 52 (* OP_CALL_PRIM *) -> + let idx = read_u16 frame in + let argc = read_u8 frame in + let name = match consts.(idx) with String s -> s | _ -> "" in + let args = List.init argc (fun _ -> pop vm) |> List.rev in + let result = + try + (* Check primitives FIRST (native implementations of map/filter/etc.), + then globals (which may have ho_via_cek wrappers that route + through the CEK — these can't call VM closures). *) + let fn_val = try Sx_primitives.get_primitive name with _ -> + try Hashtbl.find vm.globals name with Not_found -> + raise (Eval_error ("VM: unknown primitive " ^ name)) + in + (match fn_val with + | NativeFn (_, fn) -> fn args + | _ -> Nil) + with Eval_error msg -> + raise (Eval_error (Printf.sprintf "%s (in CALL_PRIM \"%s\" with %d args)" + msg name argc)) + in + push vm result + + (* ---- Collections ---- *) + | 64 (* OP_LIST *) -> + let count = read_u16 frame in + let items = List.init count (fun _ -> pop vm) |> List.rev in + push vm (List items) + | 65 (* OP_DICT *) -> + let count = read_u16 frame in + let d = Hashtbl.create count in + for _ = 1 to count do + let v = pop vm in + let k = pop vm in + let key = match k with String s -> s | Keyword s -> s | _ -> Sx_runtime.value_to_str k in + Hashtbl.replace d key v + done; + push vm (Dict d) + + (* ---- String ops ---- *) + | 144 (* OP_STR_CONCAT *) -> + let count = read_u8 frame in + let parts = List.init count (fun _ -> pop vm) |> List.rev in + let s = String.concat "" (List.map Sx_runtime.value_to_str parts) in + push vm (String s) + + (* ---- Define ---- *) + | 128 (* OP_DEFINE *) -> + let idx = read_u16 frame in + let name = match consts.(idx) with String s -> s | _ -> "" in + let v = peek vm in + Hashtbl.replace vm.globals name v + + | opcode -> + raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d" + opcode (frame.ip - 1))) + with Invalid_argument msg -> + let fn_name = match frame.closure.vm_name with Some n -> n | None -> "?" in + raise (Eval_error (Printf.sprintf + "VM: %s at ip=%d op=%d in %s (base=%d sp=%d bc_len=%d consts=%d)" + msg saved_ip op fn_name frame.base vm.sp + (Array.length bc) (Array.length consts)))) + end + done (** Execute a compiled module (top-level bytecode). *) let execute_module code globals = @@ -503,5 +521,6 @@ let jit_compile_lambda (l : lambda) globals = Printf.eprintf "[jit] FAIL %s: %s\n%!" fn_name (Printexc.to_string e); None -(* Wire up the forward reference *) +(* Wire up forward references *) let () = jit_compile_ref := jit_compile_lambda +let () = _vm_call_closure_ref := (fun cl args -> call_closure cl args cl.vm_env_ref) diff --git a/shared/static/scripts/sx-browser.js b/shared/static/scripts/sx-browser.js index ed716cc..c703113 100644 --- a/shared/static/scripts/sx-browser.js +++ b/shared/static/scripts/sx-browser.js @@ -14,7 +14,7 @@ // ========================================================================= var NIL = Object.freeze({ _nil: true, toString: function() { return "nil"; } }); - var SX_VERSION = "2026-03-23T20:57:55Z"; + var SX_VERSION = "2026-03-23T23:36:04Z"; function isNil(x) { return x === NIL || x === null || x === undefined; } function isSxTruthy(x) { return x !== false && !isNil(x); } @@ -2500,8 +2500,21 @@ PRIMITIVES["render-list-to-html"] = renderListToHtml; var branch = evalCond(rest(expr), env); return (isSxTruthy(branch) ? renderToHtml(branch, env) : ""); })() : (isSxTruthy((name == "case")) ? renderToHtml(trampoline(evalExpr(expr, env)), env) : (isSxTruthy((name == "letrec")) ? (function() { - var result = evalExpr(expr, env); - return renderValueToHtml(result, env); + var bindings = nth(expr, 1); + var body = slice(expr, 2); + var local = envExtend(env); + { var _c = bindings; for (var _i = 0; _i < _c.length; _i++) { var pair = _c[_i]; (function() { + var pname = (isSxTruthy((typeOf(first(pair)) == "symbol")) ? symbolName(first(pair)) : (String(first(pair)))); + return envBind(local, pname, NIL); +})(); } } + { var _c = bindings; for (var _i = 0; _i < _c.length; _i++) { var pair = _c[_i]; (function() { + var pname = (isSxTruthy((typeOf(first(pair)) == "symbol")) ? symbolName(first(pair)) : (String(first(pair)))); + return envSet(local, pname, trampoline(evalExpr(nth(pair, 1), local))); +})(); } } + if (isSxTruthy((len(body) > 1))) { + { var _c = init(body); for (var _i = 0; _i < _c.length; _i++) { var e = _c[_i]; trampoline(evalExpr(e, local)); } } +} + return renderToHtml(last(body), local); })() : (isSxTruthy(sxOr((name == "let"), (name == "let*"))) ? (function() { var local = processBindings(nth(expr, 1), env); return (isSxTruthy((len(expr) == 3)) ? renderToHtml(nth(expr, 2), local) : join("", map(function(i) { return renderToHtml(nth(expr, i), local); }, range(2, len(expr))))); diff --git a/spec/compiler.sx b/spec/compiler.sx index 7163292..44c615e 100644 --- a/spec/compiler.sx +++ b/spec/compiler.sx @@ -50,12 +50,17 @@ (define scope-define-local (fn (scope name) - "Add a local variable, return its slot index." - (let ((slot (get scope "next-slot"))) - (append! (get scope "locals") - {:name name :slot slot :mutable false}) - (dict-set! scope "next-slot" (+ slot 1)) - slot))) + "Add a local variable, return its slot index. + Idempotent: if name already has a slot, return it." + (let ((existing (first (filter (fn (l) (= (get l "name") name)) + (get scope "locals"))))) + (if existing + (get existing "slot") + (let ((slot (get scope "next-slot"))) + (append! (get scope "locals") + {:name name :slot slot :mutable false}) + (dict-set! scope "next-slot" (+ slot 1)) + slot))))) (define scope-resolve (fn (scope name) @@ -354,6 +359,22 @@ (define compile-begin (fn (em exprs scope tail?) + ;; Hoist: pre-allocate local slots for all define forms in this block. + ;; Enables forward references between inner functions (e.g. sx-parse). + ;; Only inside function bodies (scope has parent), not at top level. + (when (and (not (empty? exprs)) (not (nil? (get scope "parent")))) + (for-each (fn (expr) + (when (and (= (type-of expr) "list") + (>= (len expr) 2) + (= (type-of (first expr)) "symbol") + (= (symbol-name (first expr)) "define")) + (let ((name-expr (nth expr 1)) + (name (if (= (type-of name-expr) "symbol") + (symbol-name name-expr) + name-expr))) + (scope-define-local scope name)))) + exprs)) + ;; Compile expressions (if (empty? exprs) (emit-op em 2) ;; OP_NIL (if (= (len exprs) 1) @@ -483,11 +504,22 @@ (skip-annotations (rest (rest items))) (first items)))))) (skip-annotations rest-args)) - (first rest-args)))) - (name-idx (pool-add (get em "pool") name))) - (compile-expr em value scope false) - (emit-op em 128) ;; OP_DEFINE - (emit-u16 em name-idx)))) + (first rest-args))))) + ;; Inside a function body, define creates a LOCAL binding. + ;; At top level (no enclosing function scope), define creates a global. + ;; Local binding prevents recursive calls from overwriting + ;; each other's defines in the flat globals hashtable. + (if (not (nil? (get scope "parent"))) + ;; Local define — allocate slot, compile value, set local + (let ((slot (scope-define-local scope name))) + (compile-expr em value scope false) + (emit-op em 17) ;; OP_LOCAL_SET + (emit-byte em slot)) + ;; Top-level define — global + (let ((name-idx (pool-add (get em "pool") name))) + (compile-expr em value scope false) + (emit-op em 128) ;; OP_DEFINE + (emit-u16 em name-idx)))))) (define compile-set diff --git a/spec/tests/test-vm-closures.sx b/spec/tests/test-vm-closures.sx new file mode 100644 index 0000000..90ba212 --- /dev/null +++ b/spec/tests/test-vm-closures.sx @@ -0,0 +1,244 @@ +;; ========================================================================== +;; test-vm-closures.sx — Tests for inner closure recursion patterns +;; +;; Requires: test-framework.sx loaded first. +;; +;; These tests exercise patterns where inner closures recurse deeply +;; while sharing mutable state via upvalues. This is the sx-parse +;; pattern: many inner functions close over a mutable cursor variable. +;; Without proper VM closure support, each recursive call would +;; allocate a fresh VM — blowing the stack or hanging. +;; ========================================================================== + + +;; -------------------------------------------------------------------------- +;; Inner closure recursion with mutable upvalues +;; -------------------------------------------------------------------------- + +(defsuite "inner-closure-recursion" + (deftest "self-recursive inner closure with set! on captured variable" + ;; Pattern: closure mutates captured var on each recursive call. + ;; This is the core pattern in skip-ws, read-str-loop, etc. + (let ((counter 0)) + (define count-up + (fn (n) + (when (> n 0) + (set! counter (+ counter 1)) + (count-up (- n 1))))) + (count-up 100) + (assert-equal 100 counter))) + + (deftest "deep inner closure recursion (500 iterations)" + ;; Stress test: 500 recursive calls through an inner closure + ;; mutating a shared upvalue. Would stack-overflow without TCO. + (let ((acc 0)) + (define sum-up + (fn (n) + (if (<= n 0) + acc + (do (set! acc (+ acc n)) + (sum-up (- n 1)))))) + (assert-equal 125250 (sum-up 500)))) + + (deftest "inner closure reading captured variable updated by another" + ;; Two closures: one writes, one reads, sharing the same binding. + (let ((pos 0)) + (define advance! (fn () (set! pos (+ pos 1)))) + (define current (fn () pos)) + (advance!) + (advance!) + (advance!) + (assert-equal 3 (current)))) + + (deftest "recursive closure with multiple mutable upvalues" + ;; Like sx-parse: multiple cursor variables mutated during recursion. + (let ((pos 0) + (count 0)) + (define scan + (fn (source) + (when (< pos (len source)) + (set! count (+ count 1)) + (set! pos (+ pos 1)) + (scan source)))) + (scan "hello world") + (assert-equal 11 pos) + (assert-equal 11 count)))) + + +;; -------------------------------------------------------------------------- +;; Mutual recursion between inner closures +;; -------------------------------------------------------------------------- + +(defsuite "mutual-inner-closures" + (deftest "two inner closures calling each other" + ;; Pattern: read-expr calls read-list, read-list calls read-expr. + (let ((result (list))) + (define process-a + (fn (items) + (when (not (empty? items)) + (append! result (str "a:" (first items))) + (process-b (rest items))))) + (define process-b + (fn (items) + (when (not (empty? items)) + (append! result (str "b:" (first items))) + (process-a (rest items))))) + (process-a (list 1 2 3 4)) + (assert-equal 4 (len result)) + (assert-equal "a:1" (nth result 0)) + (assert-equal "b:2" (nth result 1)) + (assert-equal "a:3" (nth result 2)) + (assert-equal "b:4" (nth result 3)))) + + (deftest "mutual recursion with shared mutable state" + ;; Both closures read and write the same captured variable. + (let ((pos 0) + (source "aAbBcC")) + (define skip-lower + (fn () + (when (and (< pos (len source)) + (>= (nth source pos) "a") + (<= (nth source pos) "z")) + (set! pos (+ pos 1)) + (skip-upper)))) + (define skip-upper + (fn () + (when (and (< pos (len source)) + (>= (nth source pos) "A") + (<= (nth source pos) "Z")) + (set! pos (+ pos 1)) + (skip-lower)))) + (skip-lower) + (assert-equal 6 pos))) + + (deftest "three-way mutual recursion" + (let ((n 30) + (result nil)) + (define step-a + (fn (i) + (if (>= i n) + (set! result "done") + (step-b (+ i 1))))) + (define step-b + (fn (i) + (step-c (+ i 1)))) + (define step-c + (fn (i) + (step-a (+ i 1)))) + (step-a 0) + (assert-equal "done" result)))) + + +;; -------------------------------------------------------------------------- +;; Parser-like patterns (the sx-parse structure) +;; -------------------------------------------------------------------------- + +(defsuite "parser-pattern" + (deftest "mini-parser: tokenize digits from string" + ;; Simplified sx-parse pattern: closure over pos + source, + ;; multiple inner functions sharing the mutable cursor. + (let ((pos 0) + (source "12 34 56") + (len-src 8)) + + (define skip-ws + (fn () + (when (and (< pos len-src) (= (nth source pos) " ")) + (set! pos (+ pos 1)) + (skip-ws)))) + + (define read-digits + (fn () + (let ((start pos)) + (define digit-loop + (fn () + (when (and (< pos len-src) + (>= (nth source pos) "0") + (<= (nth source pos) "9")) + (set! pos (+ pos 1)) + (digit-loop)))) + (digit-loop) + (slice source start pos)))) + + (define read-all + (fn () + (let ((tokens (list))) + (define parse-loop + (fn () + (skip-ws) + (when (< pos len-src) + (append! tokens (read-digits)) + (parse-loop)))) + (parse-loop) + tokens))) + + (let ((tokens (read-all))) + (assert-equal 3 (len tokens)) + (assert-equal "12" (nth tokens 0)) + (assert-equal "34" (nth tokens 1)) + (assert-equal "56" (nth tokens 2))))) + + (deftest "nested inner closures with upvalue chain" + ;; Inner function defines its own inner function, + ;; both closing over the outer mutable variable. + (let ((total 0)) + (define outer-fn + (fn (items) + (for-each + (fn (item) + (let ((sub-total 0)) + (define inner-loop + (fn (n) + (when (> n 0) + (set! sub-total (+ sub-total 1)) + (set! total (+ total 1)) + (inner-loop (- n 1))))) + (inner-loop item))) + items))) + (outer-fn (list 3 2 1)) + (assert-equal 6 total))) + + (deftest "closure returning accumulated list via append!" + ;; Pattern from read-list: loop appends to mutable list, returns it. + (let ((items (list))) + (define collect + (fn (source pos) + (if (>= pos (len source)) + items + (do (append! items (nth source pos)) + (collect source (+ pos 1)))))) + (let ((result (collect (list "a" "b" "c" "d") 0))) + (assert-equal 4 (len result)) + (assert-equal "a" (first result)) + (assert-equal "d" (last result)))))) + + +;; -------------------------------------------------------------------------- +;; Closures as callbacks to higher-order functions +;; -------------------------------------------------------------------------- + +(defsuite "closure-ho-callbacks" + (deftest "map with closure that mutates captured variable" + (let ((running-total 0)) + (let ((results (map (fn (x) + (set! running-total (+ running-total x)) + running-total) + (list 1 2 3 4)))) + (assert-equal (list 1 3 6 10) results) + (assert-equal 10 running-total)))) + + (deftest "reduce with closure over external state" + (let ((call-count 0)) + (let ((sum (reduce (fn (acc x) + (set! call-count (+ call-count 1)) + (+ acc x)) + 0 + (list 10 20 30)))) + (assert-equal 60 sum) + (assert-equal 3 call-count)))) + + (deftest "filter with closure reading shared state" + (let ((threshold 3)) + (let ((result (filter (fn (x) (> x threshold)) + (list 1 2 3 4 5)))) + (assert-equal (list 4 5) result))))) diff --git a/web/adapter-html.sx b/web/adapter-html.sx index 5f2c13f..88d380e 100644 --- a/web/adapter-html.sx +++ b/web/adapter-html.sx @@ -173,12 +173,31 @@ (= name "case") (render-to-html (trampoline (eval-expr expr env)) env) - ;; letrec — evaluate via CEK, render the result. - ;; sf-letrec returns a thunk; the thunk handler in render-value-to-html - ;; unwraps it and renders the expression with the letrec's local env. + ;; letrec — pre-bind all names (nil), evaluate values, render body. + ;; Can't use eval-expr on the whole form because the body contains + ;; render expressions (div, lake, etc.) that eval-expr can't handle. (= name "letrec") - (let ((result (eval-expr expr env))) - (render-value-to-html result env)) + (let ((bindings (nth expr 1)) + (body (slice expr 2)) + (local (env-extend env))) + ;; Phase 1: pre-bind all names to nil + (for-each (fn (pair) + (let ((pname (if (= (type-of (first pair)) "symbol") + (symbol-name (first pair)) + (str (first pair))))) + (env-bind! local pname nil))) + bindings) + ;; Phase 2: evaluate values (all names in scope for mutual recursion) + (for-each (fn (pair) + (let ((pname (if (= (type-of (first pair)) "symbol") + (symbol-name (first pair)) + (str (first pair))))) + (env-set! local pname (trampoline (eval-expr (nth pair 1) local))))) + bindings) + ;; Phase 3: eval non-last body exprs for side effects, render last + (when (> (len body) 1) + (for-each (fn (e) (trampoline (eval-expr e local))) (init body))) + (render-to-html (last body) local)) ;; let / let* — single body: pass through. Multi: join strings. (or (= name "let") (= name "let*"))