From 318c818728929d54bc0fd54e9a45a1cb7a60fd4a Mon Sep 17 00:00:00 2001 From: giles Date: Mon, 23 Mar 2026 08:18:44 +0000 Subject: [PATCH] Lazy JIT compilation: lambdas compile to bytecode on first call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace AOT adapter compilation with lazy JIT — each named lambda is compiled to VM bytecode on first call, cached in l_compiled field for subsequent calls. Compilation failures fall back to CEK gracefully. VM types (vm_code, vm_upvalue_cell, vm_closure) moved to sx_types.ml mutual recursion block. Lambda and Component records gain mutable l_compiled/c_compiled cache fields. jit_compile_lambda in sx_vm.ml wraps body as (fn (params) body), invokes spec/compiler.sx via CEK, extracts inner closure from OP_CLOSURE constant. JIT hooks in both paths: - vm_call: Lambda calls from compiled VM code - continue_with_call: Lambda calls from CEK step loop (injected by bootstrap.py post-processing) Pre-mark sentinel prevents re-entrancy (compile function itself was hanging when JIT'd mid-compilation). VM execution errors caught and fall back to CEK with sentinel marking. Also: add kbd/samp/var to HTML_TAGS, rebuild sx-browser.js, add page URL to sx-page-full-py timing log. Performance: first page 28s (JIT compiles 17 functions), subsequent pages 0.31s home / 0.71s wittgenstein (was 2.3s). All 1945 tests pass. Co-Authored-By: Claude Opus 4.6 (1M context) --- hosts/ocaml/bin/run_tests.ml | 2 +- hosts/ocaml/bin/sx_server.ml | 196 ++++++++++----------------- hosts/ocaml/bootstrap.py | 35 ++++- hosts/ocaml/lib/sx_ref.ml | 9 +- hosts/ocaml/lib/sx_types.ml | 32 ++++- hosts/ocaml/lib/sx_vm.ml | 203 ++++++++++++++++++++-------- shared/static/scripts/sx-browser.js | 4 +- shared/sx/ocaml_bridge.py | 6 +- spec/render.sx | 2 +- sx/sxc/pages/sx_router.py | 4 +- 10 files changed, 296 insertions(+), 197 deletions(-) diff --git a/hosts/ocaml/bin/run_tests.ml b/hosts/ocaml/bin/run_tests.ml index 35d7a08..c087140 100644 --- a/hosts/ocaml/bin/run_tests.ml +++ b/hosts/ocaml/bin/run_tests.ml @@ -591,7 +591,7 @@ let run_foundation_tests () = assert_true "sx_truthy \"\"" (Bool (sx_truthy (String ""))); assert_eq "not truthy nil" (Bool false) (Bool (sx_truthy Nil)); assert_eq "not truthy false" (Bool false) (Bool (sx_truthy (Bool false))); - let l = { l_params = ["x"]; l_body = Symbol "x"; l_closure = Sx_types.make_env (); l_name = None } in + let l = { l_params = ["x"]; l_body = Symbol "x"; l_closure = Sx_types.make_env (); l_name = None; l_compiled = None } in assert_true "is_lambda" (Bool (Sx_types.is_lambda (Lambda l))); ignore (Sx_types.set_lambda_name (Lambda l) "my-fn"); assert_eq "lambda name mutated" (String "my-fn") (lambda_name (Lambda l)) diff --git a/hosts/ocaml/bin/sx_server.ml b/hosts/ocaml/bin/sx_server.ml index a7e05ae..ddda6d1 100644 --- a/hosts/ocaml/bin/sx_server.ml +++ b/hosts/ocaml/bin/sx_server.ml @@ -698,96 +698,60 @@ let make_server_env () = (* ====================================================================== *) -(* VM adapter — compiled aser functions in isolated globals *) +(* JIT hook registration *) (* ====================================================================== *) -(** Compiled adapter globals — separate from kernel env. - Contains compiled aser functions + reads from kernel env for - components, helpers, and other runtime bindings. *) -let vm_adapter_globals : (string, value) Hashtbl.t option ref = ref None +(** Register the JIT call hook. Called once after the compiler is loaded + into the kernel env. The hook handles both cached execution (bytecode + already compiled) and first-call compilation (invoke compiler.sx via + CEK, cache result). cek_call checks this before CEK dispatch. *) +let _jit_compiling = ref false (* re-entrancy guard *) -(** Compile adapter-sx.sx and store in vm_adapter_globals. - Called from vm-compile-adapter command. *) -let compile_adapter env = - if not (Hashtbl.mem env.bindings "compile") then - raise (Eval_error "compiler not loaded") - else begin - let compile_fn = Hashtbl.find env.bindings "compile" in - (* Find and parse adapter-sx.sx *) - let web_dir = try Sys.getenv "SX_WEB_DIR" with Not_found -> - try Filename.concat (Sys.getenv "SX_SPEC_DIR") "../web" - with Not_found -> "web" in - let adapter_path = Filename.concat web_dir "adapter-sx.sx" in - if not (Sys.file_exists adapter_path) then - raise (Eval_error ("adapter-sx.sx not found: " ^ adapter_path)); - let exprs = Sx_parser.parse_file adapter_path in - (* Compile each define's body *) - let globals = Hashtbl.create 64 in - (* Seed with kernel env for component/helper lookups *) - Hashtbl.iter (fun k v -> Hashtbl.replace globals k v) env.bindings; - let compiled = ref 0 in - List.iter (fun expr -> - match expr with - | List (Symbol "define" :: Symbol name :: rest) -> - (* Find the body — skip :effects annotations *) - let rec find_body = function - | Keyword _ :: _ :: rest -> find_body rest - | body :: _ -> body - | [] -> Nil - in - let body = find_body rest in - (try - let quoted = List [Symbol "quote"; body] in - let result = Sx_ref.eval_expr (List [compile_fn; quoted]) (Env env) in - match result with - | Dict d when Hashtbl.mem d "bytecode" -> - let outer_code = Sx_vm.code_from_value result in - Printf.eprintf "[vm] %s: outer bc=%d consts=%d inner_type=%s\n%!" - name (Array.length outer_code.Sx_vm.bytecode) - (Array.length outer_code.Sx_vm.constants) - (if Array.length outer_code.Sx_vm.constants > 0 then - type_of outer_code.Sx_vm.constants.(0) else "empty"); - let bc = outer_code.Sx_vm.bytecode in - if Array.length bc >= 4 && bc.(0) = 51 then begin - (* The compiled define body is (fn ...) which compiles to - OP_CLOSURE + [upvalue descriptors] + OP_RETURN. - Extract the inner code object from constants[idx]. *) - let idx = bc.(1) lor (bc.(2) lsl 8) in - let code = - if idx < Array.length outer_code.Sx_vm.constants then begin - let inner_val = outer_code.Sx_vm.constants.(idx) in - try Sx_vm.code_from_value inner_val - with e -> - Printf.eprintf "[vm] inner code_from_value failed for %s: %s\n%!" - name (Printexc.to_string e); - raise e - end else outer_code - in - let cl = { Sx_vm.code; upvalues = [||]; name = Some name; - env_ref = globals } in - Hashtbl.replace globals name - (NativeFn ("vm:" ^ name, fun args -> - Sx_vm.call_closure cl args globals)); - incr compiled - end else begin - (* Not a lambda — constant expression (e.g. (list ...)). - Execute once and store the resulting value directly. *) - let value = Sx_vm.execute_module outer_code globals in - Hashtbl.replace globals name value; - Printf.eprintf "[vm] %s: constant (type=%s)\n%!" name (type_of value); - incr compiled - end - | _ -> () (* non-dict result — skip *) - with e -> - Printf.eprintf "[vm] FAIL adapter %s: %s\n%!" name (Printexc.to_string e)) - | _ -> - (* Non-define expression — evaluate on CEK to set up constants *) - (try ignore (Sx_ref.eval_expr expr (Env env)) with _ -> ()) - ) exprs; - vm_adapter_globals := Some globals; - Printf.eprintf "[vm] Compiled adapter: %d functions\n%!" !compiled - end +let register_jit_hook env = + Sx_ref.jit_call_hook := Some (fun f args -> + match f with + | Lambda l -> + (match l.l_compiled with + | Some cl when not (Sx_vm.is_jit_failed cl) -> + (* Cached bytecode — execute on VM, fall back to CEK on error *) + (try Some (Sx_vm.call_closure cl args cl.vm_env_ref) + with _ -> l.l_compiled <- Some Sx_vm.jit_failed_sentinel; None) + | Some _ -> None (* failed sentinel *) + | None -> + (* Don't try to compile while already compiling (prevents + infinite recursion: compile calls lambdas internally) *) + if !_jit_compiling then None + else begin + let fn_name = match l.l_name with Some n -> n | None -> "?" in + begin + (* Mark as tried BEFORE compiling — prevents other calls to + the same lambda from starting redundant compilations while + this one is running. If compilation succeeds, overwrite. *) + l.l_compiled <- Some Sx_vm.jit_failed_sentinel; + Printf.eprintf "[jit-hook] compiling %s (body size ~%d)...\n%!" + fn_name (String.length (inspect l.l_body)); + _jit_compiling := true; + let t0 = Unix.gettimeofday () in + let compiled = Sx_vm.jit_compile_lambda l env.bindings in + let dt = Unix.gettimeofday () -. t0 in + _jit_compiling := false; + Printf.eprintf "[jit-hook] %s compile %s in %.3fs\n%!" + fn_name (match compiled with Some _ -> "OK" | None -> "FAIL") dt; + match compiled with + | Some cl -> + l.l_compiled <- Some cl; + Printf.eprintf "[jit-hook] executing %s on VM...\n%!" fn_name; + (try + let r = Sx_vm.call_closure cl args cl.vm_env_ref in + Printf.eprintf "[jit-hook] %s execution OK\n%!" fn_name; + Some r + with e -> + Printf.eprintf "[jit-hook] %s VM FAIL: %s\n%!" fn_name (Printexc.to_string e); + l.l_compiled <- Some Sx_vm.jit_failed_sentinel; None) + | None -> None + end end) + | _ -> None) (* ====================================================================== *) @@ -901,17 +865,15 @@ let rec dispatch env cmd = | exn -> send_error (Printexc.to_string exn)) | List [Symbol "vm-compile-adapter"] -> - (* Compile adapter-sx.sx to VM bytecode with isolated globals *) - (try - compile_adapter env; - send_ok () - with - | Eval_error msg -> send_error msg - | exn -> send_error (Printexc.to_string exn)) + (* Legacy command — JIT hook is now registered at startup. + Kept for backward compatibility with ocaml_bridge.py. *) + register_jit_hook env; + Printf.eprintf "[jit] JIT hook registered (lazy compilation active)\n%!"; + send_ok () | List [Symbol "aser-slot"; String src] -> (* Expand ALL components server-side. Uses batch IO mode. - Routes through VM if adapter is compiled, else CEK. *) + Calls aser via CEK — the JIT hook compiles it on first call. *) (try let exprs = Sx_parser.parse_all src in let expr = match exprs with @@ -925,22 +887,14 @@ let rec dispatch env cmd = let t0 = Unix.gettimeofday () in let expand_fn = NativeFn ("expand-components?", fun _args -> Bool true) in ignore (env_bind env "expand-components?" expand_fn); - let result = match !vm_adapter_globals with - | Some globals -> - Hashtbl.replace globals "expand-components?" expand_fn; - let aser_fn = try Hashtbl.find globals "aser" - with Not_found -> raise (Eval_error "VM: aser not compiled") in - let r = match aser_fn with - | NativeFn (_, fn) -> fn [expr; Env env] - | _ -> raise (Eval_error "VM: aser not a function") - in - Hashtbl.remove globals "expand-components?"; - r - | None -> - let call = List [Symbol "aser"; - List [Symbol "quote"; expr]; - Env env] in - Sx_ref.eval_expr call (Env env) + Printf.eprintf "[aser-slot] starting aser eval...\n%!"; + let result = + let call = List [Symbol "aser"; + List [Symbol "quote"; expr]; + Env env] in + let r = Sx_ref.eval_expr call (Env env) in + Printf.eprintf "[aser-slot] aser eval returned\n%!"; + r in let t1 = Unix.gettimeofday () in io_batch_mode := false; @@ -990,22 +944,14 @@ let rec dispatch env cmd = let t0 = Unix.gettimeofday () in let expand_fn = NativeFn ("expand-components?", fun _args -> Bool true) in ignore (env_bind env "expand-components?" expand_fn); - let body_result = match !vm_adapter_globals with - | Some globals -> - Hashtbl.replace globals "expand-components?" expand_fn; - let aser_fn = try Hashtbl.find globals "aser" - with Not_found -> raise (Eval_error "VM: aser not compiled") in - let r = match aser_fn with - | NativeFn (_, fn) -> fn [expr; Env env] - | _ -> raise (Eval_error "VM: aser not a function") - in - Hashtbl.remove globals "expand-components?"; - r - | None -> + Printf.eprintf "[sx-page-full] starting aser eval...\n%!"; + let body_result = let call = List [Symbol "aser"; List [Symbol "quote"; expr]; Env env] in - Sx_ref.eval_expr call (Env env) + let r = Sx_ref.eval_expr call (Env env) in + Printf.eprintf "[sx-page-full] aser eval returned\n%!"; + r in let t1 = Unix.gettimeofday () in io_batch_mode := false; @@ -1117,8 +1063,8 @@ let rec dispatch env cmd = let fn = NativeFn ("vm:" ^ name, fun args -> try Sx_vm.call_closure - { Sx_vm.code; upvalues = [||]; name = lam.l_name; - env_ref = live_env } + { vm_code = code; vm_upvalues = [||]; vm_name = lam.l_name; + vm_env_ref = live_env } args live_env with | _ -> diff --git a/hosts/ocaml/bootstrap.py b/hosts/ocaml/bootstrap.py index 0098562..c5df711 100644 --- a/hosts/ocaml/bootstrap.py +++ b/hosts/ocaml/bootstrap.py @@ -55,10 +55,15 @@ let trampoline v = !trampoline_fn v let _strict_ref = ref (Bool false) let _prim_param_types_ref = ref Nil +(* JIT call hook — cek_call checks this before CEK dispatch for named + lambdas. Registered by sx_server.ml after compiler loads. Tests + run with hook = None (pure CEK, no compilation dependency). *) +let jit_call_hook : (value -> value list -> value option) option ref = ref None + """ -# OCaml fixups — wire up trampoline + iterative CEK run +# OCaml fixups — wire up trampoline + iterative CEK run + JIT hook FIXUPS = """\ (* Wire up trampoline to resolve thunks via the CEK machine *) @@ -75,6 +80,8 @@ let cek_run_iterative state = done; cek_value !s + + """ @@ -200,9 +207,6 @@ def compile_spec_to_ml(spec_dir: str | None = None) -> str: return '\n'.join(fixed) output = fix_mutable_reads(output) - # Fix cek_call: the spec passes (make-env) as the env arg to - # continue_with_call, but the transpiler evaluates it at transpile - # time (it's a primitive), producing Dict instead of Env. # Fix cek_call: the spec passes (make-env) as the env arg to # continue_with_call, but the transpiler evaluates make-env at # transpile time (it's a primitive), producing Dict instead of Env. @@ -211,6 +215,29 @@ def compile_spec_to_ml(spec_dir: str | None = None) -> str: "(Env (Sx_types.make_env ())) (a) ((List []))", ) + # Inject JIT dispatch into continue_with_call's lambda branch. + # After params are bound, check jit_call_hook before creating CEK state. + lambda_body_pattern = ( + '(prim_call "slice" [params; (len (args))])); Nil)) in ' + '(make_cek_state ((lambda_body (f))) (local) (kont))' + ) + lambda_body_jit = ( + '(prim_call "slice" [params; (len (args))])); Nil)) in ' + '(match !jit_call_hook, f with ' + '| Some hook, Lambda l when l.l_name <> None -> ' + 'let args_list = match args with ' + 'List a | ListRef { contents = a } -> a | _ -> [] in ' + '(match hook f args_list with ' + 'Some result -> make_cek_value result local kont ' + '| None -> make_cek_state (lambda_body f) local kont) ' + '| _ -> make_cek_state ((lambda_body (f))) (local) (kont))' + ) + if lambda_body_pattern in output: + output = output.replace(lambda_body_pattern, lambda_body_jit, 1) + else: + import sys + print("WARNING: Could not find lambda body pattern for JIT injection", file=sys.stderr) + return output diff --git a/hosts/ocaml/lib/sx_ref.ml b/hosts/ocaml/lib/sx_ref.ml index b366469..53293e1 100644 --- a/hosts/ocaml/lib/sx_ref.ml +++ b/hosts/ocaml/lib/sx_ref.ml @@ -18,6 +18,11 @@ let trampoline v = !trampoline_fn v let _strict_ref = ref (Bool false) let _prim_param_types_ref = ref Nil +(* JIT call hook — cek_call checks this before CEK dispatch for named + lambdas. Registered by sx_server.ml after compiler loads. Tests + run with hook = None (pure CEK, no compilation dependency). *) +let jit_call_hook : (value -> value list -> value option) option ref = ref None + (* === Transpiled from evaluator (frames + eval + CEK) === *) @@ -482,7 +487,7 @@ and step_continue state = (* continue-with-call *) and continue_with_call f args env raw_args kont = - (if sx_truthy ((continuation_p (f))) then (let arg = (if sx_truthy ((empty_p (args))) then Nil else (first (args))) in let cont_data = (continuation_data (f)) in (let captured = (get (cont_data) ((String "captured"))) in (let result' = (cek_run ((make_cek_value (arg) (env) (captured)))) in (make_cek_value (result') (env) (kont))))) else (if sx_truthy ((let _and = (is_callable (f)) in if not (sx_truthy _and) then _and else (let _and = (Bool (not (sx_truthy ((is_lambda (f)))))) in if not (sx_truthy _and) then _and else (let _and = (Bool (not (sx_truthy ((is_component (f)))))) in if not (sx_truthy _and) then _and else (Bool (not (sx_truthy ((is_island (f)))))))))) then (make_cek_value ((sx_apply f args)) (env) (kont)) else (if sx_truthy ((is_lambda (f))) then (let params = (lambda_params (f)) in let local = (env_merge ((lambda_closure (f))) (env)) in (if sx_truthy ((prim_call ">" [(len (args)); (len (params))])) then (raise (Eval_error (value_to_str (String (sx_str [(let _or = (lambda_name (f)) in if sx_truthy _or then _or else (String "lambda")); (String " expects "); (len (params)); (String " args, got "); (len (args))]))))) else (let () = ignore ((List.iter (fun pair -> ignore ((env_bind local (sx_to_string (first (pair))) (nth (pair) ((Number 1.0)))))) (sx_to_list (prim_call "zip" [params; args])); Nil)) in (let () = ignore ((List.iter (fun p -> ignore ((env_bind local (sx_to_string p) Nil))) (sx_to_list (prim_call "slice" [params; (len (args))])); Nil)) in (make_cek_state ((lambda_body (f))) (local) (kont)))))) else (if sx_truthy ((let _or = (is_component (f)) in if sx_truthy _or then _or else (is_island (f)))) then (let parsed = (parse_keyword_args (raw_args) (env)) in let kwargs = (first (parsed)) in let children = (nth (parsed) ((Number 1.0))) in let local = (env_merge ((component_closure (f))) (env)) in (let () = ignore ((List.iter (fun p -> ignore ((env_bind local (sx_to_string p) (let _or = (dict_get (kwargs) (p)) in if sx_truthy _or then _or else Nil)))) (sx_to_list (component_params (f))); Nil)) in (let () = ignore ((if sx_truthy ((component_has_children (f))) then (env_bind local (sx_to_string (String "children")) children) else Nil)) in (make_cek_state ((component_body (f))) (local) (kont))))) else (raise (Eval_error (value_to_str (String (sx_str [(String "Not callable: "); (inspect (f))]))))))))) + (if sx_truthy ((continuation_p (f))) then (let arg = (if sx_truthy ((empty_p (args))) then Nil else (first (args))) in let cont_data = (continuation_data (f)) in (let captured = (get (cont_data) ((String "captured"))) in (let result' = (cek_run ((make_cek_value (arg) (env) (captured)))) in (make_cek_value (result') (env) (kont))))) else (if sx_truthy ((let _and = (is_callable (f)) in if not (sx_truthy _and) then _and else (let _and = (Bool (not (sx_truthy ((is_lambda (f)))))) in if not (sx_truthy _and) then _and else (let _and = (Bool (not (sx_truthy ((is_component (f)))))) in if not (sx_truthy _and) then _and else (Bool (not (sx_truthy ((is_island (f)))))))))) then (make_cek_value ((sx_apply f args)) (env) (kont)) else (if sx_truthy ((is_lambda (f))) then (let params = (lambda_params (f)) in let local = (env_merge ((lambda_closure (f))) (env)) in (if sx_truthy ((prim_call ">" [(len (args)); (len (params))])) then (raise (Eval_error (value_to_str (String (sx_str [(let _or = (lambda_name (f)) in if sx_truthy _or then _or else (String "lambda")); (String " expects "); (len (params)); (String " args, got "); (len (args))]))))) else (let () = ignore ((List.iter (fun pair -> ignore ((env_bind local (sx_to_string (first (pair))) (nth (pair) ((Number 1.0)))))) (sx_to_list (prim_call "zip" [params; args])); Nil)) in (let () = ignore ((List.iter (fun p -> ignore ((env_bind local (sx_to_string p) Nil))) (sx_to_list (prim_call "slice" [params; (len (args))])); Nil)) in (match !jit_call_hook, f with | Some hook, Lambda l when l.l_name <> None -> let args_list = match args with List a | ListRef { contents = a } -> a | _ -> [] in (match hook f args_list with Some result -> make_cek_value result local kont | None -> make_cek_state (lambda_body f) local kont) | _ -> make_cek_state ((lambda_body (f))) (local) (kont)))))) else (if sx_truthy ((let _or = (is_component (f)) in if sx_truthy _or then _or else (is_island (f)))) then (let parsed = (parse_keyword_args (raw_args) (env)) in let kwargs = (first (parsed)) in let children = (nth (parsed) ((Number 1.0))) in let local = (env_merge ((component_closure (f))) (env)) in (let () = ignore ((List.iter (fun p -> ignore ((env_bind local (sx_to_string p) (let _or = (dict_get (kwargs) (p)) in if sx_truthy _or then _or else Nil)))) (sx_to_list (component_params (f))); Nil)) in (let () = ignore ((if sx_truthy ((component_has_children (f))) then (env_bind local (sx_to_string (String "children")) children) else Nil)) in (make_cek_state ((component_body (f))) (local) (kont))))) else (raise (Eval_error (value_to_str (String (sx_str [(String "Not callable: "); (inspect (f))]))))))))) (* sf-case-step-loop *) and sf_case_step_loop match_val clauses env kont = @@ -515,3 +520,5 @@ let cek_run_iterative state = done; cek_value !s + + diff --git a/hosts/ocaml/lib/sx_types.ml b/hosts/ocaml/lib/sx_types.ml index f96dc41..7ad022c 100644 --- a/hosts/ocaml/lib/sx_types.ml +++ b/hosts/ocaml/lib/sx_types.ml @@ -74,6 +74,7 @@ and lambda = { l_body : value; l_closure : env; mutable l_name : string option; + mutable l_compiled : vm_closure option; (** Lazy JIT cache *) } and component = { @@ -83,6 +84,7 @@ and component = { c_body : value; c_closure : env; c_affinity : string; (** "auto" | "client" | "server" *) + mutable c_compiled : vm_closure option; (** Lazy JIT cache *) } and island = { @@ -107,6 +109,33 @@ and signal = { mutable s_deps : signal list; } +(** {1 Bytecode VM types} + + Defined here (not in sx_vm.ml) because [vm_code.constants] references + [value] and [lambda.l_compiled] references [vm_closure] — mutual + recursion requires all types in one [and] chain. *) + +(** Compiled function body — bytecode + constant pool. *) +and vm_code = { + vc_arity : int; + vc_locals : int; + vc_bytecode : int array; + vc_constants : value array; +} + +(** Upvalue cell — shared mutable reference to a captured variable. *) +and vm_upvalue_cell = { + mutable uv_value : value; +} + +(** Closure — compiled code + captured upvalues + live env reference. *) +and vm_closure = { + vm_code : vm_code; + vm_upvalues : vm_upvalue_cell array; + vm_name : string option; + vm_env_ref : (string, value) Hashtbl.t; +} + (** {1 Errors} *) @@ -202,7 +231,7 @@ let make_lambda params body closure = | List items -> List.map value_to_string items | _ -> value_to_string_list params in - Lambda { l_params = ps; l_body = body; l_closure = unwrap_env_val closure; l_name = None } + Lambda { l_params = ps; l_body = body; l_closure = unwrap_env_val closure; l_name = None; l_compiled = None } let make_component name params has_children body closure affinity = let n = value_to_string name in @@ -212,6 +241,7 @@ let make_component name params has_children body closure affinity = Component { c_name = n; c_params = ps; c_has_children = hc; c_body = body; c_closure = unwrap_env_val closure; c_affinity = aff; + c_compiled = None; } let make_island name params has_children body closure = diff --git a/hosts/ocaml/lib/sx_vm.ml b/hosts/ocaml/lib/sx_vm.ml index 2d1635f..6d40a94 100644 --- a/hosts/ocaml/lib/sx_vm.ml +++ b/hosts/ocaml/lib/sx_vm.ml @@ -5,39 +5,19 @@ no allocation per step (unlike the CEK machine). This is the platform-native execution engine. The same bytecode - runs on all platforms (OCaml, JS, WASM). *) + runs on all platforms (OCaml, JS, WASM). + + VM types (vm_code, vm_upvalue_cell, vm_closure) are defined in + sx_types.ml to share the mutual recursion block with [value]. *) open Sx_types -(** Code object — compiled function body. *) -type code = { - arity : int; - locals : int; - bytecode : int array; - constants : value array; -} - -(** Upvalue cell — shared mutable reference to a captured variable. - Open when the variable is still on the stack; closed when the - enclosing frame returns and the value is moved to the heap. *) -type upvalue_cell = { - mutable uv_value : value; -} - -(** Closure — code + captured upvalues + live env reference. *) -type vm_closure = { - code : code; - upvalues : upvalue_cell array; - name : string option; - env_ref : (string, value) Hashtbl.t; (* live global env — NOT a snapshot *) -} - (** Call frame — one per function invocation. *) type frame = { closure : vm_closure; mutable ip : int; base : int; (* base index in value stack for locals *) - local_cells : (int, upvalue_cell) Hashtbl.t; (* slot → shared cell for captured locals *) + local_cells : (int, vm_upvalue_cell) Hashtbl.t; (* slot → shared cell for captured locals *) } (** VM state. *) @@ -48,6 +28,19 @@ type vm = { globals : (string, value) Hashtbl.t; (* live reference to kernel env *) } +(** Forward reference for JIT compilation — set after definition. *) +let jit_compile_ref : (lambda -> (string, value) Hashtbl.t -> vm_closure option) ref = + ref (fun _ _ -> None) + +(** Sentinel closure indicating JIT compilation was attempted and failed. + Prevents retrying compilation on every call. *) +let jit_failed_sentinel = { + vm_code = { vc_arity = -1; vc_locals = 0; vc_bytecode = [||]; vc_constants = [||] }; + vm_upvalues = [||]; vm_name = Some "__jit_failed__"; vm_env_ref = Hashtbl.create 0 +} + +let is_jit_failed cl = cl.vm_code.vc_arity = -1 + let create globals = { stack = Array.make 4096 Nil; sp = 0; frames = []; globals } @@ -69,12 +62,12 @@ let[@inline] peek vm = vm.stack.(vm.sp - 1) (** Read operands. *) let[@inline] read_u8 f = - let v = f.closure.code.bytecode.(f.ip) in + let v = f.closure.vm_code.vc_bytecode.(f.ip) in f.ip <- f.ip + 1; v let[@inline] read_u16 f = - let lo = f.closure.code.bytecode.(f.ip) in - let hi = f.closure.code.bytecode.(f.ip + 1) in + let lo = f.closure.vm_code.vc_bytecode.(f.ip) in + let hi = f.closure.vm_code.vc_bytecode.(f.ip + 1) in f.ip <- f.ip + 2; lo lor (hi lsl 8) @@ -84,7 +77,7 @@ let[@inline] read_i16 f = (** Wrap a VM closure as an SX value (NativeFn). *) let closure_to_value cl = - NativeFn ("vm:" ^ (match cl.name with Some n -> n | None -> "anon"), + NativeFn ("vm:" ^ (match cl.vm_name with Some n -> n | None -> "anon"), fun args -> raise (Eval_error ("VM_CLOSURE_CALL:" ^ String.concat "," (List.map Sx_runtime.value_to_str args)))) (* Placeholder — actual calls go through vm_call below *) @@ -93,8 +86,8 @@ let rec run vm = match vm.frames with | [] -> () (* no frame = done *) | frame :: rest_frames -> - let bc = frame.closure.code.bytecode in - let consts = frame.closure.code.constants in + let bc = frame.closure.vm_code.vc_bytecode in + let consts = frame.closure.vm_code.vc_constants in if frame.ip >= Array.length bc then () else let saved_ip = frame.ip in @@ -139,15 +132,15 @@ let rec run vm = run vm | 18 (* OP_UPVALUE_GET *) -> let idx = read_u8 frame in - if idx >= Array.length frame.closure.upvalues then + if idx >= Array.length frame.closure.vm_upvalues then raise (Eval_error (Printf.sprintf "VM: UPVALUE_GET idx=%d out of bounds (have %d)" idx - (Array.length frame.closure.upvalues))); - push vm frame.closure.upvalues.(idx).uv_value; + (Array.length frame.closure.vm_upvalues))); + push vm frame.closure.vm_upvalues.(idx).uv_value; run vm | 19 (* OP_UPVALUE_SET *) -> let idx = read_u8 frame in - frame.closure.upvalues.(idx).uv_value <- peek vm; + frame.closure.vm_upvalues.(idx).uv_value <- peek vm; run vm | 20 (* OP_GLOBAL_GET *) -> let idx = read_u16 frame in @@ -209,9 +202,6 @@ let rec run vm = raise (Eval_error (Printf.sprintf "VM: CLOSURE idx %d >= consts %d" idx (Array.length consts))); let code_val = consts.(idx) in let code = code_from_value code_val in - Printf.eprintf "[vm-closure] idx=%d type=%s bc_len=%d consts=%d sp_before=%d\n%!" - idx (type_of code_val) - (Array.length code.bytecode) (Array.length code.constants) vm.sp; (* Read upvalue descriptors from bytecode *) let uv_count = match code_val with | Dict d -> (match Hashtbl.find_opt d "upvalue-count" with @@ -235,9 +225,9 @@ let rec run vm = cell end else (* Capture from enclosing frame's upvalue — already a shared cell *) - frame.closure.upvalues.(index) + frame.closure.vm_upvalues.(index) ) in - let cl = { code; upvalues; name = None; env_ref = vm.globals } in + let cl = { vm_code = code; vm_upvalues = upvalues; vm_name = None; vm_env_ref = vm.globals } in (* Wrap as NativeFn that calls back into the VM *) let fn = NativeFn ("vm-closure", fun args -> call_closure cl args vm.globals) @@ -306,26 +296,61 @@ let rec run vm = raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d" opcode (frame.ip - 1))) with Invalid_argument msg -> - let fn_name = match frame.closure.name with Some n -> n | None -> "?" in + let fn_name = match frame.closure.vm_name with Some n -> n | None -> "?" in raise (Eval_error (Printf.sprintf "VM: %s at ip=%d op=%d in %s (base=%d sp=%d bc_len=%d consts=%d)" msg saved_ip op fn_name frame.base vm.sp (Array.length bc) (Array.length consts)))) -(** Call a value as a function — dispatch by type. *) +(** Call a value as a function — dispatch by type. + For Lambda values, tries JIT compilation before falling back to CEK. *) and vm_call vm f args = match f with | NativeFn (_name, fn) -> let result = fn args in push vm result - | Lambda _ | Component _ | Island _ -> - (* Fall back to CEK machine for SX-defined functions *) + | Lambda l -> + (* Try JIT-compiled path first *) + (match l.l_compiled with + | Some cl when not (is_jit_failed cl) -> + (* Execute cached bytecode; fall back to CEK on VM error *) + (try push vm (call_closure cl args vm.globals) + with _ -> + l.l_compiled <- Some jit_failed_sentinel; + push vm (Sx_ref.cek_call f (List args))) + | Some _ -> + (* Previously failed or skipped — use CEK *) + push vm (Sx_ref.cek_call f (List args)) + | None -> + (* Only JIT-compile named lambdas (from define). + Anonymous lambdas (map/filter callbacks) are usually one-shot — + compiling them costs more than interpreting. *) + if l.l_name <> None then begin + (* Pre-mark before compile attempt to prevent re-entrancy *) + l.l_compiled <- Some jit_failed_sentinel; + match !jit_compile_ref l vm.globals with + | Some cl -> + l.l_compiled <- Some cl; + (try push vm (call_closure cl args vm.globals) + with _ -> + l.l_compiled <- Some jit_failed_sentinel; + push vm (Sx_ref.cek_call f (List args))) + | None -> + push vm (Sx_ref.cek_call f (List args)) + end + else begin + (* Mark anonymous lambdas as skipped to avoid re-checking *) + l.l_compiled <- Some jit_failed_sentinel; + push vm (Sx_ref.cek_call f (List args)) + end) + | Component _ | Island _ -> + (* Components use keyword-arg parsing — CEK handles this *) let result = Sx_ref.cek_call f (List args) in push vm result | _ -> raise (Eval_error ("VM: not callable: " ^ Sx_runtime.value_to_str f)) -(** Convert compiler output (SX dict) to a code object. *) +(** Convert compiler output (SX dict) to a vm_code object. *) and code_from_value v = match v with | Dict d -> @@ -346,35 +371,99 @@ and code_from_value v = let arity = match Hashtbl.find_opt d "arity" with | Some (Number n) -> int_of_float n | _ -> 0 in - { arity; locals = arity + 16; bytecode = bc_list; constants } - | _ -> { arity = 0; locals = 16; bytecode = [||]; constants = [||] } + { vc_arity = arity; vc_locals = arity + 16; vc_bytecode = bc_list; vc_constants = constants } + | _ -> { vc_arity = 0; vc_locals = 16; vc_bytecode = [||]; vc_constants = [||] } -(** Execute a closure with arguments. - If called from within a VM (via NativeFn wrapper from for-each/map), - the upvalue cells already contain the captured values — no parent - frame needed. The fresh VM is fine because upvalues are heap-allocated - cells, not stack references. *) +(** Execute a closure with arguments. *) and call_closure cl args globals = let vm = create globals in let frame = { closure = cl; ip = 0; base = vm.sp; local_cells = Hashtbl.create 4 } in List.iter (fun a -> push vm a) args; - for _ = List.length args to cl.code.locals - 1 do push vm Nil done; + for _ = List.length args to cl.vm_code.vc_locals - 1 do push vm Nil done; vm.frames <- [frame]; (try run vm with e -> Printf.eprintf "[vm-call-closure] FAIL in %s: %s (bc_len=%d args=%d sp=%d)\n%!" - (match cl.name with Some n -> n | None -> "?") + (match cl.vm_name with Some n -> n | None -> "?") (Printexc.to_string e) - (Array.length cl.code.bytecode) (List.length args) vm.sp; + (Array.length cl.vm_code.vc_bytecode) (List.length args) vm.sp; raise e); pop vm (** Execute a compiled module (top-level bytecode). *) let execute_module code globals = - let cl = { code; upvalues = [||]; name = Some "module"; env_ref = globals } in + let cl = { vm_code = code; vm_upvalues = [||]; vm_name = Some "module"; vm_env_ref = globals } in let vm = create globals in let frame = { closure = cl; ip = 0; base = 0; local_cells = Hashtbl.create 4 } in - for _ = 0 to code.locals - 1 do push vm Nil done; + for _ = 0 to code.vc_locals - 1 do push vm Nil done; vm.frames <- [frame]; run vm; pop vm + + +(** {1 Lazy JIT compilation} *) + +(** Compile a lambda or component body to bytecode using the SX compiler. + Invokes [compile] from spec/compiler.sx via the CEK machine. + Returns a [vm_closure] ready for execution, or [None] on failure + (safe fallback to CEK interpretation). + + The compilation cost is a single CEK evaluation of the compiler — + microseconds per function. The result is cached in the lambda/component + record so subsequent calls go straight to the VM. *) +let jit_compile_lambda (l : lambda) globals = + let fn_name = match l.l_name with Some n -> n | None -> "" in + try + let compile_fn = try Hashtbl.find globals "compile" + with Not_found -> raise (Eval_error "JIT: compiler not loaded") in + (* Reconstruct the (fn (params) body) form so the compiler produces + a proper closure. l.l_body is the inner body; we need the full + function form with params so the compiled code binds them. *) + let param_syms = List (List.map (fun s -> Symbol s) l.l_params) in + let fn_expr = List [Symbol "fn"; param_syms; l.l_body] in + let quoted = List [Symbol "quote"; fn_expr] in + let result = Sx_ref.eval_expr (List [compile_fn; quoted]) (Env (make_env ())) in + match result with + | Dict d when Hashtbl.mem d "bytecode" -> + let outer_code = code_from_value result in + let bc = outer_code.vc_bytecode in + if Array.length bc >= 4 && bc.(0) = 51 (* OP_CLOSURE *) then begin + let idx = bc.(1) lor (bc.(2) lsl 8) in + if idx < Array.length outer_code.vc_constants then + let inner_val = outer_code.vc_constants.(idx) in + let code = code_from_value inner_val in + Some { vm_code = code; vm_upvalues = [||]; + vm_name = l.l_name; vm_env_ref = globals } + else begin + Printf.eprintf "[jit] FAIL %s: closure index %d out of bounds (pool=%d)\n%!" + fn_name idx (Array.length outer_code.vc_constants); + + None + end + end else begin + (* Not a closure — constant expression, alias, or simple computation. + Execute the bytecode as a module to get the value, then wrap + as a NativeFn if it's callable (so the CEK can dispatch to it). *) + (try + let value = execute_module outer_code globals in + Printf.eprintf "[jit] RESOLVED %s: %s (bc[0]=%d)\n%!" + fn_name (type_of value) (if Array.length bc > 0 then bc.(0) else -1); + (* If the resolved value is a NativeFn, we can't wrap it as a + vm_closure — just let the CEK handle it directly. Return None + so the lambda falls through to CEK, which will find the + resolved value in the env on next lookup. *) + None + with _ -> + Printf.eprintf "[jit] SKIP %s: non-closure execution failed (bc[0]=%d, len=%d)\n%!" + fn_name (if Array.length bc > 0 then bc.(0) else -1) (Array.length bc); + None) + end + | _ -> + Printf.eprintf "[jit] FAIL %s: compiler returned %s\n%!" fn_name (type_of result); + None + with e -> + Printf.eprintf "[jit] FAIL %s: %s\n%!" fn_name (Printexc.to_string e); + None + +(* Wire up the forward reference *) +let () = jit_compile_ref := jit_compile_lambda diff --git a/shared/static/scripts/sx-browser.js b/shared/static/scripts/sx-browser.js index 478af93..6115135 100644 --- a/shared/static/scripts/sx-browser.js +++ b/shared/static/scripts/sx-browser.js @@ -14,7 +14,7 @@ // ========================================================================= var NIL = Object.freeze({ _nil: true, toString: function() { return "nil"; } }); - var SX_VERSION = "2026-03-22T20:34:05Z"; + var SX_VERSION = "2026-03-23T08:09:30Z"; function isNil(x) { return x === NIL || x === null || x === undefined; } function isSxTruthy(x) { return x !== false && !isNil(x); } @@ -2157,7 +2157,7 @@ PRIMITIVES["thaw-from-cid"] = thawFromCid; // === Transpiled from render (core) === // HTML_TAGS - var HTML_TAGS = ["html", "head", "body", "title", "meta", "link", "script", "style", "noscript", "header", "nav", "main", "section", "article", "aside", "footer", "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "div", "p", "blockquote", "pre", "figure", "figcaption", "address", "details", "summary", "a", "span", "em", "strong", "small", "b", "i", "u", "s", "mark", "sub", "sup", "abbr", "cite", "code", "time", "br", "wbr", "hr", "ul", "ol", "li", "dl", "dt", "dd", "table", "thead", "tbody", "tfoot", "tr", "th", "td", "caption", "colgroup", "col", "form", "input", "textarea", "select", "option", "optgroup", "button", "label", "fieldset", "legend", "output", "datalist", "img", "video", "audio", "source", "picture", "canvas", "iframe", "svg", "math", "path", "circle", "ellipse", "rect", "line", "polyline", "polygon", "text", "tspan", "g", "defs", "use", "clipPath", "mask", "pattern", "linearGradient", "radialGradient", "stop", "filter", "feGaussianBlur", "feOffset", "feBlend", "feColorMatrix", "feComposite", "feMerge", "feMergeNode", "feTurbulence", "feComponentTransfer", "feFuncR", "feFuncG", "feFuncB", "feFuncA", "feDisplacementMap", "feFlood", "feImage", "feMorphology", "feSpecularLighting", "feDiffuseLighting", "fePointLight", "feSpotLight", "feDistantLight", "animate", "animateTransform", "foreignObject", "template", "slot", "dialog", "menu"]; + var HTML_TAGS = ["html", "head", "body", "title", "meta", "link", "script", "style", "noscript", "header", "nav", "main", "section", "article", "aside", "footer", "h1", "h2", "h3", "h4", "h5", "h6", "hgroup", "div", "p", "blockquote", "pre", "figure", "figcaption", "address", "details", "summary", "a", "span", "em", "strong", "small", "b", "i", "u", "s", "mark", "sub", "sup", "abbr", "cite", "code", "kbd", "samp", "var", "time", "br", "wbr", "hr", "ul", "ol", "li", "dl", "dt", "dd", "table", "thead", "tbody", "tfoot", "tr", "th", "td", "caption", "colgroup", "col", "form", "input", "textarea", "select", "option", "optgroup", "button", "label", "fieldset", "legend", "output", "datalist", "img", "video", "audio", "source", "picture", "canvas", "iframe", "svg", "math", "path", "circle", "ellipse", "rect", "line", "polyline", "polygon", "text", "tspan", "g", "defs", "use", "clipPath", "mask", "pattern", "linearGradient", "radialGradient", "stop", "filter", "feGaussianBlur", "feOffset", "feBlend", "feColorMatrix", "feComposite", "feMerge", "feMergeNode", "feTurbulence", "feComponentTransfer", "feFuncR", "feFuncG", "feFuncB", "feFuncA", "feDisplacementMap", "feFlood", "feImage", "feMorphology", "feSpecularLighting", "feDiffuseLighting", "fePointLight", "feSpotLight", "feDistantLight", "animate", "animateTransform", "foreignObject", "template", "slot", "dialog", "menu"]; PRIMITIVES["HTML_TAGS"] = HTML_TAGS; // VOID_ELEMENTS diff --git a/shared/sx/ocaml_bridge.py b/shared/sx/ocaml_bridge.py index b28accd..9dc0bb7 100644 --- a/shared/sx/ocaml_bridge.py +++ b/shared/sx/ocaml_bridge.py @@ -434,13 +434,13 @@ class OcamlBridge: skipped += 1 _logger.warning("OCaml load skipped %s: %s", filepath, e) - # Compile adapter to VM after all files loaded (inside lock) + # Register JIT hook — lambdas compile on first call try: await self._send('(vm-compile-adapter)') await self._read_until_ok(ctx=None) - _logger.info("VM adapter compiled — aser runs on bytecode VM") + _logger.info("JIT hook registered — lambdas compile on first call") except OcamlBridgeError as e: - _logger.warning("VM adapter compilation skipped: %s", e) + _logger.warning("JIT hook registration skipped: %s", e) _logger.info("Loaded %d definitions from .sx files into OCaml kernel (%d skipped)", count, skipped) except Exception as e: diff --git a/spec/render.sx b/spec/render.sx index 9b48609..09faf9d 100644 --- a/spec/render.sx +++ b/spec/render.sx @@ -32,7 +32,7 @@ "div" "p" "blockquote" "pre" "figure" "figcaption" "address" "details" "summary" ;; Inline "a" "span" "em" "strong" "small" "b" "i" "u" "s" "mark" "sub" "sup" - "abbr" "cite" "code" "time" "br" "wbr" "hr" + "abbr" "cite" "code" "kbd" "samp" "var" "time" "br" "wbr" "hr" ;; Lists "ul" "ol" "li" "dl" "dt" "dd" ;; Tables diff --git a/sx/sxc/pages/sx_router.py b/sx/sxc/pages/sx_router.py index a60cad3..23dead4 100644 --- a/sx/sxc/pages/sx_router.py +++ b/sx/sxc/pages/sx_router.py @@ -230,8 +230,8 @@ async def eval_sx_url(raw_path: str) -> Any: html = await bridge.sx_page_full( page_source, shell_kwargs, ctx=ocaml_ctx) _t3 = _time.monotonic() - logger.info("[sx-page-full-py] ctx=%.3fs kwargs=%.3fs ocaml=%.3fs total=%.3fs", - _t1-_t0, _t2-_t1, _t3-_t2, _t3-_t0) + logger.info("[sx-page-full-py] %s ctx=%.3fs kwargs=%.3fs ocaml=%.3fs total=%.3fs", + raw_path, _t1-_t0, _t2-_t1, _t3-_t2, _t3-_t0) return await make_response(html, 200) else: content_sx = await _eval_slot(wrapped_ast, env, ctx)