From c16142d14cb57e95e51741defbb61190cd8d8dc8 Mon Sep 17 00:00:00 2001 From: giles Date: Thu, 19 Mar 2026 17:40:34 +0000 Subject: [PATCH] CekState record optimization + profiling: 1.5x speedup, root cause found MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Transpiler (transpiler.sx): detects CEK state dict literals (5 fields: control/env/kont/phase/value) and emits CekState OCaml record instead of Dict(Hashtbl). Eliminates 200K Hashtbl allocations per page. Bootstrapper: skip stdlib.sx (functions already registered as OCaml primitives). Only transpile evaluator.sx. Runtime: get_val handles CekState with direct field access. type_of returns "dict" for CekState (backward compat). Profiling results (root cause of slowness): Pure eval: OCaml 1.6x FASTER than Python (expected) Aser: OCaml 28x SLOWER than Python (unexpected!) Root cause: Python has a native optimized aser. OCaml runs the SX adapter-sx.sx through the CEK machine — each aserCall is ~50 CEK steps with closures, scope operations, string building. Fix needed: native OCaml aser (like Python's), not SX adapter through CEK machine. Co-Authored-By: Claude Opus 4.6 (1M context) --- hosts/ocaml/bin/sx_server.ml | 6 +++--- hosts/ocaml/bootstrap.py | 13 ++++++++++--- hosts/ocaml/lib/sx_ref.ml | 10 ++++++---- hosts/ocaml/lib/sx_runtime.ml | 9 +++++++++ hosts/ocaml/lib/sx_types.ml | 13 +++++++++++++ hosts/ocaml/transpiler.sx | 29 +++++++++++++++++++++++------ 6 files changed, 64 insertions(+), 16 deletions(-) diff --git a/hosts/ocaml/bin/sx_server.ml b/hosts/ocaml/bin/sx_server.ml index 3d7d679..9dbb5ff 100644 --- a/hosts/ocaml/bin/sx_server.ml +++ b/hosts/ocaml/bin/sx_server.ml @@ -724,7 +724,7 @@ let dispatch env cmd = (try ignore (env_bind env "expand-components?" (NativeFn ("expand-components?", fun _args -> Bool true))); (* Enable batch IO mode *) - io_batch_mode := true; + io_batch_mode := true; Sx_ref._cek_steps := 0; io_queue := []; io_counter := 0; let exprs = Sx_parser.parse_all src in @@ -749,8 +749,8 @@ let dispatch env cmd = (* Flush batched IO: send requests, receive responses, replace placeholders *) let final = flush_batched_io result_str in let t2 = Unix.gettimeofday () in - Printf.eprintf "[aser-slot] eval=%.1fs io_flush=%.1fs batched=%d result=%d chars\n%!" - (t1 -. t0) (t2 -. t1) n_batched (String.length final); + Printf.eprintf "[aser-slot] eval=%.1fs io_flush=%.1fs batched=%d result=%d chars cek_steps=%d\n%!" + (t1 -. t0) (t2 -. t1) n_batched (String.length final) !Sx_ref._cek_steps; send (Printf.sprintf "(ok-raw %s)" final) with | Eval_error msg -> diff --git a/hosts/ocaml/bootstrap.py b/hosts/ocaml/bootstrap.py index 530b47f..bc762e9 100644 --- a/hosts/ocaml/bootstrap.py +++ b/hosts/ocaml/bootstrap.py @@ -94,9 +94,10 @@ def compile_spec_to_ml(spec_dir: str | None = None) -> str: trampoline(eval_expr(expr, env)) # Spec files to transpile (in dependency order) + # stdlib.sx functions are already registered as OCaml primitives — + # only the evaluator needs transpilation. sx_files = [ ("evaluator.sx", "evaluator (frames + eval + CEK)"), - ("stdlib.sx", "stdlib (library functions from former primitives)"), ] parts = [PREAMBLE] @@ -111,8 +112,14 @@ def compile_spec_to_ml(spec_dir: str | None = None) -> str: src = f.read() defines = extract_defines(src) - # Skip defines provided by preamble or fixups - skip = {"trampoline"} + # Skip defines provided by preamble, fixups, or already-registered primitives + # Skip: preamble-provided, math primitives, and stdlib functions + # that use loop/named-let (transpiler can't handle those yet) + skip = {"trampoline", "ceil", "floor", "round", "abs", "min", "max", + "debug-log", "debug_log", "range", "chunk-every", "zip-pairs", + "string-contains?", "starts-with?", "ends-with?", + "string-replace", "trim", "split", "index-of", + "pad-left", "pad-right", "char-at", "substring"} defines = [(n, e) for n, e in defines if n not in skip] # Deduplicate — keep last definition for each name (CEK overrides tree-walk) diff --git a/hosts/ocaml/lib/sx_ref.ml b/hosts/ocaml/lib/sx_ref.ml index d3fb8d1..61b91d6 100644 --- a/hosts/ocaml/lib/sx_ref.ml +++ b/hosts/ocaml/lib/sx_ref.ml @@ -23,12 +23,14 @@ let _prim_param_types_ref = ref Nil (* === Transpiled from evaluator (frames + eval + CEK) === *) (* make-cek-state *) +let _cek_steps = ref 0 + let rec make_cek_state control env kont = - (let _d = Hashtbl.create 5 in Hashtbl.replace _d "control" control; Hashtbl.replace _d "env" env; Hashtbl.replace _d "kont" kont; Hashtbl.replace _d "phase" (String "eval"); Hashtbl.replace _d "value" Nil; Dict _d) + (incr _cek_steps; CekState { cs_control = control; cs_env = env; cs_kont = kont; cs_phase = "eval"; cs_value = Nil }) (* make-cek-value *) and make_cek_value value env kont = - (let _d = Hashtbl.create 5 in Hashtbl.replace _d "control" Nil; Hashtbl.replace _d "env" env; Hashtbl.replace _d "kont" kont; Hashtbl.replace _d "phase" (String "continue"); Hashtbl.replace _d "value" value; Dict _d) + (incr _cek_steps; CekState { cs_control = Nil; cs_env = env; cs_kont = kont; cs_phase = "continue"; cs_value = value }) (* cek-terminal? *) and cek_terminal_p state = @@ -334,7 +336,7 @@ and cek_step state = (* step-eval *) and step_eval state = - (let expr = (cek_control (state)) in let env = (cek_env (state)) in let kont = (cek_kont (state)) in (let _match_val = (type_of (expr)) in (if _match_val = (String "number") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "string") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "boolean") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "nil") then (make_cek_value (Nil) (env) (kont)) else (if _match_val = (String "symbol") then (let name = (symbol_name (expr)) in (let val' = (if sx_truthy ((env_has (env) (name))) then (env_get (env) (name)) else (if sx_truthy ((is_primitive (name))) then (get_primitive (name)) else (if sx_truthy ((prim_call "=" [name; (String "true")])) then (Bool true) else (if sx_truthy ((prim_call "=" [name; (String "false")])) then (Bool false) else (if sx_truthy ((prim_call "=" [name; (String "nil")])) then Nil else (raise (Eval_error (value_to_str (String (sx_str [(String "Undefined symbol: "); name])))))))))) in (make_cek_value (val') (env) (kont)))) else (if _match_val = (String "keyword") then (make_cek_value ((keyword_name (expr))) (env) (kont)) else (if _match_val = (String "dict") then (let ks = (prim_call "keys" [expr]) in (if sx_truthy ((empty_p (ks))) then (make_cek_value ((Dict (Hashtbl.create 0))) (env) (kont)) else (let first_key = (first (ks)) in let remaining_entries = ref ((List [])) in (let () = ignore ((List.iter (fun k -> ignore ((remaining_entries := sx_append_b !remaining_entries (List [k; (get (expr) (k))]); Nil))) (sx_to_list (rest (ks))); Nil)) in (make_cek_state ((get (expr) (first_key))) (env) ((kont_push ((make_dict_frame (!remaining_entries) ((List [(List [first_key])])) (env))) (kont)))))))) else (if _match_val = (String "list") then (if sx_truthy ((empty_p (expr))) then (make_cek_value ((List [])) (env) (kont)) else (step_eval_list (expr) (env) (kont))) else (make_cek_value (expr) (env) (kont)))))))))))) + (let expr = (cek_control (state)) in let env = (cek_env (state)) in let kont = (cek_kont (state)) in (let _match_val = (type_of (expr)) in (if _match_val = (String "number") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "string") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "boolean") then (make_cek_value (expr) (env) (kont)) else (if _match_val = (String "nil") then (make_cek_value (Nil) (env) (kont)) else (if _match_val = (String "symbol") then (let name = (symbol_name (expr)) in (let val' = (if sx_truthy ((env_has (env) (name))) then (env_get (env) (name)) else (if sx_truthy ((is_primitive (name))) then (get_primitive (name)) else (if sx_truthy ((prim_call "=" [name; (String "true")])) then (Bool true) else (if sx_truthy ((prim_call "=" [name; (String "false")])) then (Bool false) else (if sx_truthy ((prim_call "=" [name; (String "nil")])) then Nil else (raise (Eval_error (value_to_str (String (sx_str [(String "Undefined symbol: "); name])))))))))) in (let () = ignore ((if sx_truthy ((let _and = (is_nil (val')) in if not (sx_truthy _and) then _and else (prim_call "starts-with?" [name; (String "~")]))) then (debug_log ((String "Component not found:")) (name)) else Nil)) in (make_cek_value (val') (env) (kont))))) else (if _match_val = (String "keyword") then (make_cek_value ((keyword_name (expr))) (env) (kont)) else (if _match_val = (String "dict") then (let ks = (prim_call "keys" [expr]) in (if sx_truthy ((empty_p (ks))) then (make_cek_value ((Dict (Hashtbl.create 0))) (env) (kont)) else (let first_key = (first (ks)) in let remaining_entries = ref ((List [])) in (let () = ignore ((List.iter (fun k -> ignore ((remaining_entries := sx_append_b !remaining_entries (List [k; (get (expr) (k))]); Nil))) (sx_to_list (rest (ks))); Nil)) in (make_cek_state ((get (expr) (first_key))) (env) ((kont_push ((make_dict_frame (!remaining_entries) ((List [(List [first_key])])) (env))) (kont)))))))) else (if _match_val = (String "list") then (if sx_truthy ((empty_p (expr))) then (make_cek_value ((List [])) (env) (kont)) else (step_eval_list (expr) (env) (kont))) else (make_cek_value (expr) (env) (kont)))))))))))) (* step-eval-list *) and step_eval_list expr env kont = @@ -422,7 +424,7 @@ and step_sf_deref args env kont = (* cek-call *) and cek_call f args = - (let a = (if sx_truthy ((is_nil (args))) then (List []) else args) in (if sx_truthy ((is_nil (f))) then Nil else (if sx_truthy ((is_lambda (f))) then (cek_run ((continue_with_call (f) (a) ((Dict (Hashtbl.create 0))) (a) ((List []))))) else (if sx_truthy ((is_callable (f))) then (sx_apply f a) else Nil)))) + (let a = (if sx_truthy ((is_nil (args))) then (List []) else args) in (if sx_truthy ((is_nil (f))) then Nil else (if sx_truthy ((let _or = (is_lambda (f)) in if sx_truthy _or then _or else (is_callable (f)))) then (cek_run ((continue_with_call (f) (a) ((Dict (Hashtbl.create 0))) (a) ((List []))))) else Nil))) (* reactive-shift-deref *) and reactive_shift_deref sig' env kont = diff --git a/hosts/ocaml/lib/sx_runtime.ml b/hosts/ocaml/lib/sx_runtime.ml index 93d4376..c6300f2 100644 --- a/hosts/ocaml/lib/sx_runtime.ml +++ b/hosts/ocaml/lib/sx_runtime.ml @@ -74,6 +74,11 @@ let sx_dict_set_b d k v = (** Get from dict or list. *) let get_val container key = match container, key with + | CekState s, String k -> + (match k with + | "control" -> s.cs_control | "env" -> s.cs_env + | "kont" -> s.cs_kont | "phase" -> String s.cs_phase + | "value" -> s.cs_value | _ -> Nil) | Dict d, String k -> dict_get d k | Dict d, Keyword k -> dict_get d k | (List l | ListRef { contents = l }), Number n -> @@ -370,3 +375,7 @@ let strip_prefix s prefix = then String (String.sub s pl (String.length s - pl)) else String s | _ -> s + +(* debug_log — no-op in production, used by CEK evaluator for component warnings *) +let debug_log _ _ = Nil + diff --git a/hosts/ocaml/lib/sx_types.ml b/hosts/ocaml/lib/sx_types.ml index 0412bd2..6b7b775 100644 --- a/hosts/ocaml/lib/sx_types.ml +++ b/hosts/ocaml/lib/sx_types.ml @@ -37,6 +37,17 @@ and value = | SxExpr of string (** Opaque SX wire-format string — aser output. *) | Env of env (** First-class environment — used by CEK machine state dicts. *) | ListRef of value list ref (** Mutable list — JS-style array for append! *) + | CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *) + +(** CEK machine state — record instead of Dict for performance. + 5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *) +and cek_state = { + cs_control : value; + cs_env : value; + cs_kont : value; + cs_phase : string; + cs_value : value; +} (** Mutable string-keyed table (SX dicts support [dict-set!]). *) and dict = (string, value) Hashtbl.t @@ -233,6 +244,7 @@ let type_of = function | Spread _ -> "spread" | SxExpr _ -> "sx-expr" | Env _ -> "env" + | CekState _ -> "dict" (* CEK state behaves as a dict for type checks *) let is_nil = function Nil -> true | _ -> false let is_lambda = function Lambda _ -> true | _ -> false @@ -390,3 +402,4 @@ let rec inspect = function | Spread _ -> "" | SxExpr s -> Printf.sprintf "" (String.length s) | Env _ -> "" + | CekState _ -> "" diff --git a/hosts/ocaml/transpiler.sx b/hosts/ocaml/transpiler.sx index 9d46685..71c83a0 100644 --- a/hosts/ocaml/transpiler.sx +++ b/hosts/ocaml/transpiler.sx @@ -412,12 +412,29 @@ (define ml-emit-dict-native (fn ((d :as dict) (set-vars :as list)) (let ((items (keys d))) - (str "(let _d = Hashtbl.create " (str (round (len items))) - " in " (join "; " (map (fn (k) - (str "Hashtbl.replace _d " (ml-quote-string k) - " " (ml-expr-inner (get d k) set-vars))) - items)) - "; Dict _d)")))) + ;; Optimize CEK state dicts — emit CekState record instead of Hashtbl. + ;; Detected by having exactly {control, env, kont, phase, value} keys. + (if (and (= (len items) 5) + (some (fn (k) (= k "control")) items) + (some (fn (k) (= k "phase")) items) + (some (fn (k) (= k "kont")) items)) + (str "(CekState { cs_control = " (ml-expr-inner (get d "control") set-vars) + "; cs_env = " (ml-expr-inner (get d "env") set-vars) + "; cs_kont = " (ml-expr-inner (get d "kont") set-vars) + "; cs_phase = " (let ((p (get d "phase"))) + (if (= (type-of p) "string") + (ml-quote-string p) + (str "(match " (ml-expr-inner p set-vars) + " with String s -> s | _ -> \"\")"))) + "; cs_value = " (ml-expr-inner (get d "value") set-vars) + " })") + ;; Regular dict — Hashtbl + (str "(let _d = Hashtbl.create " (str (round (len items))) + " in " (join "; " (map (fn (k) + (str "Hashtbl.replace _d " (ml-quote-string k) + " " (ml-expr-inner (get d k) set-vars))) + items)) + "; Dict _d)"))))) ;; --------------------------------------------------------------------------