From d8d7663565fa14a805be1a33fe199ff72242ad0e Mon Sep 17 00:00:00 2001 From: giles Date: Sun, 28 Jun 2026 20:13:24 +0000 Subject: [PATCH] =?UTF-8?q?host:=20fix=20serving-JIT=20host=20miscompile?= =?UTF-8?q?=20=E2=80=94=20install=20IO=20resolver=20for=20http-listen?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The serving-JIT perform-in-HO-callback miscompile (map/rest/drop wrong CALL_PRIM args → blank pages, empty picker) is now fully fixed, so the host runs 100% serving JIT with NO jit-exclude. sx-vm-extensions 81177d0e resolves a suspended HO-callback's IO inline (instead of unwinding the native map/filter loop and corrupting the stack), but ONLY when a synchronous resolver is installed (!_cek_io_resolver = Some). The host serves via the http-listen primitive, whose handler drove durable IO through cek_run_with_io with the resolver = None — so it hit the unwinding path the fix doesn't cover. (The vm-ext repro installed a resolver, so it never exercised the host's real no-resolver path.) Fix: extract cek_run_with_io's IO resolution into resolve_io_request, and have http-listen install _cek_io_resolver := Some (fun req _ -> resolve_io_request req) — byte-identical resolution, so the inline path resolves durable reads exactly as the CEK loop would. Verified: host conformance 271/271; ephemeral durable server at 100% JIT (no exclude) zero fallbacks + real content + related shown + picker 12 candidates; live blog.rose-ash.com home/post/tags 200 with related posts, zero error-log lines; relate-picker Playwright 4/4 (infinite-scroll + filter + relate). Co-Authored-By: Claude Opus 4.8 --- hosts/ocaml/bin/sx_server.ml | 111 +++++++++++++++++++------------- lib/host/serve.sh | 21 +++--- plans/HANDOFF-jit-miscompile.md | 29 +++++++++ 3 files changed, 102 insertions(+), 59 deletions(-) diff --git a/hosts/ocaml/bin/sx_server.ml b/hosts/ocaml/bin/sx_server.ml index 51859aff..4ecb34c7 100644 --- a/hosts/ocaml/bin/sx_server.ml +++ b/hosts/ocaml/bin/sx_server.ml @@ -522,9 +522,59 @@ let rec load_library_file path = Printf.eprintf "[load-library] %s: %s\n%!" (Filename.basename path) msg ) exprs -(** IO-aware CEK run — handles suspension by dispatching IO requests. - Import requests are handled locally (load .sx file). - Other IO requests are sent to the Python bridge. *) +(* IO-aware CEK run (cek_run_with_io, below) — handles suspension by dispatching + IO requests. Import requests are handled locally (load .sx file). *) +(** Resolve a single IO request value to its response. Shared by + cek_run_with_io's suspension loop AND the _cek_io_resolver installed for the + http-listen serving path, so the synchronous inline-resolve path (sx_vm.ml's + HO-callback suspend fix) resolves durable reads byte-identically to the + CEK-driven path. Without an installed resolver, a `perform` inside an HO + primitive callback (map/filter/…) unwinds the native loop and corrupts the + stack — the host's map/rest/drop serving-JIT miscompile. *) +and resolve_io_request request = + let op = match Sx_runtime.get_val request (String "op") with String s -> s | _ -> "" in + (match op with + | "import" -> + (* Resolve library locally — load the .sx file *) + let lib_spec = Sx_runtime.get_val request (String "library") in + let key = Sx_ref.library_name_key lib_spec in + if Sx_types.sx_truthy (Sx_ref.library_loaded_p key) then + (* Already loaded — just resume *) + Nil + else begin + (match resolve_library_path lib_spec with + | Some path -> load_library_file path + | None -> + Printf.eprintf "[import] WARNING: no file for library %s\n%!" + (Sx_runtime.value_to_str lib_spec)); + Nil + end + | "text-measure" -> + let args = let a = Sx_runtime.get_val request (String "args") in + (match a with List l -> l | _ -> [a]) in + let font = match args with String f :: _ -> f | _ -> "serif" in + let size = match args with + | [_font; Number sz; _text] -> sz + | [_font; Number sz] -> sz + | _ -> 16.0 in + let text = match args with + | [_font; _sz; String t] -> t + | _ -> "" in + let (w, h, asc, desc) = measure_text_otfm font size text in + let d = Hashtbl.create 4 in + Hashtbl.replace d "width" (Number w); + Hashtbl.replace d "height" (Number h); + Hashtbl.replace d "ascent" (Number asc); + Hashtbl.replace d "descent" (Number desc); + Dict d + | _ -> + let argsv = Sx_runtime.get_val request (String "args") in + (match Sx_persist_store.handle_op op argsv with + | Some resp -> resp + | None -> + let args = (match argsv with List l -> l | _ -> [argsv]) in + io_request op args)) + and cek_run_with_io state = let s = ref state in let is_terminal s = match Sx_ref.cek_terminal_p s with Bool true -> true | _ -> false in @@ -535,49 +585,7 @@ and cek_run_with_io state = done; if is_suspended !s then begin let request = Sx_runtime.get_val !s (String "request") in - let op = match Sx_runtime.get_val request (String "op") with String s -> s | _ -> "" in - let response = match op with - | "import" -> - (* Resolve library locally — load the .sx file *) - let lib_spec = Sx_runtime.get_val request (String "library") in - let key = Sx_ref.library_name_key lib_spec in - if Sx_types.sx_truthy (Sx_ref.library_loaded_p key) then - (* Already loaded — just resume *) - Nil - else begin - (match resolve_library_path lib_spec with - | Some path -> load_library_file path - | None -> - Printf.eprintf "[import] WARNING: no file for library %s\n%!" - (Sx_runtime.value_to_str lib_spec)); - Nil - end - | "text-measure" -> - let args = let a = Sx_runtime.get_val request (String "args") in - (match a with List l -> l | _ -> [a]) in - let font = match args with String f :: _ -> f | _ -> "serif" in - let size = match args with - | [_font; Number sz; _text] -> sz - | [_font; Number sz] -> sz - | _ -> 16.0 in - let text = match args with - | [_font; _sz; String t] -> t - | _ -> "" in - let (w, h, asc, desc) = measure_text_otfm font size text in - let d = Hashtbl.create 4 in - Hashtbl.replace d "width" (Number w); - Hashtbl.replace d "height" (Number h); - Hashtbl.replace d "ascent" (Number asc); - Hashtbl.replace d "descent" (Number desc); - Dict d - | _ -> - let argsv = Sx_runtime.get_val request (String "args") in - (match Sx_persist_store.handle_op op argsv with - | Some resp -> resp - | None -> - let args = (match argsv with List l -> l | _ -> [argsv]) in - io_request op args) - in + let response = resolve_io_request request in s := Sx_ref.cek_resume !s response; loop () end else @@ -755,6 +763,17 @@ let setup_evaluator_bridge env = Unix.bind sock (Unix.ADDR_INET (bind_addr, port)); Unix.listen sock 64; + (* Install the synchronous IO resolver for the serving path. Without it, a + `perform` (durable kv read) that fires inside an HO-primitive callback + (map/filter/reduce/…) during request handling suspends through the + native OCaml loop, dropping its iteration state and leaving the stack + misaligned — the serving-JIT host miscompile (map/rest/drop wrong args, + blank pages, empty picker). With a resolver installed, sx_vm.ml resolves + that callback's IO inline (byte-identically to cek_run_with_io) and the + loop is never unwound. Only set if one isn't already installed. *) + (if !Sx_types._cek_io_resolver = None then + Sx_types._cek_io_resolver := + Some (fun request _state -> resolve_io_request request)); (* SX runtime is shared across threads — serialize handler calls. *) let mtx = Mutex.create () in let reason = function diff --git a/lib/host/serve.sh b/lib/host/serve.sh index 1b3bc0c9..472bc090 100755 --- a/lib/host/serve.sh +++ b/lib/host/serve.sh @@ -99,19 +99,14 @@ EPOCH=1 for M in "${MODULES[@]}"; do echo "(epoch $EPOCH)"; echo "(load \"$M\")"; EPOCH=$((EPOCH+1)) done - # Serving-mode JIT: exclude the request-path SX (host app + Dream framework). The - # 100%-JIT experiment surfaced the key finding — the kernel miscompile isn't just - # 500s, it SILENTLY returns wrong results (the `drop` in host/blog-relate-options - # yielded an empty candidate list → broken relate picker, NO error logged). Silent - # corruption is worse than a crash, so these run on CEK (they're IO-bound — no perf - # loss) while the kernel bug is fixed upstream (sx-vm-extensions, OP_PERFORM resume - # — see plans/HANDOFF-jit-miscompile.md). The Datalog/relations JIT (the real win) - # stays on. Drop this exclude once the resume bug lands, then go 100% JIT again. - if [ "${SX_SERVING_JIT:-}" = "1" ]; then - echo "(epoch $EPOCH)" - echo "(eval \"(jit-exclude! \\\"host/*\\\" \\\"dream-*\\\" \\\"dr/*\\\")\")" - EPOCH=$((EPOCH+1)) - fi + # 100% serving JIT — NO host exclude. The serving-JIT perform-in-HO-callback + # miscompile (map/rest/drop wrong args → blank pages, empty picker) is fixed by + # two composing pieces: sx-vm-extensions 81177d0e resolves a callback's IO + # inline (instead of unwinding the native HO loop) WHEN a synchronous resolver + # is installed, and sx_server.ml's http-listen now installs that resolver (it + # mirrors cek_run_with_io exactly). So the whole request path — host app + + # Dream + Datalog — runs under JIT with no exclude. Verified: ephemeral durable + # server, 100% JIT, zero fallbacks, real content, picker lists candidates. # Point the blog at the DURABLE file backend (persists under $SX_PERSIST_DIR), # then idempotently seed a welcome post (sx_content = SX element markup, the # editor's content model). Re-seeding is a no-op if the slug already exists. diff --git a/plans/HANDOFF-jit-miscompile.md b/plans/HANDOFF-jit-miscompile.md index 54f90fc8..d3d0a816 100644 --- a/plans/HANDOFF-jit-miscompile.md +++ b/plans/HANDOFF-jit-miscompile.md @@ -1,5 +1,34 @@ # Hand-off: serving-mode JIT miscompiles host handlers (to sx-vm-extensions) +> ## ✅ RESOLVED 2026-06-28 — host now runs 100% serving JIT, no exclude. +> +> Two composing pieces fixed it: +> 1. **sx-vm-extensions `81177d0e`** (`sx_vm.ml` `call_closure_reuse`): when an +> HO-primitive callback (map/filter/reduce/…) suspends on a `perform` AND a +> synchronous resolver is installed, resolve its IO inline and run it to +> completion instead of unwinding the native loop (which dropped iteration +> state and misaligned the stack → the next `CALL_PRIM` got wrong args). +> 2. **host side (`sx_server.ml`)**: that fix only engages when +> `!_cek_io_resolver = Some`. The host serves via the `http-listen` primitive, +> whose handler drove durable IO through `cek_run_with_io` with the resolver +> **= None**, so it hit the unwinding path the fix doesn't cover (the +> vm-extensions repro `repro_jit_resume.ml` *installed* a resolver, so it never +> exercised the host's real path). Fix: extracted `cek_run_with_io`'s IO +> resolution into `resolve_io_request`, and `http-listen` now installs +> `_cek_io_resolver := Some (fun req _ -> resolve_io_request req)` — byte- +> identical resolution, so the inline-resolve path resolves durable reads +> exactly as the CEK loop would. +> +> Verified: host conformance **271/271**; ephemeral durable server at 100% JIT +> (no exclude) — zero fallbacks, real content, related posts shown, picker lists +> 12 candidates; live blog.rose-ash.com home/post/tags 200 with related posts and +> zero error-log lines; relate-picker Playwright **4/4** (infinite-scroll + +> filter + relate, the `drop` path). `serve.sh` exclude dropped. +> +> Everything below is the original hand-off, kept for the record. + +--- + > From the **host-on-sx** loop, 2026-06-28. We enabled `SX_SERVING_JIT=1` on the > live host (blog.rose-ash.com) — the Datalog/relations saturation JITs cleanly > and is the real win (host conformance 271/271 under JIT, 5.4× faster; live