host: fix serving-JIT host miscompile — install IO resolver for http-listen
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s

The serving-JIT perform-in-HO-callback miscompile (map/rest/drop wrong
CALL_PRIM args → blank pages, empty picker) is now fully fixed, so the host
runs 100% serving JIT with NO jit-exclude.

sx-vm-extensions 81177d0e resolves a suspended HO-callback's IO inline
(instead of unwinding the native map/filter loop and corrupting the stack),
but ONLY when a synchronous resolver is installed (!_cek_io_resolver is Some _).
The host serves via the http-listen primitive, whose handler drove durable IO
through cek_run_with_io with the resolver = None — so it hit the unwinding
path the fix doesn't cover. (The vm-ext repro installed a resolver, so it
never exercised the host's real no-resolver path.)

Fix: extract cek_run_with_io's IO resolution into resolve_io_request, and have
http-listen install _cek_io_resolver := Some (fun req _ -> resolve_io_request
req) — byte-identical resolution, so the inline path resolves durable reads
exactly as the CEK loop would.

Verified: host conformance 271/271; ephemeral durable server at 100% JIT (no
exclude): zero fallbacks, real content, related posts shown, picker lists 12
candidates; live blog.rose-ash.com home/post/tags return 200 with related
posts and zero error-log lines; relate-picker Playwright 4/4 (infinite-scroll
+ filter + relate).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
This commit is contained in:
2026-06-28 20:13:24 +00:00
parent 8104aadc2b
commit d8d7663565
3 changed files with 102 additions and 59 deletions

View File

@@ -522,9 +522,59 @@ let rec load_library_file path =
Printf.eprintf "[load-library] %s: %s\n%!" (Filename.basename path) msg Printf.eprintf "[load-library] %s: %s\n%!" (Filename.basename path) msg
) exprs ) exprs
(** IO-aware CEK run — handles suspension by dispatching IO requests. (* IO-aware CEK run (cek_run_with_io, below) — handles suspension by dispatching
Import requests are handled locally (load .sx file). IO requests. Import requests are handled locally (load .sx file). *)
Other IO requests are sent to the Python bridge. *) (** Resolve a single IO request value to its response. Shared by
cek_run_with_io's suspension loop AND the _cek_io_resolver installed for the
http-listen serving path, so the synchronous inline-resolve path (sx_vm.ml's
HO-callback suspend fix) resolves durable reads byte-identically to the
CEK-driven path. Without an installed resolver, a `perform` inside an HO
primitive callback (map/filter/…) unwinds the native loop and corrupts the
stack — the host's map/rest/drop serving-JIT miscompile. *)
and resolve_io_request request =
  (* Turn one IO request value into its response value, dispatching on the
     request's op field. A missing or non-string op falls through to the
     generic branch with an empty op name. *)
  (* Read one named field out of the request. *)
  let field name = Sx_runtime.get_val request (String name) in
  (* An args value that is not a list is treated as a one-element list. *)
  let as_arg_list v = match v with List items -> items | single -> [single] in
  let op_name =
    match field "op" with
    | String name -> name
    | _ -> ""
  in
  match op_name with
  | "import" ->
    (* Libraries are resolved locally by loading their .sx file; the response
       is Nil whether the library was already loaded, freshly loaded, or
       could not be found (the last case only logs a warning). *)
    let lib_spec = field "library" in
    let lib_key = Sx_ref.library_name_key lib_spec in
    let already_loaded = Sx_types.sx_truthy (Sx_ref.library_loaded_p lib_key) in
    if not already_loaded then begin
      match resolve_library_path lib_spec with
      | Some path -> load_library_file path
      | None ->
        Printf.eprintf "[import] WARNING: no file for library %s\n%!"
          (Sx_runtime.value_to_str lib_spec)
    end;
    Nil
  | "text-measure" ->
    (* Positional args are font, size, text; each one falls back to a default
       when it is absent or has the wrong shape. *)
    let args = as_arg_list (field "args") in
    let font =
      match args with
      | String family :: _ -> family
      | _ -> "serif"
    in
    let size =
      match args with
      | [_; Number pt; _] | [_; Number pt] -> pt
      | _ -> 16.0
    in
    let text =
      match args with
      | [_; _; String body] -> body
      | _ -> ""
    in
    let (width, height, ascent, descent) = measure_text_otfm font size text in
    let metrics = Hashtbl.create 4 in
    List.iter
      (fun (key, v) -> Hashtbl.replace metrics key (Number v))
      [ ("width", width); ("height", height);
        ("ascent", ascent); ("descent", descent) ];
    Dict metrics
  | other_op ->
    (* Give the persist store first refusal on the raw args value; anything it
       declines is forwarded to io_request with the args as a list. *)
    let raw_args = field "args" in
    (match Sx_persist_store.handle_op other_op raw_args with
     | Some response -> response
     | None -> io_request other_op (as_arg_list raw_args))
and cek_run_with_io state = and cek_run_with_io state =
let s = ref state in let s = ref state in
let is_terminal s = match Sx_ref.cek_terminal_p s with Bool true -> true | _ -> false in let is_terminal s = match Sx_ref.cek_terminal_p s with Bool true -> true | _ -> false in
@@ -535,49 +585,7 @@ and cek_run_with_io state =
done; done;
if is_suspended !s then begin if is_suspended !s then begin
let request = Sx_runtime.get_val !s (String "request") in let request = Sx_runtime.get_val !s (String "request") in
let op = match Sx_runtime.get_val request (String "op") with String s -> s | _ -> "" in let response = resolve_io_request request in
let response = match op with
| "import" ->
(* Resolve library locally — load the .sx file *)
let lib_spec = Sx_runtime.get_val request (String "library") in
let key = Sx_ref.library_name_key lib_spec in
if Sx_types.sx_truthy (Sx_ref.library_loaded_p key) then
(* Already loaded — just resume *)
Nil
else begin
(match resolve_library_path lib_spec with
| Some path -> load_library_file path
| None ->
Printf.eprintf "[import] WARNING: no file for library %s\n%!"
(Sx_runtime.value_to_str lib_spec));
Nil
end
| "text-measure" ->
let args = let a = Sx_runtime.get_val request (String "args") in
(match a with List l -> l | _ -> [a]) in
let font = match args with String f :: _ -> f | _ -> "serif" in
let size = match args with
| [_font; Number sz; _text] -> sz
| [_font; Number sz] -> sz
| _ -> 16.0 in
let text = match args with
| [_font; _sz; String t] -> t
| _ -> "" in
let (w, h, asc, desc) = measure_text_otfm font size text in
let d = Hashtbl.create 4 in
Hashtbl.replace d "width" (Number w);
Hashtbl.replace d "height" (Number h);
Hashtbl.replace d "ascent" (Number asc);
Hashtbl.replace d "descent" (Number desc);
Dict d
| _ ->
let argsv = Sx_runtime.get_val request (String "args") in
(match Sx_persist_store.handle_op op argsv with
| Some resp -> resp
| None ->
let args = (match argsv with List l -> l | _ -> [argsv]) in
io_request op args)
in
s := Sx_ref.cek_resume !s response; s := Sx_ref.cek_resume !s response;
loop () loop ()
end else end else
@@ -755,6 +763,17 @@ let setup_evaluator_bridge env =
Unix.bind sock Unix.bind sock
(Unix.ADDR_INET (bind_addr, port)); (Unix.ADDR_INET (bind_addr, port));
Unix.listen sock 64; Unix.listen sock 64;
(* Install the synchronous IO resolver for the serving path. Without it, a
`perform` (durable kv read) that fires inside an HO-primitive callback
(map/filter/reduce/…) during request handling suspends through the
native OCaml loop, dropping its iteration state and leaving the stack
misaligned — the serving-JIT host miscompile (map/rest/drop wrong args,
blank pages, empty picker). With a resolver installed, sx_vm.ml resolves
that callback's IO inline (byte-identically to cek_run_with_io) and the
loop is never unwound. Only set if one isn't already installed. *)
(if !Sx_types._cek_io_resolver = None then
Sx_types._cek_io_resolver :=
Some (fun request _state -> resolve_io_request request));
(* SX runtime is shared across threads — serialize handler calls. *) (* SX runtime is shared across threads — serialize handler calls. *)
let mtx = Mutex.create () in let mtx = Mutex.create () in
let reason = function let reason = function

View File

@@ -99,19 +99,14 @@ EPOCH=1
for M in "${MODULES[@]}"; do for M in "${MODULES[@]}"; do
echo "(epoch $EPOCH)"; echo "(load \"$M\")"; EPOCH=$((EPOCH+1)) echo "(epoch $EPOCH)"; echo "(load \"$M\")"; EPOCH=$((EPOCH+1))
done done
# Serving-mode JIT: exclude the request-path SX (host app + Dream framework). The # 100% serving JIT — NO host exclude. The serving-JIT perform-in-HO-callback
# 100%-JIT experiment surfaced the key finding — the kernel miscompile isn't just # miscompile (map/rest/drop wrong args → blank pages, empty picker) is fixed by
# 500s, it SILENTLY returns wrong results (the `drop` in host/blog-relate-options # two composing pieces: sx-vm-extensions 81177d0e resolves a callback's IO
# yielded an empty candidate list → broken relate picker, NO error logged). Silent # inline (instead of unwinding the native HO loop) WHEN a synchronous resolver
# corruption is worse than a crash, so these run on CEK (they're IO-bound — no perf # is installed, and sx_server.ml's http-listen now installs that resolver (it
# loss) while the kernel bug is fixed upstream (sx-vm-extensions, OP_PERFORM resume # mirrors cek_run_with_io exactly). So the whole request path — host app +
# — see plans/HANDOFF-jit-miscompile.md). The Datalog/relations JIT (the real win) # Dream + Datalog — runs under JIT with no exclude. Verified: ephemeral durable
# stays on. Drop this exclude once the resume bug lands, then go 100% JIT again. # server, 100% JIT, zero fallbacks, real content, picker lists candidates.
if [ "${SX_SERVING_JIT:-}" = "1" ]; then
echo "(epoch $EPOCH)"
echo "(eval \"(jit-exclude! \\\"host/*\\\" \\\"dream-*\\\" \\\"dr/*\\\")\")"
EPOCH=$((EPOCH+1))
fi
# Point the blog at the DURABLE file backend (persists under $SX_PERSIST_DIR), # Point the blog at the DURABLE file backend (persists under $SX_PERSIST_DIR),
# then idempotently seed a welcome post (sx_content = SX element markup, the # then idempotently seed a welcome post (sx_content = SX element markup, the
# editor's content model). Re-seeding is a no-op if the slug already exists. # editor's content model). Re-seeding is a no-op if the slug already exists.

View File

@@ -1,5 +1,34 @@
# Hand-off: serving-mode JIT miscompiles host handlers (to sx-vm-extensions) # Hand-off: serving-mode JIT miscompiles host handlers (to sx-vm-extensions)
> ## ✅ RESOLVED 2026-06-28 — host now runs 100% serving JIT, no exclude.
>
> Two composing pieces fixed it:
> 1. **sx-vm-extensions `81177d0e`** (`sx_vm.ml` `call_closure_reuse`): when an
> HO-primitive callback (map/filter/reduce/…) suspends on a `perform` AND a
> synchronous resolver is installed, resolve its IO inline and run it to
> completion instead of unwinding the native loop (which dropped iteration
> state and misaligned the stack → the next `CALL_PRIM` got wrong args).
> 2. **host side (`sx_server.ml`)**: that fix only engages when
> `!_cek_io_resolver` is `Some _`. The host serves via the `http-listen` primitive,
> whose handler drove durable IO through `cek_run_with_io` with the resolver
> **= None**, so it hit the unwinding path the fix doesn't cover (the
> vm-extensions repro `repro_jit_resume.ml` *installed* a resolver, so it never
> exercised the host's real path). Fix: extracted `cek_run_with_io`'s IO
> resolution into `resolve_io_request`, and `http-listen` now installs
> `_cek_io_resolver := Some (fun req _ -> resolve_io_request req)` —
> byte-identical resolution, so the inline-resolve path resolves durable reads
> exactly as the CEK loop would.
>
> Verified: host conformance **271/271**; ephemeral durable server at 100% JIT
> (no exclude) — zero fallbacks, real content, related posts shown, picker lists
> 12 candidates; live blog.rose-ash.com home/post/tags 200 with related posts and
> zero error-log lines; relate-picker Playwright **4/4** (infinite-scroll +
> filter + relate, the `drop` path). `serve.sh` exclude dropped.
>
> Everything below is the original hand-off, kept for the record.
---
> From the **host-on-sx** loop, 2026-06-28. We enabled `SX_SERVING_JIT=1` on the > From the **host-on-sx** loop, 2026-06-28. We enabled `SX_SERVING_JIT=1` on the
> live host (blog.rose-ash.com) — the Datalog/relations saturation JITs cleanly > live host (blog.rose-ash.com) — the Datalog/relations saturation JITs cleanly
> and is the real win (host conformance 271/271 under JIT, 5.4× faster; live > and is the real win (host conformance 271/271 under JIT, 5.4× faster; live