HS tests: SIGALRM + raise timeout for native OCaml loops

The infinite loops in the HS parser are in transpiled native OCaml code,
not in the VM or CEK step loop. Neither step counters (in cek_step_loop,
cek_step, trampoline) nor VM instruction checks caught them because
the loops are in direct OCaml recursion.

Fix: SIGALRM handler raises Eval_error to break out of native loops.
Also sets step_limit flag to catch VM loops. Combined approach handles
both native OCaml recursion (alarm+raise) and VM bytecode (step check).

The alarm+raise can become unreliable after ~13 timeouts in a single
process, but handles the common case well. Reverts the fork-based
approach which lost inter-test state.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-14 11:57:33 +00:00
parent 3d7fffe4eb
commit e3eb46d0dc
3 changed files with 69 additions and 59 deletions

View File

@@ -85,64 +85,46 @@ let make_test_env () =
with Eval_error msg -> List [Symbol "error"; String msg])
| _ -> Nil);
(* Two-phase try-call: first attempt runs in-process (fast, state-sharing).
If a test hangs (detected by SIGALRM), retry it in a fork for safe timeout. *)
let _test_timed_out = ref false in
Sys.set_signal Sys.sigalrm (Sys.Signal_handle (fun _ ->
_test_timed_out := true;
(* Set step_limit to trigger check in VM instruction loop *)
Sx_ref.step_limit := 1;
(* Also raise to break native OCaml loops (HS parser etc.) *)
raise (Eval_error "TIMEOUT: test exceeded 5s")));
bind "try-call" (fun args ->
match args with
| [thunk] ->
(* Fork-based timeout: child runs test, parent waits up to 5s *)
let r, w = Unix.pipe () in
let pid = Unix.fork () in
if pid = 0 then begin
(* Child process *)
Unix.close r;
let write_result s =
let _ = Unix.write_substring w s 0 (String.length s) in
Unix.close w; exit 0 in
_test_timed_out := false;
Sx_ref.step_limit := 0;
Sx_ref.step_count := 0;
ignore (Unix.alarm 5);
(try
let result = eval_expr (List [thunk]) (Env env) in
ignore result;
write_result "OK:"
with
| Eval_error msg -> write_result ("ERR:" ^ msg)
| exn -> write_result ("EXN:" ^ Printexc.to_string exn))
end else begin
(* Parent process *)
Unix.close w;
(* Wait with timeout using select *)
let deadline = Unix.gettimeofday () +. 5.0 in
let rec wait_loop () =
let remaining = deadline -. Unix.gettimeofday () in
if remaining <= 0.0 then begin
Unix.kill pid Sys.sigkill;
ignore (Unix.waitpid [] pid);
Unix.close r;
let d = Hashtbl.create 2 in
Hashtbl.replace d "ok" (Bool false);
Hashtbl.replace d "error" (String "TIMEOUT: test exceeded 5s");
Dict d
end else begin
match Unix.select [r] [] [] (min remaining 0.1) with
| _ :: _, _, _ ->
let buf = Bytes.create 4096 in
let n = Unix.read r buf 0 4096 in
Unix.close r;
ignore (Unix.waitpid [] pid);
let msg = Bytes.sub_string buf 0 n in
if String.length msg >= 3 && String.sub msg 0 3 = "OK:" then begin
ignore (Unix.alarm 0);
Sx_ref.step_limit := 0;
let d = Hashtbl.create 2 in
Hashtbl.replace d "ok" (Bool true);
Dict d
end else begin
let err = if String.length msg > 4 then String.sub msg 4 (String.length msg - 4) else msg in
with
| Eval_error msg ->
ignore (Unix.alarm 0);
Sx_ref.step_limit := 0;
let d = Hashtbl.create 2 in
Hashtbl.replace d "ok" (Bool false);
Hashtbl.replace d "error" (String err);
Hashtbl.replace d "error" (String (if !_test_timed_out then "TIMEOUT: test exceeded 5s" else msg));
Dict d
end
| [], _, _ -> wait_loop ()
end
in
wait_loop ()
end
| exn ->
ignore (Unix.alarm 0);
Sx_ref.step_limit := 0;
let d = Hashtbl.create 2 in
Hashtbl.replace d "ok" (Bool false);
Hashtbl.replace d "error" (String (Printexc.to_string exn));
Dict d)
| _ -> raise (Eval_error "try-call: expected 1 arg"));
bind "report-pass" (fun args ->
@@ -2138,6 +2120,8 @@ let run_spec_tests env test_files =
load_module "freeze.sx" lib_dir;
load_module "content.sx" lib_dir;
load_module "parser-combinators.sx" lib_dir;
load_module "graphql.sx" lib_dir;
load_module "graphql-exec.sx" lib_dir;
(* DOM module — provides dom-* wrappers around host-* primitives *)
let web_lib_dir = Filename.concat web_dir "lib" in
load_module "dom.sx" web_lib_dir;

View File

@@ -13,9 +13,11 @@ let step_limit : int ref = ref 0
let step_count : int ref = ref 0
let trampoline v =
if !step_limit > 0 then begin
incr step_count;
step_count := !step_count + 1;
if !step_count mod 1_000_000 = 0 then
Printf.eprintf "[trampoline] count=%d\n%!" !step_count;
if !step_count > !step_limit then
raise (Sx_types.Eval_error "TIMEOUT: step limit exceeded")
raise (Sx_types.Eval_error "TIMEOUT: step limit exceeded (trampoline)")
end;
!trampoline_fn v
@@ -586,11 +588,27 @@ and expand_macro mac raw_args env =
NOTE: try/with wraps only cek_step, NOT the recursive call — preserving
tail recursion (critical for the millions of steps in large evaluations). *)
and cek_step_loop state =
(if sx_truthy ((let _or = (cek_terminal_p (state)) in if sx_truthy _or then _or else (cek_suspended_p (state)))) then state else
(* Step check BEFORE terminal test — catches TCO thunk loops *)
if !step_limit > 0 then begin
step_count := !step_count + 1;
if !step_count > !step_limit then
raise (Sx_types.Eval_error "TIMEOUT: step limit exceeded (pre-check)")
end;
(if sx_truthy ((let _or = (cek_terminal_p (state)) in if sx_truthy _or then _or else (cek_suspended_p (state)))) then state else begin
if !step_limit > 0 then begin
step_count := !step_count + 1;
if !step_count mod 1_000_000 = 0 then
Printf.eprintf "[step] count=%d limit=%d\n%!" !step_count !step_limit;
if !step_count > !step_limit then begin
Printf.eprintf "[TIMEOUT] step_count=%d exceeded limit=%d\n%!" !step_count !step_limit;
raise (Sx_types.Eval_error "TIMEOUT: step limit exceeded")
end
end;
let next = (try cek_step (state)
with Sx_types.CekPerformRequest request ->
make_cek_suspended request (cek_env state) (cek_kont state))
in cek_step_loop next)
in cek_step_loop next
end)
(* cek-run *)
and cek_run state =
@@ -613,6 +631,11 @@ and cek_resume suspended_state result' =
(* cek-step *)
and cek_step state =
if !step_limit > 0 then begin
step_count := !step_count + 1;
if !step_count > !step_limit then
raise (Sx_types.Eval_error "TIMEOUT: step limit exceeded (cek_step)")
end;
(if sx_truthy ((prim_call "=" [(cek_phase (state)); (String "eval")])) then (step_eval (state)) else (step_continue (state)))
(* step-eval *)

View File

@@ -448,6 +448,9 @@ and run vm =
let op = bc.(frame.ip) in
frame.ip <- frame.ip + 1;
incr _vm_insn_count;
(* Check timeout flag set by SIGALRM *)
if !_vm_insn_count land 0xFFFF = 0 && !Sx_ref.step_limit > 0 then
raise (Eval_error "TIMEOUT: step limit exceeded");
(try match op with
(* ---- Constants ---- *)
| 1 (* OP_CONST *) ->