diff --git a/hosts/ocaml/bin/run_tests.ml b/hosts/ocaml/bin/run_tests.ml index 4145a56a..d3f7afb9 100644 --- a/hosts/ocaml/bin/run_tests.ml +++ b/hosts/ocaml/bin/run_tests.ml @@ -1597,7 +1597,92 @@ let run_foundation_tests () = | other -> incr fail_count; Printf.printf " FAIL: invocation_count: %s\n" - (match other with Some n -> string_of_int n | None -> "None")) + (match other with Some n -> string_of_int n | None -> "None")); + + Printf.printf "\nSuite: jit extension-opcode awareness\n"; + let scan = Sx_vm.bytecode_uses_extension_opcodes in + let no_consts = [||] in + + (* Pure core ops: scan reports false. *) + (* OP_TRUE OP_RETURN *) + if not (scan [| 3; 50 |] no_consts) then begin + incr pass_count; + Printf.printf " PASS: pure core bytecode is JIT-eligible\n" + end else begin + incr fail_count; + Printf.printf " FAIL: pure core bytecode flagged as extension\n" + end; + + (* Extension opcode anywhere → true. *) + if scan [| 220; 50 |] no_consts then begin + incr pass_count; + Printf.printf " PASS: extension opcode detected at head\n" + end else begin + incr fail_count; + Printf.printf " FAIL: extension opcode at head missed\n" + end; + + (* Mixed: core + extension → true. *) + if scan [| 3; 220; 50 |] no_consts then begin + incr pass_count; + Printf.printf " PASS: extension opcode detected after core ops\n" + end else begin + incr fail_count; + Printf.printf " FAIL: extension opcode after core ops missed\n" + end; + + (* Operand bytes ≥200 must NOT trigger. CONST u16 with index 220 + into a synthetic constant pool — the operand is 220 (lo) 0 (hi), + not an opcode. The pool entry at 220 is irrelevant for the scan. *) + let big_consts = Array.make 256 Nil in + if not (scan [| 1; 220; 0; 50 |] big_consts) then begin + incr pass_count; + Printf.printf " PASS: CONST operand ≥200 not a false positive\n" + end else begin + incr fail_count; + Printf.printf " FAIL: CONST operand ≥200 false-positives as ext op\n" + end; + + (* CALL_PRIM has 3 operand bytes (u16 + u8); all ≥200 should not + trigger. *) + if not (scan [| 52; 220; 200; 200; 50 |] big_consts) then begin + incr pass_count; + Printf.printf " PASS: CALL_PRIM operands ≥200 not a false positive\n" + end else begin + incr fail_count; + Printf.printf " FAIL: CALL_PRIM operands ≥200 false-positive\n" + end; + + (* CLOSURE with upvalue descriptors: scan must skip the 2 + 2*n + dynamic operand bytes. Build a synthetic constant pool with a + Dict at index 0 declaring upvalue-count 1, descriptors that are + ≥200 — the scan should skip them and not trigger. + + Bytecode layout: CLOSURE 0 0 desc_is_local desc_index RETURN + op lo hi 210 220 50 + With upvalue-count = 1, scan must advance past the 2-byte CLOSURE + operand AND the 2 descriptor bytes (210, 220), landing on RETURN. *) + let cl_consts = Array.make 1 Nil in + let dict = Hashtbl.create 1 in + Hashtbl.replace dict "upvalue-count" (Integer 1); + cl_consts.(0) <- Dict dict; + if not (scan [| 51; 0; 0; 210; 220; 50 |] cl_consts) then begin + incr pass_count; + Printf.printf " PASS: CLOSURE upvalue descriptors ≥200 skipped\n" + end else begin + incr fail_count; + Printf.printf " FAIL: CLOSURE upvalue descriptors false-positive\n" + end; + + (* Sanity: opcode after CLOSURE+descriptors that IS an extension + opcode triggers correctly. *) + if scan [| 51; 0; 0; 210; 220; 221; 50 |] cl_consts then begin + incr pass_count; + Printf.printf " PASS: extension opcode after CLOSURE detected\n" + end else begin + incr fail_count; + Printf.printf " FAIL: extension opcode after CLOSURE missed\n" + end (* ====================================================================== *) diff --git a/hosts/ocaml/lib/sx_vm.ml b/hosts/ocaml/lib/sx_vm.ml index 5654f630..0899440c 100644 --- a/hosts/ocaml/lib/sx_vm.ml +++ b/hosts/ocaml/lib/sx_vm.ml @@ -1048,6 +1048,62 @@ let _jit_is_broken_name n = || n = "hs-repeat-while" || n = "hs-repeat-until" || n = "hs-for-each" || n = "hs-put!" +(** Scan bytecode for any extension opcode (≥ 200, the registry's + [Sx_vm_extensions.extension_min]). Walks operand bytes correctly + so values that happen to be ≥200 (e.g. a CONST u16 index pointing + into a large pool) do not trigger false positives. CLOSURE's + dynamic upvalue descriptors are read from the constant pool entry + at the same index it pushes. + + Used by [jit_compile_lambda] (Phase E of the opcode-extension + plan): a lambda whose compiled body contains any extension opcode + is routed through interpretation rather than JIT. Extensions + interpret their opcodes via the registry; the JIT does not + currently know how to compile them. + + Operand-size logic mirrors [opcode_operand_size] (which is defined + later, in the disassembly section); inlined here so this helper can + sit before [jit_compile_lambda] in the file. *) +let bytecode_uses_extension_opcodes (bc : int array) (consts : value array) = + let core_operand_size = function + | 1 | 20 | 21 | 64 | 65 | 128 -> 2 (* u16 *) + | 16 | 17 | 18 | 19 | 48 | 49 | 144 -> 1 (* u8 *) + | 32 | 33 | 34 | 35 -> 2 (* i16 *) + | 52 -> 3 (* CALL_PRIM: u16 + u8 *) + | _ -> 0 + in + let len = Array.length bc in + let ip = ref 0 in + let found = ref false in + while not !found && !ip < len do + let op = bc.(!ip) in + if op >= 200 then found := true + else begin + ip := !ip + 1; + let extra = match op with + | 51 (* CLOSURE *) when !ip + 1 < len -> + let lo = bc.(!ip) in + let hi = bc.(!ip + 1) in + let idx = lo lor (hi lsl 8) in + let uv_count = + if idx < Array.length consts then + (match consts.(idx) with + | Dict d -> + (match Hashtbl.find_opt d "upvalue-count" with + | Some (Integer n) -> n + | Some (Number n) -> int_of_float n + | _ -> 0) + | _ -> 0) + else 0 + in + 2 + uv_count * 2 + | _ -> core_operand_size op + in + ip := !ip + extra + end + done; + !found + let jit_compile_lambda (l : lambda) globals = let fn_name = match l.l_name with Some n -> n | None -> "" in if !_jit_compiling then ( @@ -1110,8 +1166,18 @@ let jit_compile_lambda (l : lambda) globals = if idx < Array.length outer_code.vc_constants then let inner_val = outer_code.vc_constants.(idx) in let code = code_from_value inner_val in - Some { vm_code = code; vm_upvalues = [||]; - vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure } + (* Phase E: if the inner lambda's bytecode contains any + extension opcode (≥200), skip JIT and let the lambda run + interpreted via CEK. Extension opcodes dispatch correctly + through the VM's registry fallthrough, but the JIT has no + knowledge of them and shouldn't claim ownership. *) + if bytecode_uses_extension_opcodes code.vc_bytecode code.vc_constants then begin + Printf.eprintf "[jit] SKIP %s: bytecode uses extension opcodes (interpret-only in v1)\n%!" + fn_name; + None + end else + Some { vm_code = code; vm_upvalues = [||]; + vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure } else begin Printf.eprintf "[jit] FAIL %s: closure index %d out of bounds (pool=%d)\n%!" fn_name idx (Array.length outer_code.vc_constants);