vm-ext: phase E — JIT skips lambdas containing extension opcodes

Adds Sx_vm.bytecode_uses_extension_opcodes — an operand-aware
bytecode scanner that walks past CONST u16, CALL_PRIM u16+u8, and
CLOSURE u16+dynamic upvalue descriptors so operand bytes that happen
to be ≥200 don't false-positive as extension opcodes.

jit_compile_lambda calls the scanner on the inner closure's bytecode.
On hit it returns None — the lambda then runs through CEK
interpretation. The VM's dispatch fallthrough still routes the
extension opcodes themselves through the registry; this change just
prevents the JIT from claiming code it has no plan for.

Tests: 7 new foundation cases — pure core eligible, head/middle/
post-CLOSURE detection, CONST + CALL_PRIM + CLOSURE-descriptor false-
positive avoidance. +7 pass vs Phase D baseline, no regressions
across 11 conformance suites.

Loop complete: acceptance criteria 1-4 met. Hand-off to the Erlang
loop — lib/erlang/vm/dispatcher.sx's Phase 9b stub can now be
replaced with a real hosts/ocaml/lib/extensions/erlang.ml consumer.
This commit is contained in:
2026-05-15 01:53:39 +00:00
parent 4dfccc244d
commit 76614da154
2 changed files with 154 additions and 3 deletions

View File

@@ -1597,7 +1597,92 @@ let run_foundation_tests () =
| other ->
incr fail_count;
Printf.printf " FAIL: invocation_count: %s\n"
(match other with Some n -> string_of_int n | None -> "None"))
(match other with Some n -> string_of_int n | None -> "None"));
Printf.printf "\nSuite: jit extension-opcode awareness\n";
let scan = Sx_vm.bytecode_uses_extension_opcodes in
let no_consts = [||] in
(* Pure core ops: scan reports false. *)
(* OP_TRUE OP_RETURN *)
if not (scan [| 3; 50 |] no_consts) then begin
incr pass_count;
Printf.printf " PASS: pure core bytecode is JIT-eligible\n"
end else begin
incr fail_count;
Printf.printf " FAIL: pure core bytecode flagged as extension\n"
end;
(* Extension opcode anywhere → true. *)
if scan [| 220; 50 |] no_consts then begin
incr pass_count;
Printf.printf " PASS: extension opcode detected at head\n"
end else begin
incr fail_count;
Printf.printf " FAIL: extension opcode at head missed\n"
end;
(* Mixed: core + extension → true. *)
if scan [| 3; 220; 50 |] no_consts then begin
incr pass_count;
Printf.printf " PASS: extension opcode detected after core ops\n"
end else begin
incr fail_count;
Printf.printf " FAIL: extension opcode after core ops missed\n"
end;
(* Operand bytes ≥200 must NOT trigger. CONST u16 with index 220
into a synthetic constant pool — the operand is 220 (lo) 0 (hi),
not an opcode. The pool entry at 220 is irrelevant for the scan. *)
let big_consts = Array.make 256 Nil in
if not (scan [| 1; 220; 0; 50 |] big_consts) then begin
incr pass_count;
Printf.printf " PASS: CONST operand ≥200 not a false positive\n"
end else begin
incr fail_count;
Printf.printf " FAIL: CONST operand ≥200 false-positives as ext op\n"
end;
(* CALL_PRIM has 3 operand bytes (u16 + u8); all ≥200 should not
trigger. *)
if not (scan [| 52; 220; 200; 200; 50 |] big_consts) then begin
incr pass_count;
Printf.printf " PASS: CALL_PRIM operands ≥200 not a false positive\n"
end else begin
incr fail_count;
Printf.printf " FAIL: CALL_PRIM operands ≥200 false-positive\n"
end;
(* CLOSURE with upvalue descriptors: scan must skip the 2 + 2*n
dynamic operand bytes. Build a synthetic constant pool with a
Dict at index 0 declaring upvalue-count 1, descriptors that are
≥200 — the scan should skip them and not trigger.
Bytecode layout: CLOSURE 0 0 desc_is_local desc_index RETURN
op lo hi 210 220 50
With upvalue-count = 1, scan must advance past the 2-byte CLOSURE
operand AND the 2 descriptor bytes (210, 220), landing on RETURN. *)
let cl_consts = Array.make 1 Nil in
let dict = Hashtbl.create 1 in
Hashtbl.replace dict "upvalue-count" (Integer 1);
cl_consts.(0) <- Dict dict;
if not (scan [| 51; 0; 0; 210; 220; 50 |] cl_consts) then begin
incr pass_count;
Printf.printf " PASS: CLOSURE upvalue descriptors ≥200 skipped\n"
end else begin
incr fail_count;
Printf.printf " FAIL: CLOSURE upvalue descriptors false-positive\n"
end;
(* Sanity: opcode after CLOSURE+descriptors that IS an extension
opcode triggers correctly. *)
if scan [| 51; 0; 0; 210; 220; 221; 50 |] cl_consts then begin
incr pass_count;
Printf.printf " PASS: extension opcode after CLOSURE detected\n"
end else begin
incr fail_count;
Printf.printf " FAIL: extension opcode after CLOSURE missed\n"
end
(* ====================================================================== *)

View File

@@ -1048,6 +1048,62 @@ let _jit_is_broken_name n =
|| n = "hs-repeat-while" || n = "hs-repeat-until"
|| n = "hs-for-each" || n = "hs-put!"
(** Scan bytecode for any extension opcode (≥ 200, the registry's
[Sx_vm_extensions.extension_min]). Walks operand bytes correctly
so values that happen to be ≥200 (e.g. a CONST u16 index pointing
into a large pool) do not trigger false positives. CLOSURE's
dynamic upvalue descriptors are read from the constant pool entry
at the same index it pushes.
Used by [jit_compile_lambda] (Phase E of the opcode-extension
plan): a lambda whose compiled body contains any extension opcode
is routed through interpretation rather than JIT. Extensions
interpret their opcodes via the registry; the JIT does not
currently know how to compile them.
Operand-size logic mirrors [opcode_operand_size] (which is defined
later, in the disassembly section); inlined here so this helper can
sit before [jit_compile_lambda] in the file. *)
let bytecode_uses_extension_opcodes (bc : int array) (consts : value array) =
let core_operand_size = function
| 1 | 20 | 21 | 64 | 65 | 128 -> 2 (* u16 *)
| 16 | 17 | 18 | 19 | 48 | 49 | 144 -> 1 (* u8 *)
| 32 | 33 | 34 | 35 -> 2 (* i16 *)
| 52 -> 3 (* CALL_PRIM: u16 + u8 *)
| _ -> 0
in
let len = Array.length bc in
let ip = ref 0 in
let found = ref false in
while not !found && !ip < len do
let op = bc.(!ip) in
if op >= 200 then found := true
else begin
ip := !ip + 1;
let extra = match op with
| 51 (* CLOSURE *) when !ip + 1 < len ->
let lo = bc.(!ip) in
let hi = bc.(!ip + 1) in
let idx = lo lor (hi lsl 8) in
let uv_count =
if idx < Array.length consts then
(match consts.(idx) with
| Dict d ->
(match Hashtbl.find_opt d "upvalue-count" with
| Some (Integer n) -> n
| Some (Number n) -> int_of_float n
| _ -> 0)
| _ -> 0)
else 0
in
2 + uv_count * 2
| _ -> core_operand_size op
in
ip := !ip + extra
end
done;
!found
let jit_compile_lambda (l : lambda) globals =
let fn_name = match l.l_name with Some n -> n | None -> "<anon>" in
if !_jit_compiling then (
@@ -1110,8 +1166,18 @@ let jit_compile_lambda (l : lambda) globals =
if idx < Array.length outer_code.vc_constants then
let inner_val = outer_code.vc_constants.(idx) in
let code = code_from_value inner_val in
Some { vm_code = code; vm_upvalues = [||];
vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure }
(* Phase E: if the inner lambda's bytecode contains any
extension opcode (≥200), skip JIT and let the lambda run
interpreted via CEK. Extension opcodes dispatch correctly
through the VM's registry fallthrough, but the JIT has no
knowledge of them and shouldn't claim ownership. *)
if bytecode_uses_extension_opcodes code.vc_bytecode code.vc_constants then begin
Printf.eprintf "[jit] SKIP %s: bytecode uses extension opcodes (interpret-only in v1)\n%!"
fn_name;
None
end else
Some { vm_code = code; vm_upvalues = [||];
vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure }
else begin
Printf.eprintf "[jit] FAIL %s: closure index %d out of bounds (pool=%d)\n%!"
fn_name idx (Array.length outer_code.vc_constants);