diff --git a/hosts/ocaml/bin/run_tests.ml b/hosts/ocaml/bin/run_tests.ml index 2cacbf2e..9689fa35 100644 --- a/hosts/ocaml/bin/run_tests.ml +++ b/hosts/ocaml/bin/run_tests.ml @@ -1597,7 +1597,92 @@ let run_foundation_tests () = | other -> incr fail_count; Printf.printf " FAIL: invocation_count: %s\n" - (match other with Some n -> string_of_int n | None -> "None")) + (match other with Some n -> string_of_int n | None -> "None")); + + Printf.printf "\nSuite: jit extension-opcode awareness\n"; + let scan = Sx_vm.bytecode_uses_extension_opcodes in + let no_consts = [||] in + + (* Pure core ops: scan reports false. *) + (* OP_TRUE OP_RETURN *) + if not (scan [| 3; 50 |] no_consts) then begin + incr pass_count; + Printf.printf " PASS: pure core bytecode is JIT-eligible\n" + end else begin + incr fail_count; + Printf.printf " FAIL: pure core bytecode flagged as extension\n" + end; + + (* Extension opcode anywhere → true. *) + if scan [| 220; 50 |] no_consts then begin + incr pass_count; + Printf.printf " PASS: extension opcode detected at head\n" + end else begin + incr fail_count; + Printf.printf " FAIL: extension opcode at head missed\n" + end; + + (* Mixed: core + extension → true. *) + if scan [| 3; 220; 50 |] no_consts then begin + incr pass_count; + Printf.printf " PASS: extension opcode detected after core ops\n" + end else begin + incr fail_count; + Printf.printf " FAIL: extension opcode after core ops missed\n" + end; + + (* Operand bytes ≥200 must NOT trigger. CONST u16 with index 220 + into a synthetic constant pool — the operand is 220 (lo) 0 (hi), + not an opcode. The pool entry at 220 is irrelevant for the scan. *) + let big_consts = Array.make 256 Nil in + if not (scan [| 1; 220; 0; 50 |] big_consts) then begin + incr pass_count; + Printf.printf " PASS: CONST operand ≥200 not a false positive\n" + end else begin + incr fail_count; + Printf.printf " FAIL: CONST operand ≥200 false-positives as ext op\n" + end; + + (* CALL_PRIM has 3 operand bytes (u16 + u8); all ≥200 should not + trigger. *) + if not (scan [| 52; 220; 200; 200; 50 |] big_consts) then begin + incr pass_count; + Printf.printf " PASS: CALL_PRIM operands ≥200 not a false positive\n" + end else begin + incr fail_count; + Printf.printf " FAIL: CALL_PRIM operands ≥200 false-positive\n" + end; + + (* CLOSURE with upvalue descriptors: scan must skip the 2 + 2*n + dynamic operand bytes. Build a synthetic constant pool with a + Dict at index 0 declaring upvalue-count 1, descriptors that are + ≥200 — the scan should skip them and not trigger. + + Bytecode layout: CLOSURE 0 0 desc_is_local desc_index RETURN + op lo hi 210 220 50 + With upvalue-count = 1, scan must advance past the 2-byte CLOSURE + operand AND the 2 descriptor bytes (210, 220), landing on RETURN. *) + let cl_consts = Array.make 1 Nil in + let dict = Hashtbl.create 1 in + Hashtbl.replace dict "upvalue-count" (Integer 1); + cl_consts.(0) <- Dict dict; + if not (scan [| 51; 0; 0; 210; 220; 50 |] cl_consts) then begin + incr pass_count; + Printf.printf " PASS: CLOSURE upvalue descriptors ≥200 skipped\n" + end else begin + incr fail_count; + Printf.printf " FAIL: CLOSURE upvalue descriptors false-positive\n" + end; + + (* Sanity: opcode after CLOSURE+descriptors that IS an extension + opcode triggers correctly. *) + if scan [| 51; 0; 0; 210; 220; 221; 50 |] cl_consts then begin + incr pass_count; + Printf.printf " PASS: extension opcode after CLOSURE detected\n" + end else begin + incr fail_count; + Printf.printf " FAIL: extension opcode after CLOSURE missed\n" + end (* ====================================================================== *) diff --git a/hosts/ocaml/lib/sx_vm.ml b/hosts/ocaml/lib/sx_vm.ml index f26acecb..8aea6348 100644 --- a/hosts/ocaml/lib/sx_vm.ml +++ b/hosts/ocaml/lib/sx_vm.ml @@ -1056,6 +1056,62 @@ let _jit_is_broken_name n = || n = "hs-repeat-while" || n = "hs-repeat-until" || n = "hs-for-each" || n = "hs-put!" +(** Scan bytecode for any extension opcode (≥ 200, the registry's + [Sx_vm_extensions.extension_min]). Walks operand bytes correctly + so values that happen to be ≥200 (e.g. a CONST u16 index pointing + into a large pool) do not trigger false positives. CLOSURE's + dynamic upvalue descriptors are read from the constant pool entry + at the same index it pushes. + + Used by [jit_compile_lambda] (Phase E of the opcode-extension + plan): a lambda whose compiled body contains any extension opcode + is routed through interpretation rather than JIT. Extensions + interpret their opcodes via the registry; the JIT does not + currently know how to compile them. + + Operand-size logic mirrors [opcode_operand_size] (which is defined + later, in the disassembly section); inlined here so this helper can + sit before [jit_compile_lambda] in the file. *) +let bytecode_uses_extension_opcodes (bc : int array) (consts : value array) = + let core_operand_size = function + | 1 | 20 | 21 | 64 | 65 | 128 -> 2 (* u16 *) + | 16 | 17 | 18 | 19 | 48 | 49 | 144 -> 1 (* u8 *) + | 32 | 33 | 34 | 35 -> 2 (* i16 *) + | 52 -> 3 (* CALL_PRIM: u16 + u8 *) + | _ -> 0 + in + let len = Array.length bc in + let ip = ref 0 in + let found = ref false in + while not !found && !ip < len do + let op = bc.(!ip) in + if op >= 200 then found := true + else begin + ip := !ip + 1; + let extra = match op with + | 51 (* CLOSURE *) when !ip + 1 < len -> + let lo = bc.(!ip) in + let hi = bc.(!ip + 1) in + let idx = lo lor (hi lsl 8) in + let uv_count = + if idx < Array.length consts then + (match consts.(idx) with + | Dict d -> + (match Hashtbl.find_opt d "upvalue-count" with + | Some (Integer n) -> n + | Some (Number n) -> int_of_float n + | _ -> 0) + | _ -> 0) + else 0 + in + 2 + uv_count * 2 + | _ -> core_operand_size op + in + ip := !ip + extra + end + done; + !found + let jit_compile_lambda (l : lambda) globals = let fn_name = match l.l_name with Some n -> n | None -> "" in if !_jit_compiling then ( @@ -1118,8 +1174,18 @@ let jit_compile_lambda (l : lambda) globals = if idx < Array.length outer_code.vc_constants then let inner_val = outer_code.vc_constants.(idx) in let code = code_from_value inner_val in - Some { vm_code = code; vm_upvalues = [||]; - vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure } + (* Phase E: if the inner lambda's bytecode contains any + extension opcode (≥200), skip JIT and let the lambda run + interpreted via CEK. Extension opcodes dispatch correctly + through the VM's registry fallthrough, but the JIT has no + knowledge of them and shouldn't claim ownership. *) + if bytecode_uses_extension_opcodes code.vc_bytecode code.vc_constants then begin + Printf.eprintf "[jit] SKIP %s: bytecode uses extension opcodes (interpret-only in v1)\n%!" + fn_name; + None + end else + Some { vm_code = code; vm_upvalues = [||]; + vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure } else begin Printf.eprintf "[jit] FAIL %s: closure index %d out of bounds (pool=%d)\n%!" fn_name idx (Array.length outer_code.vc_constants); diff --git a/plans/sx-vm-opcode-extension.md b/plans/sx-vm-opcode-extension.md index b0646ca3..ac703046 100644 --- a/plans/sx-vm-opcode-extension.md +++ b/plans/sx-vm-opcode-extension.md @@ -8,7 +8,10 @@ future language port that wants performance-critical opcodes. Reference: `plans/erlang-on-sx.md` Phase 9, `plans/fed-sx-design.md` §17.5, `hosts/ocaml/lib/sx_vm.ml` (current VM). -Status: **in progress** on `loops/sx-vm-extensions`. +Status: **complete** on `loops/sx-vm-extensions` (Phases A-E landed +2026-05-14 / 2026-05-15). Ready for first real consumer +(`hosts/ocaml/lib/extensions/erlang.ml`, replacing the Phase 9b stub +dispatcher in `lib/erlang/vm/dispatcher.sx`). --- @@ -317,9 +320,9 @@ The JIT (lazy lambda compilation) currently compiles based on opcode ranges. Extension opcodes (≥200) should fall through to interpretation, not be JIT-compiled in v1. -- [ ] Mark extension opcodes as "interpret only" in the JIT pre-analysis. -- [ ] Lambda containing only core opcodes JIT-compiles as before. -- [ ] Lambda containing any extension opcode runs interpreted. +- [x] Mark extension opcodes as "interpret only" in the JIT pre-analysis. +- [x] Lambda containing only core opcodes JIT-compiles as before. +- [x] Lambda containing any extension opcode runs interpreted. JITing extension opcodes is a follow-up project; v1 keeps the JIT scope unchanged and just makes it correctly route mixed bytecode. @@ -457,6 +460,29 @@ familiarity. Newest first. +- **2026-05-15** — Phase E done. Loop complete (acceptance criteria + 1-4 all met). New `Sx_vm.bytecode_uses_extension_opcodes` walks + bytecode operand-aware (CONST u16 indices, CALL_PRIM u16+u8, + CLOSURE u16+dynamic upvalue descriptors) so values that happen to + be ≥200 don't false-positive as extension opcodes. Wired into + `jit_compile_lambda`: when the inner closure's bytecode contains + any extension opcode, JIT returns None and the lambda runs + interpreted via CEK (the dispatch fallthrough still routes + extension opcodes through the registry — this just prevents the + JIT from claiming ownership of code it can't optimise). 7 new + foundation tests (`jit extension-opcode awareness` suite): pure + core eligible, head/middle/post-CLOSURE detection, CONST + CALL_PRIM + + CLOSURE-descriptor false-positive avoidance. +7 pass vs Phase D + baseline (4833 vs 4826), 1111 pre-existing failures unchanged. + Conformance suites green: erlang 530/530, haskell 285/285, datalog + 276/276, prolog 590/590, smalltalk 847/847, common-lisp 487/487, + apl 562/562, js 148/148, forth 632/638 (pre-existing), tcl 3/4 + (pre-existing), ocaml-on-sx unit 607/607. + + Loop done. Hand-off: the Erlang loop's Phase 9b stub dispatcher in + `lib/erlang/vm/dispatcher.sx` can now be replaced with a real + `hosts/ocaml/lib/extensions/erlang.ml` consumer. + - **2026-05-15** — Phase D done. New `hosts/ocaml/lib/extensions/` subtree wired into the `sx` library via `(include_subdirs unqualified)`. `extensions/test_ext.ml` is the canonical worked example: two