vm-ext: phase D — extensions/ subtree + test_ext + opcode_name lookup

lib/extensions/ becomes the new home for VM extensions, wired in via
(include_subdirs unqualified). README documents the registration
pattern, opcode-ID range conventions (200-209 guest_vm, 210-219
inline test, 220-229 test_ext, 230-247 ports), and naming rules.

extensions/test_ext.ml is the canonical worked example — two
operand-less opcodes (220 push 42, 221 double TOS) carrying a per-
extension state slot (TestExtState invocation counter). Test_ext.register
is called from run_tests.ml at the start of the Phase D suite, on top of
the inline test_reg from earlier suites (disjoint opcode IDs).

Sx_vm.opcode_name now consults extension_opcode_name_ref (forward ref
in the same style as extension_dispatch_ref), so disassemble shows
extension opcodes by name instead of UNKNOWN_n. Registry maintains
name_of_id_table and installs the lookup at module init.

Tests: 5 new foundation cases — primitive resolves test_ext name,
end-to-end bytecode (push + double + return → 84), disassemble shows
"test_ext.OP_TEST_PUSH_42" / "test_ext.OP_TEST_DOUBLE_TOS",
unregistered ext opcodes still fall back to UNKNOWN_n, invocation
counter records the two dispatches. +5 pass vs Phase C baseline, no
regressions across 11 conformance suites.
This commit is contained in:
2026-05-15 01:05:30 +00:00
parent 57af0f386f
commit f3192f7fda
7 changed files with 278 additions and 10 deletions

View File

@@ -70,6 +70,13 @@ let jit_compile_ref : (lambda -> (string, value) Hashtbl.t -> vm_closure option)
let extension_dispatch_ref : (int -> vm -> frame -> unit) ref =
  (* Until a real dispatcher is stored in this ref, every opcode routed
     here is rejected with [Invalid_opcode]; the VM and frame arguments
     are ignored by the default handler. *)
  let reject_opcode opcode _vm _frame = raise (Invalid_opcode opcode) in
  ref reject_opcode
(** Forward hook that maps an extension opcode number to its display
    name. [opcode_name] (and therefore [disassemble]) consults it to
    print readable mnemonics. The registry is expected to install the
    real lookup at module init; the default installed here answers
    [None] for every opcode, in which case [opcode_name] falls back to
    "UNKNOWN_n". *)
let extension_opcode_name_ref : (int -> string option) ref =
  let no_extension_names (_ : int) : string option = None in
  ref no_extension_names
(* JIT threshold and counters live in Sx_types so primitives can read them
without creating a sx_primitives → sx_vm dependency cycle. *)
@@ -1222,7 +1229,12 @@ let opcode_name = function
| 164 -> "EQ" | 165 -> "LT" | 166 -> "GT" | 167 -> "NOT"
| 168 -> "LEN" | 169 -> "FIRST" | 170 -> "REST" | 171 -> "NTH"
| 172 -> "CONS" | 173 -> "NEG" | 174 -> "INC" | 175 -> "DEC"
| n -> Printf.sprintf "UNKNOWN_%d" n
| n ->
(* Extension opcodes (≥200) get their human-readable name from the
registry; defaults to UNKNOWN_n if the extension isn't loaded. *)
(match !extension_opcode_name_ref n with
| Some name -> name
| None -> Printf.sprintf "UNKNOWN_%d" n)
(** Number of extra operand bytes consumed by each opcode.
Returns (format, total_bytes) where format describes the operand types. *)