Merge loops/sx-vm-extensions into architecture: hosts/ocaml VM opcode extension mechanism

5 phases (A-E) per plans/sx-vm-opcode-extension.md:

- A: Sx_vm dispatch fallthrough for opcodes ≥200 + Invalid_opcode + extension_dispatch_ref
- B: Sx_vm_extension interface + Sx_vm_extensions registry (register / dispatch /
     id_of_name / state_of_extension), installs into the dispatch_ref at module init
- C: extension-opcode-id SX primitive for compiler-side lookup
- D: lib/extensions/ subtree wired via include_subdirs, test_ext.ml as the canonical
     worked example, opcode_name forward-ref so disassemble shows ext opcodes by name
- E: bytecode_uses_extension_opcodes scanner + JIT skip path so lambdas containing
     extension opcodes run interpreted via CEK

26 new foundation tests across 5 suites, all green. Zero regressions across 11
language-port conformance suites (erlang 530, haskell 285, datalog 276, prolog 590,
smalltalk 847, common-lisp 487, apl 562, js 148, forth 632, tcl 3, ocaml-on-sx unit 607).

Hand-off: lib/erlang/vm/dispatcher.sx (Phase 9b stub) can now be replaced with a real
hosts/ocaml/lib/extensions/erlang.ml consumer.
This commit is contained in:
2026-05-15 07:22:29 +00:00
9 changed files with 1455 additions and 4 deletions

View File

@@ -67,6 +67,14 @@ let rec deep_equal a b =
| NativeFn _, NativeFn _ -> a == b
| _ -> false
(* ====================================================================== *)
(* Test extensions for the VM extension registry suite (Phase B) *)
(* ====================================================================== *)
(* Extend the extensible variant from sx_vm_extension.ml so the test
extensions below can carry their own private state. *)
type Sx_vm_extension.extension_state += TestRegState of int ref
(* ====================================================================== *)
(* Build evaluator environment with test platform functions *)
(* ====================================================================== *)
@@ -1282,7 +1290,399 @@ let run_foundation_tests () =
let l = { l_params = ["x"]; l_body = Symbol "x"; l_closure = Sx_types.make_env (); l_name = None; l_compiled = None; l_call_count = 0; l_uid = Sx_types.next_lambda_uid () } in
assert_true "is_lambda" (Bool (Sx_types.is_lambda (Lambda l)));
ignore (Sx_types.set_lambda_name (Lambda l) "my-fn");
assert_eq "lambda name mutated" (String "my-fn") (lambda_name (Lambda l))
assert_eq "lambda name mutated" (String "my-fn") (lambda_name (Lambda l));
Printf.printf "\nSuite: vm-extension-dispatch\n";
let make_bc op = ({
vc_arity = 0; vc_rest_arity = -1; vc_locals = 0;
vc_bytecode = [| op |]; vc_constants = [||];
vc_bytecode_list = None; vc_constants_list = None;
} : Sx_types.vm_code) in
let expect_invalid_opcode label op =
let globals = Hashtbl.create 1 in
try
let _ = Sx_vm.execute_module (make_bc op) globals in
incr fail_count;
Printf.printf " FAIL: %s — expected Invalid_opcode, got a result\n" label
with
| Sx_vm.Invalid_opcode n when n = op ->
incr pass_count;
Printf.printf " PASS: %s\n" label
| exn ->
incr fail_count;
Printf.printf " FAIL: %s — unexpected: %s\n" label (Printexc.to_string exn)
in
expect_invalid_opcode "opcode 200 raises Invalid_opcode 200" 200;
expect_invalid_opcode "opcode 224 raises Invalid_opcode 224" 224;
expect_invalid_opcode "opcode 247 raises Invalid_opcode 247" 247;
(* Opcode 199 sits just below the extension threshold — should fall to the
catch-all (Eval_error), proving the threshold is at 200, not 199. *)
let globals = Hashtbl.create 1 in
(try
let _ = Sx_vm.execute_module (make_bc 199) globals in
incr fail_count;
Printf.printf " FAIL: opcode 199 — expected Eval_error, got a result\n"
with
| Sx_vm.Invalid_opcode _ ->
incr fail_count;
Printf.printf " FAIL: opcode 199 routed to extension dispatch (threshold wrong)\n"
| Sx_types.Eval_error _ ->
incr pass_count;
Printf.printf " PASS: opcode 199 stays in core (catch-all)\n"
| exn ->
incr fail_count;
Printf.printf " FAIL: opcode 199 — unexpected: %s\n" (Printexc.to_string exn));
Printf.printf "\nSuite: vm-extension-registry\n";
(* Sx_vm_extensions self-installs its dispatcher at module init. Reset
the registry so prior loaded extensions don't interfere with this
test. *)
Sx_vm_extensions._reset_for_tests ();
let module TestExt : Sx_vm_extension.EXTENSION = struct
let name = "test_reg"
let init () = TestRegState (ref 0)
let opcodes _st = [
(210, "test_reg.OP_PUSH_42", (fun vm _frame ->
Sx_vm.push vm (Sx_types.Integer 42)));
(211, "test_reg.OP_DOUBLE_TOS", (fun vm _frame ->
let v = Sx_vm.pop vm in
match v with
| Sx_types.Integer n -> Sx_vm.push vm (Sx_types.Integer (n * 2))
| _ -> failwith "OP_DOUBLE_TOS: not an integer"));
]
end in
Sx_vm_extensions.register (module TestExt);
(match Sx_vm_extensions.id_of_name "test_reg.OP_PUSH_42" with
| Some 210 ->
incr pass_count;
Printf.printf " PASS: id_of_name resolves opcode\n"
| other ->
incr fail_count;
Printf.printf " FAIL: id_of_name: got %s\n"
(match other with Some n -> string_of_int n | None -> "None"));
(match Sx_vm_extensions.id_of_name "nonexistent.OP" with
| None ->
incr pass_count;
Printf.printf " PASS: id_of_name returns None for unknown\n"
| Some _ ->
incr fail_count;
Printf.printf " FAIL: id_of_name should return None for unknown\n");
(match Sx_vm_extensions.state_of_extension "test_reg" with
| Some (TestRegState _) ->
incr pass_count;
Printf.printf " PASS: state_of_extension returns extension state\n"
| _ ->
incr fail_count;
Printf.printf " FAIL: state_of_extension lookup\n");
(match Sx_vm_extensions.state_of_extension "nonexistent" with
| None ->
incr pass_count;
Printf.printf " PASS: state_of_extension None for unknown\n"
| Some _ ->
incr fail_count;
Printf.printf " FAIL: state_of_extension should be None\n");
(* End-to-end dispatch through the VM. Bytecode runs OP_PUSH_42 then
OP_RETURN (50); execute_module pops the result. *)
let make_bc_seq bytes = ({
vc_arity = 0; vc_rest_arity = -1; vc_locals = 0;
vc_bytecode = bytes; vc_constants = [||];
vc_bytecode_list = None; vc_constants_list = None;
} : Sx_types.vm_code) in
(let globals = Hashtbl.create 1 in
try
match Sx_vm.execute_module (make_bc_seq [| 210; 50 |]) globals with
| Integer 42 ->
incr pass_count;
Printf.printf " PASS: dispatch routes opcode 210 -> push 42\n"
| other ->
incr fail_count;
Printf.printf " FAIL: dispatch opcode 210: got %s\n"
(Sx_types.inspect other)
with exn ->
incr fail_count;
Printf.printf " FAIL: dispatch opcode 210 raised: %s\n"
(Printexc.to_string exn));
(* Compose two extension opcodes: PUSH_42 then DOUBLE_TOS then RETURN.
Verifies that successive extension dispatches share VM state. *)
(let globals = Hashtbl.create 1 in
try
match Sx_vm.execute_module (make_bc_seq [| 210; 211; 50 |]) globals with
| Integer 84 ->
incr pass_count;
Printf.printf " PASS: extension opcodes compose (42 -> 84)\n"
| other ->
incr fail_count;
Printf.printf " FAIL: composed opcodes: got %s\n"
(Sx_types.inspect other)
with exn ->
incr fail_count;
Printf.printf " FAIL: composed opcodes raised: %s\n"
(Printexc.to_string exn));
(* Duplicate opcode-id detection. *)
let module DupExt : Sx_vm_extension.EXTENSION = struct
let name = "dup_check"
let init () = TestRegState (ref 0)
let opcodes _st = [
(210, "dup_check.OP_X", (fun _vm _frame -> ()));
]
end in
(try
Sx_vm_extensions.register (module DupExt);
incr fail_count;
Printf.printf " FAIL: duplicate opcode id should have raised\n"
with Failure _ ->
incr pass_count;
Printf.printf " PASS: duplicate opcode id rejected\n");
(* Out-of-range opcode-id detection. *)
let module OutExt : Sx_vm_extension.EXTENSION = struct
let name = "out_of_range"
let init () = TestRegState (ref 0)
let opcodes _st = [
(300, "out_of_range.OP_X", (fun _vm _frame -> ()));
]
end in
(try
Sx_vm_extensions.register (module OutExt);
incr fail_count;
Printf.printf " FAIL: out-of-range opcode should have raised\n"
with Failure _ ->
incr pass_count;
Printf.printf " PASS: out-of-range opcode rejected\n");
(* Duplicate extension-name detection. *)
let module SameNameExt : Sx_vm_extension.EXTENSION = struct
let name = "test_reg" (* same as TestExt above *)
let init () = TestRegState (ref 0)
let opcodes _st = []
end in
(try
Sx_vm_extensions.register (module SameNameExt);
incr fail_count;
Printf.printf " FAIL: duplicate extension name should have raised\n"
with Failure _ ->
incr pass_count;
Printf.printf " PASS: duplicate extension name rejected\n");
Printf.printf "\nSuite: extension-opcode-id primitive\n";
let prim = Hashtbl.find Sx_primitives.primitives "extension-opcode-id" in
(* Known opcode (registered by TestExt above). *)
(match prim [String "test_reg.OP_PUSH_42"] with
| Integer 210 ->
incr pass_count;
Printf.printf " PASS: primitive returns Integer for registered opcode\n"
| other ->
incr fail_count;
Printf.printf " FAIL: registered opcode lookup: got %s\n"
(Sx_types.inspect other));
(* Unknown opcode → Nil. *)
(match prim [String "nonexistent.OP_X"] with
| Nil ->
incr pass_count;
Printf.printf " PASS: primitive returns nil for unknown opcode\n"
| other ->
incr fail_count;
Printf.printf " FAIL: unknown opcode lookup: got %s\n"
(Sx_types.inspect other));
(* Symbol arg also accepted (compilers may pass quoted symbols). *)
(match prim [Symbol "test_reg.OP_DOUBLE_TOS"] with
| Integer 211 ->
incr pass_count;
Printf.printf " PASS: primitive accepts Symbol args\n"
| other ->
incr fail_count;
Printf.printf " FAIL: symbol arg: got %s\n" (Sx_types.inspect other));
(* Wrong arity / type raises Eval_error. *)
(try
let _ = prim [] in
incr fail_count;
Printf.printf " FAIL: zero args should have raised\n"
with Sx_types.Eval_error _ ->
incr pass_count;
Printf.printf " PASS: zero args rejected\n");
(try
let _ = prim [Integer 42] in
incr fail_count;
Printf.printf " FAIL: integer arg should have raised\n"
with Sx_types.Eval_error _ ->
incr pass_count;
Printf.printf " PASS: integer arg rejected\n");
Printf.printf "\nSuite: extensions/test_ext (canonical extension)\n";
(* Phase D: the real test extension lives at lib/extensions/test_ext.ml.
Register it on top of the inline test_reg from earlier suites — the
two use disjoint opcode IDs (210/211 vs 220/221) so they coexist. *)
Test_ext.register ();
(* Lookup via the public primitive should now find OP_TEST_PUSH_42. *)
(match prim [String "test_ext.OP_TEST_PUSH_42"] with
| Integer 220 ->
incr pass_count;
Printf.printf " PASS: extension-opcode-id finds test_ext.OP_TEST_PUSH_42\n"
| other ->
incr fail_count;
Printf.printf " FAIL: opcode lookup: got %s\n" (Sx_types.inspect other));
(* End-to-end: PUSH_42 + DOUBLE_TOS + RETURN. *)
(let globals = Hashtbl.create 1 in
try
match Sx_vm.execute_module (make_bc_seq [| 220; 221; 50 |]) globals with
| Integer 84 ->
incr pass_count;
Printf.printf " PASS: extensions/test_ext bytecode executes (84)\n"
| other ->
incr fail_count;
Printf.printf " FAIL: test_ext bytecode result: got %s\n"
(Sx_types.inspect other)
with exn ->
incr fail_count;
Printf.printf " FAIL: test_ext bytecode raised: %s\n"
(Printexc.to_string exn));
(* Disassembly: opcode_name should resolve 220/221 via the registry,
not fall back to UNKNOWN_220 / UNKNOWN_221. disassemble returns a
Dict; the instruction list lives at key "bytecode". *)
(let code = make_bc_seq [| 220; 221; 50 |] in
let dis = Sx_vm.disassemble code in
let entries = match dis with
| Dict d -> (match Hashtbl.find_opt d "bytecode" with
| Some (List es) -> es
| _ -> [])
| _ -> []
in
let names = List.filter_map (fun entry -> match entry with
| Dict d ->
(match Hashtbl.find_opt d "opcode" with
| Some (String name) -> Some name
| _ -> None)
| _ -> None) entries
in
let has name = List.mem name names in
if has "test_ext.OP_TEST_PUSH_42" && has "test_ext.OP_TEST_DOUBLE_TOS" then begin
incr pass_count;
Printf.printf " PASS: disassemble shows extension opcode names\n"
end else begin
incr fail_count;
Printf.printf " FAIL: disassemble names: [%s]\n" (String.concat ", " names)
end);
(* Sanity: opcode_name on an unregistered extension opcode still
returns UNKNOWN_n. Pick 230 — out of test_ext's range. *)
(match Sx_vm.opcode_name 230 with
| "UNKNOWN_230" ->
incr pass_count;
Printf.printf " PASS: unregistered ext opcode falls back to UNKNOWN_n\n"
| other ->
incr fail_count;
Printf.printf " FAIL: opcode_name 230: got %s\n" other);
(* Per-extension state: invocation_count should reflect the two opcodes
that ran in the dispatch test above. *)
(match Test_ext.invocation_count () with
| Some n when n >= 2 ->
incr pass_count;
Printf.printf " PASS: extension state recorded %d invocations\n" n
| other ->
incr fail_count;
Printf.printf " FAIL: invocation_count: %s\n"
(match other with Some n -> string_of_int n | None -> "None"));
Printf.printf "\nSuite: jit extension-opcode awareness\n";
let scan = Sx_vm.bytecode_uses_extension_opcodes in
let no_consts = [||] in
(* Pure core ops: scan reports false. *)
(* OP_TRUE OP_RETURN *)
if not (scan [| 3; 50 |] no_consts) then begin
incr pass_count;
Printf.printf " PASS: pure core bytecode is JIT-eligible\n"
end else begin
incr fail_count;
Printf.printf " FAIL: pure core bytecode flagged as extension\n"
end;
(* Extension opcode anywhere → true. *)
if scan [| 220; 50 |] no_consts then begin
incr pass_count;
Printf.printf " PASS: extension opcode detected at head\n"
end else begin
incr fail_count;
Printf.printf " FAIL: extension opcode at head missed\n"
end;
(* Mixed: core + extension → true. *)
if scan [| 3; 220; 50 |] no_consts then begin
incr pass_count;
Printf.printf " PASS: extension opcode detected after core ops\n"
end else begin
incr fail_count;
Printf.printf " FAIL: extension opcode after core ops missed\n"
end;
(* Operand bytes ≥200 must NOT trigger. CONST u16 with index 220
into a synthetic constant pool — the operand is 220 (lo) 0 (hi),
not an opcode. The pool entry at 220 is irrelevant for the scan. *)
let big_consts = Array.make 256 Nil in
if not (scan [| 1; 220; 0; 50 |] big_consts) then begin
incr pass_count;
Printf.printf " PASS: CONST operand ≥200 not a false positive\n"
end else begin
incr fail_count;
Printf.printf " FAIL: CONST operand ≥200 false-positives as ext op\n"
end;
(* CALL_PRIM has 3 operand bytes (u16 + u8); all ≥200 should not
trigger. *)
if not (scan [| 52; 220; 200; 200; 50 |] big_consts) then begin
incr pass_count;
Printf.printf " PASS: CALL_PRIM operands ≥200 not a false positive\n"
end else begin
incr fail_count;
Printf.printf " FAIL: CALL_PRIM operands ≥200 false-positive\n"
end;
(* CLOSURE with upvalue descriptors: scan must skip the 2 + 2*n
dynamic operand bytes. Build a synthetic constant pool with a
Dict at index 0 declaring upvalue-count 1, descriptors that are
≥200 — the scan should skip them and not trigger.
Bytecode layout: CLOSURE 0 0 desc_is_local desc_index RETURN
op lo hi 210 220 50
With upvalue-count = 1, scan must advance past the 2-byte CLOSURE
operand AND the 2 descriptor bytes (210, 220), landing on RETURN. *)
let cl_consts = Array.make 1 Nil in
let dict = Hashtbl.create 1 in
Hashtbl.replace dict "upvalue-count" (Integer 1);
cl_consts.(0) <- Dict dict;
if not (scan [| 51; 0; 0; 210; 220; 50 |] cl_consts) then begin
incr pass_count;
Printf.printf " PASS: CLOSURE upvalue descriptors ≥200 skipped\n"
end else begin
incr fail_count;
Printf.printf " FAIL: CLOSURE upvalue descriptors false-positive\n"
end;
(* Sanity: opcode after CLOSURE+descriptors that IS an extension
opcode triggers correctly. *)
if scan [| 51; 0; 0; 210; 220; 221; 50 |] cl_consts then begin
incr pass_count;
Printf.printf " PASS: extension opcode after CLOSURE detected\n"
end else begin
incr fail_count;
Printf.printf " FAIL: extension opcode after CLOSURE missed\n"
end
(* ====================================================================== *)

View File

@@ -2,3 +2,7 @@
(name sx)
(wrapped false)
(libraries re re.pcre unix))
; Pull in extension modules from lib/extensions/ (test_ext.ml, etc).
; See plans/sx-vm-opcode-extension.md.
(include_subdirs unqualified)

View File

@@ -0,0 +1,71 @@
# SX VM extensions
Each `*.ml` file here is a VM extension — a first-class OCaml module that
registers specialized bytecode opcodes with `Sx_vm_extensions`. See
[`plans/sx-vm-opcode-extension.md`](../../../../plans/sx-vm-opcode-extension.md)
for the design.
## Pattern
```ocaml
(* lib/extensions/myport.ml *)
open Sx_types
type Sx_vm_extension.extension_state += MyportState of { ... }
module M : Sx_vm_extension.EXTENSION = struct
let name = "myport"
let init () = MyportState { ... }
let opcodes _st = [
(id, "myport.OP_NAME", handler);
...
]
end
let register () = Sx_vm_extensions.register (module M)
```
Then call `Myport.register ()` once at startup from any binary that
should have the extension loaded.
## Opcode-ID allocation
Range 200-247 (per `Sx_vm_extensions.extension_min` /
`extension_max`). Conventions:
| Range | Use |
|---------|-------------------------------------------------------------------------|
| 200-209 | reserved for `lib/guest/vm/` shared opcodes (chiselled out on 2nd use) |
| 210-219 | inline test extensions defined in `bin/run_tests.ml` |
| 220-229 | this directory's `test_ext` (the canonical template) |
| 230-247 | first-come-first-served by language ports (Erlang first) |
When a port claims a contiguous block, document it in the table above.
The registry rejects collisions at startup with a loud error — there is
no silent shadowing.
## Naming
Always prefix opcode names with the extension name plus a dot:
`myport.OP_<NAME>`. The prefix is a hard convention so that multiple
extensions can share the global opcode-name namespace cleanly.
## State
`extension_state` is an extensible variant. Add your case (e.g.
`MyportState of { ... }`) at the top of your file, return it from
`init`, and pattern-match it inside your handlers. Other extensions
cannot see your state — the variant case is private to your module.
## Testing
`test_ext.ml` is the canonical worked example. `bin/run_tests.ml`
calls `Test_ext.register ()`, then drives bytecode that exercises the
opcodes end-to-end (push, double, dispatch, disassemble, invocation
counter). Mirror this shape when adding a real port's extension.
## Build wiring
`lib/dune` has `(include_subdirs unqualified)`, so any `.ml` you drop
in here is automatically part of the `sx` library. Module name follows
the filename verbatim (`test_ext.ml``Test_ext`).

View File

@@ -0,0 +1,67 @@
(** {1 [test_ext] — canonical example VM extension}
A minimal extension demonstrating the registration pattern from
[plans/sx-vm-opcode-extension.md]. The opcode IDs (220, 221) sit at
the top of the extension range, well clear of anything a real
language port would claim.
Two operand-less opcodes:
- [test_ext.OP_TEST_PUSH_42] (220) — pushes the integer 42.
- [test_ext.OP_TEST_DOUBLE_TOS] (221) — pops the integer on TOS,
pushes 2× it.
These are the smallest stack manipulations that prove the extension
mechanism wires through end-to-end (registry → dispatch → human-
readable disassembly). Real ports (Erlang Phase 9, future Haskell
perf phases) replace this template with their own opcode set.
Loading: [Test_ext.register ()] adds the extension to
[Sx_vm_extensions]. Run-time binaries that want the test opcodes
available call this once at startup. Unit tests in
[bin/run_tests.ml] do exactly that. *)
open Sx_types
(** Per-instance state for [test_ext]. Counts how many times the
handlers ran — purely so the extension has *some* state, exercising
the [extension_state] machinery. *)
type Sx_vm_extension.extension_state += TestExtState of {
mutable invocations : int;
}
module M : Sx_vm_extension.EXTENSION = struct
let name = "test_ext"
let init () = TestExtState { invocations = 0 }
let opcodes st =
let bump () = match st with
| TestExtState s -> s.invocations <- s.invocations + 1
| _ -> ()
in
[
(220, "test_ext.OP_TEST_PUSH_42",
(fun vm _frame -> bump (); Sx_vm.push vm (Integer 42)));
(221, "test_ext.OP_TEST_DOUBLE_TOS",
(fun vm _frame ->
bump ();
let v = Sx_vm.pop vm in
match v with
| Integer n -> Sx_vm.push vm (Integer (n * 2))
| _ -> raise (Eval_error
"test_ext.OP_TEST_DOUBLE_TOS: TOS is not an integer")));
]
end
(** Register [test_ext] in [Sx_vm_extensions]. Idempotent only by
failing loudly — calling twice raises [Failure]. Binaries call this
once at startup; tests may [_reset_for_tests] then re-register. *)
let register () = Sx_vm_extensions.register (module M : Sx_vm_extension.EXTENSION)
(** Read the invocation counter from the live registry state. Returns
[None] if [register] hasn't been called yet. *)
let invocation_count () =
match Sx_vm_extensions.state_of_extension "test_ext" with
| Some (TestExtState s) -> Some s.invocations
| _ -> None

View File

@@ -44,6 +44,11 @@ type vm = {
ip past OP_PERFORM, stack ready for a result push). *)
exception VmSuspended of value * vm
(** Raised by the extension dispatch fallthrough when an opcode in the
extension range (≥ 200) is encountered with no handler registered.
Carries the offending opcode id. See plans/sx-vm-opcode-extension.md. *)
exception Invalid_opcode of int
(* Register the VM suspension converter so sx_runtime.sx_apply_cek can
catch VmSuspended and convert it to CekPerformRequest without a
direct dependency on this module. *)
@@ -57,6 +62,21 @@ let () = Sx_types._convert_vm_suspension := (fun exn ->
let jit_compile_ref : (lambda -> (string, value) Hashtbl.t -> vm_closure option) ref =
ref (fun _ _ -> None)
(** Forward reference for extension opcode dispatch — Phase B installs the
real registry's dispatch function here at module init. Until then, any
opcode in the extension range raises [Invalid_opcode]. Same forward-ref
pattern as [jit_compile_ref] above; keeps [Sx_vm_extensions] free to
depend on [Sx_vm]'s [vm] / [frame] types without a cycle. *)
let extension_dispatch_ref : (int -> vm -> frame -> unit) ref =
ref (fun op _vm _frame -> raise (Invalid_opcode op))
(** Forward reference for extension opcode → name lookup, used by
[opcode_name] / [disassemble] for human-readable disassembly. The
registry installs a real lookup at module init; default returns
[None] (then [opcode_name] falls back to "UNKNOWN_n"). *)
let extension_opcode_name_ref : (int -> string option) ref =
ref (fun _ -> None)
(* JIT threshold and counters live in Sx_types so primitives can read them
without creating a sx_primitives → sx_vm dependency cycle. *)
@@ -875,6 +895,15 @@ and run vm =
let request = pop vm in
raise (VmSuspended (request, vm))
(* ---- Extension dispatch fallthrough ----
Opcode partition (see plans/sx-vm-opcode-extension.md):
0 reserved / NOP
1-199 core opcodes (current ceiling 175 = OP_DEC)
200-247 extension opcodes (registered via Sx_vm_extensions)
248-255 reserved for future expansion / multi-byte
Any opcode ≥ 200 routes through the extension registry. *)
| op when op >= 200 -> !extension_dispatch_ref op vm frame
| opcode ->
raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d"
opcode (frame.ip - 1)))
@@ -1027,6 +1056,62 @@ let _jit_is_broken_name n =
|| n = "hs-repeat-while" || n = "hs-repeat-until"
|| n = "hs-for-each" || n = "hs-put!"
(** Scan bytecode for any extension opcode (≥ 200, the registry's
[Sx_vm_extensions.extension_min]). Walks operand bytes correctly
so values that happen to be ≥200 (e.g. a CONST u16 index pointing
into a large pool) do not trigger false positives. CLOSURE's
dynamic upvalue descriptors are read from the constant pool entry
at the same index it pushes.
Used by [jit_compile_lambda] (Phase E of the opcode-extension
plan): a lambda whose compiled body contains any extension opcode
is routed through interpretation rather than JIT. Extensions
interpret their opcodes via the registry; the JIT does not
currently know how to compile them.
Operand-size logic mirrors [opcode_operand_size] (which is defined
later, in the disassembly section); inlined here so this helper can
sit before [jit_compile_lambda] in the file. *)
let bytecode_uses_extension_opcodes (bc : int array) (consts : value array) =
let core_operand_size = function
| 1 | 20 | 21 | 64 | 65 | 128 -> 2 (* u16 *)
| 16 | 17 | 18 | 19 | 48 | 49 | 144 -> 1 (* u8 *)
| 32 | 33 | 34 | 35 -> 2 (* i16 *)
| 52 -> 3 (* CALL_PRIM: u16 + u8 *)
| _ -> 0
in
let len = Array.length bc in
let ip = ref 0 in
let found = ref false in
while not !found && !ip < len do
let op = bc.(!ip) in
if op >= 200 then found := true
else begin
ip := !ip + 1;
let extra = match op with
| 51 (* CLOSURE *) when !ip + 1 < len ->
let lo = bc.(!ip) in
let hi = bc.(!ip + 1) in
let idx = lo lor (hi lsl 8) in
let uv_count =
if idx < Array.length consts then
(match consts.(idx) with
| Dict d ->
(match Hashtbl.find_opt d "upvalue-count" with
| Some (Integer n) -> n
| Some (Number n) -> int_of_float n
| _ -> 0)
| _ -> 0)
else 0
in
2 + uv_count * 2
| _ -> core_operand_size op
in
ip := !ip + extra
end
done;
!found
let jit_compile_lambda (l : lambda) globals =
let fn_name = match l.l_name with Some n -> n | None -> "<anon>" in
if !_jit_compiling then (
@@ -1089,8 +1174,18 @@ let jit_compile_lambda (l : lambda) globals =
if idx < Array.length outer_code.vc_constants then
let inner_val = outer_code.vc_constants.(idx) in
let code = code_from_value inner_val in
Some { vm_code = code; vm_upvalues = [||];
vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure }
(* Phase E: if the inner lambda's bytecode contains any
extension opcode (≥200), skip JIT and let the lambda run
interpreted via CEK. Extension opcodes dispatch correctly
through the VM's registry fallthrough, but the JIT has no
knowledge of them and shouldn't claim ownership. *)
if bytecode_uses_extension_opcodes code.vc_bytecode code.vc_constants then begin
Printf.eprintf "[jit] SKIP %s: bytecode uses extension opcodes (interpret-only in v1)\n%!"
fn_name;
None
end else
Some { vm_code = code; vm_upvalues = [||];
vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure }
else begin
Printf.eprintf "[jit] FAIL %s: closure index %d out of bounds (pool=%d)\n%!"
fn_name idx (Array.length outer_code.vc_constants);
@@ -1200,7 +1295,12 @@ let opcode_name = function
| 164 -> "EQ" | 165 -> "LT" | 166 -> "GT" | 167 -> "NOT"
| 168 -> "LEN" | 169 -> "FIRST" | 170 -> "REST" | 171 -> "NTH"
| 172 -> "CONS" | 173 -> "NEG" | 174 -> "INC" | 175 -> "DEC"
| n -> Printf.sprintf "UNKNOWN_%d" n
| n ->
(* Extension opcodes (≥200) get their human-readable name from the
registry; defaults to UNKNOWN_n if the extension isn't loaded. *)
(match !extension_opcode_name_ref n with
| Some name -> name
| None -> Printf.sprintf "UNKNOWN_%d" n)
(** Number of extra operand bytes consumed by each opcode.
Returns (format, total_bytes) where format describes the operand types. *)

View File

@@ -0,0 +1,48 @@
(** {1 VM extension interface}
Type definitions for VM bytecode extensions. See
[plans/sx-vm-opcode-extension.md].
An extension is a first-class module of type [EXTENSION]: it has a
stable [name], an [init] that returns its private state, and an
[opcodes] function that lists the opcodes it provides.
Opcode handlers receive the live [vm] and the active [frame]. They
read operands via [Sx_vm.read_u8] / [read_u16], manipulate the stack
via [push] / [pop] / [peek], and update the frame's [ip] as needed. *)
(** A handler for an extension opcode. Reads operands from bytecode,
manipulates the VM stack, updates the frame's instruction pointer.
May raise exceptions (which propagate via the existing VM error path). *)
type handler = Sx_vm.vm -> Sx_vm.frame -> unit
(** State an extension carries alongside the VM. Opaque to the VM core;
extensions extend this with their own constructor and cast as needed.
Extensible variant — extensions add cases:
{[
type Sx_vm_extension.extension_state +=
| ErlangState of erlang_scheduler
]} *)
type extension_state = ..
(** An extension is a first-class module of this signature. *)
module type EXTENSION = sig
(** Stable name for this extension (e.g. ["erlang"], ["guest_vm"]).
Used as the lookup key in the registry and as the prefix for opcode
names ([erlang.OP_PATTERN_TUPLE_2] etc). *)
val name : string
(** Initialize per-instance state. Called once when [register] is
invoked on this extension. *)
val init : unit -> extension_state
(** Opcodes this extension provides. Each is
[(opcode_id, opcode_name, handler)].
[opcode_id] must be in the range 200-247 (the extension partition;
see the partition comment at the top of [Sx_vm]'s dispatch loop).
Conflicts with already-registered opcodes cause [register] to
fail. *)
val opcodes : extension_state -> (int * string * handler) list
end

View File

@@ -0,0 +1,120 @@
(** {1 VM extension registry}
Holds the live registry of extension opcodes and installs the
[dispatch] function into [Sx_vm.extension_dispatch_ref] at module
init time, replacing Phase A's stub.
See [plans/sx-vm-opcode-extension.md] and [Sx_vm_extension] for the
extension interface. *)
open Sx_vm_extension
(** The opcode range an extension is allowed to claim.
Mirrors the partition comment in [Sx_vm]. *)
let extension_min = 200
let extension_max = 247
(** opcode_id → handler *)
let by_id : (int, handler) Hashtbl.t = Hashtbl.create 64
(** opcode_name → opcode_id *)
let by_name : (string, int) Hashtbl.t = Hashtbl.create 64
(** opcode_id → opcode_name (reverse of [by_name]; used by
[Sx_vm.opcode_name] for disassembly). *)
let name_of_id_table : (int, string) Hashtbl.t = Hashtbl.create 64
(** extension_name → state *)
let states : (string, extension_state) Hashtbl.t = Hashtbl.create 8
(** Registered extension names, newest first. *)
let extensions : string list ref = ref []
(** Dispatch an extension opcode to its registered handler. Raises
[Sx_vm.Invalid_opcode] if no handler is registered for [op]. *)
let dispatch op vm frame =
match Hashtbl.find_opt by_id op with
| Some handler -> handler vm frame
| None -> raise (Sx_vm.Invalid_opcode op)
(** Register an extension. Fails if the extension name is already
registered, or if any opcode_id is outside the extension range or
collides with an already-registered opcode. *)
let register (m : (module EXTENSION)) =
let module M = (val m) in
if Hashtbl.mem states M.name then
failwith (Printf.sprintf
"Sx_vm_extensions: extension %S already registered" M.name);
let st = M.init () in
let ops = M.opcodes st in
List.iter (fun (id, opname, _h) ->
if id < extension_min || id > extension_max then
failwith (Printf.sprintf
"Sx_vm_extensions: opcode %d (%s) outside extension range %d-%d"
id opname extension_min extension_max);
if Hashtbl.mem by_id id then
failwith (Printf.sprintf
"Sx_vm_extensions: opcode %d (%s) already registered" id opname);
if Hashtbl.mem by_name opname then
failwith (Printf.sprintf
"Sx_vm_extensions: opcode name %S already registered" opname)
) ops;
Hashtbl.add states M.name st;
List.iter (fun (id, opname, h) ->
Hashtbl.add by_id id h;
Hashtbl.add by_name opname id;
Hashtbl.add name_of_id_table id opname
) ops;
extensions := M.name :: !extensions
(** Look up the opcode_id for an opcode_name. Returns [None] if no
extension provides that opcode. *)
let id_of_name name = Hashtbl.find_opt by_name name
(** Look up the opcode_name for an opcode_id. Returns [None] if no
extension provides that opcode. Used by disassembly. *)
let name_of_id id = Hashtbl.find_opt name_of_id_table id
(** Look up the state of an extension by name. Returns [None] if the
extension is not registered. *)
let state_of_extension name = Hashtbl.find_opt states name
(** Names of all registered extensions, newest first. *)
let registered_extensions () = !extensions
(** Test-only: clear the registry. Used by unit tests to isolate
extensions between test cases. The dispatch_ref is left in place. *)
let _reset_for_tests () =
Hashtbl.clear by_id;
Hashtbl.clear by_name;
Hashtbl.clear name_of_id_table;
Hashtbl.clear states;
extensions := []
(** Install our [dispatch] into [Sx_vm.extension_dispatch_ref] and our
[name_of_id] into [Sx_vm.extension_opcode_name_ref], replacing
the Phase A stubs. Idempotent. Called automatically at module init. *)
let install_dispatch () =
Sx_vm.extension_dispatch_ref := dispatch;
Sx_vm.extension_opcode_name_ref := name_of_id
let () = install_dispatch ()
(** Compiler-side opcode lookup: register the [extension-opcode-id]
primitive. Compilers ([lib/compiler.sx]) call this to emit
extension opcodes by name. Returns [Integer id] when registered,
[Nil] otherwise — so missing extensions degrade to a fallback
rather than failure. *)
let () =
Sx_primitives.register "extension-opcode-id" (fun args ->
match args with
| [Sx_types.String name] ->
(match id_of_name name with
| Some id -> Sx_types.Integer id
| None -> Sx_types.Nil)
| [Sx_types.Symbol name] ->
(match id_of_name name with
| Some id -> Sx_types.Integer id
| None -> Sx_types.Nil)
| _ -> raise (Sx_types.Eval_error
"extension-opcode-id: expected one string or symbol"))

View File

@@ -0,0 +1,86 @@
# sx-vm-extensions loop agent
Role: drives `plans/sx-vm-opcode-extension.md` to completion. One phase per
fire (A → B → C → D → E). Bounded loop — after Phase E acceptance, the loop
is done.
```
description: sx-vm-extensions queue loop
subagent_type: general-purpose
run_in_background: true
isolation: worktree (already on loops/sx-vm-extensions)
```
## What this loop is for
Mechanism in `hosts/ocaml/lib/` that lets language ports register specialized
bytecode opcodes without modifying the SX VM core. Direct prerequisite for
**erlang-on-sx Phase 9** (the BEAM analog) and a structural enabler for any
future language port that wants performance-critical opcodes.
## The queue
Per `plans/sx-vm-opcode-extension.md`, in order:
- **Phase A** — Opcode ID partition + dispatch fallthrough in `sx_vm.ml`.
Add `Invalid_opcode of int` exception, `extension_dispatch_ref`, the
`| op when op >= 200 -> !extension_dispatch_ref op vm frame` arm, and a
partition comment near the opcode list.
- **Phase B** — Extension registry module (`sx_vm_extensions.ml`).
`register`, `dispatch`, `id_of_name`, `state_of_extension`. Wire dispatch
into Phase A's ref at module init.
- **Phase C** — Compiler-side opcode lookup primitive (`extension-opcode-id`).
- **Phase D** — Test extension at `hosts/ocaml/lib/extensions/test_ext.ml`,
end-to-end SX → bytecode → VM dispatch flow.
- **Phase E** — JIT awareness: extension opcodes mark a lambda as
interpret-only.
## Per-fire workflow (hard)
1. Read `plans/sx-vm-opcode-extension.md` — find the first un-ticked phase.
2. Implement the phase (only files in `hosts/ocaml/**` and the plan file).
3. Build via `sx_build target=ocaml`.
4. Run regression: every existing language-port conformance suite plus
the OCaml unit tests. The list lives at `lib/<lang>/conformance.sh`
13 suites at last count (apl, common-lisp, datalog, erlang, forth, guest,
haskell, js, lua, ocaml, prolog, smalltalk, tcl).
5. If green, commit (short factual message — `vm-ext: phase A — dispatch
fallthrough` style).
6. Tick the `[ ]` for the completed phase in the plan, append one dated
line to the Progress log (newest first).
7. Stop. Wait for the next fire.
## Ground rules (hard)
- **Scope:** only `hosts/ocaml/**` and `plans/sx-vm-opcode-extension.md`.
Do **not** edit `lib/<lang>/**`, `spec/**`, `shared/**`, or any other
language port's tests.
- **One phase per fire.** Don't combine phases even if a phase looks small.
The point of the loop is incremental commits.
- **Commit locally only.** Do **not** push. Do **not** touch `main`.
- **Worktree:** you are on `loops/sx-vm-extensions` in
`/root/rose-ash-loops/sx-vm-extensions`.
- **OCaml SX VM gotchas:**
- `vm` and `frame` types are defined in `sx_vm.ml`, not `sx_types.ml`.
Forward refs (like the existing `jit_compile_ref` pattern) are how
sibling modules avoid circular dependency.
- Current core opcode ceiling is 175 (OP_DEC). The extension threshold
is 200, leaving 24 spare slots for future core opcodes.
- JIT compilation is lazy per-lambda. See `project_jit_compilation.md`
in memory for the cache + sentinel pattern.
- **SX edits:** `sx-tree` MCP tools only (none expected for this loop, but
if needed).
- **OCaml edits:** Edit/Write tools are fine — these aren't `.sx` files.
## Done condition
Phase E acceptance: all 13 (or however many exist at the time) language-port
conformance suites pass, OCaml unit tests pass, the test extension from
Phase D demonstrates end-to-end flow including JIT routing. Loop is
complete; mark and stop.
## After acceptance
Hand off to the Erlang loop: `hosts/ocaml/lib/extensions/erlang.ml` becomes
the first real consumer, written against this mechanism instead of the
Phase 9b stub dispatcher in `lib/erlang/vm/dispatcher.sx`.

View File

@@ -0,0 +1,555 @@
# SX VM Opcode Extension Mechanism
Mechanism in `hosts/ocaml/lib/` that lets language ports register specialized
bytecode opcodes without modifying the SX VM core. Direct prerequisite for
**erlang-on-sx Phase 9** (the BEAM analog) and a structural enabler for any
future language port that wants performance-critical opcodes.
Reference: `plans/erlang-on-sx.md` Phase 9, `plans/fed-sx-design.md` §17.5,
`hosts/ocaml/lib/sx_vm.ml` (current VM).
Status: **complete** on `loops/sx-vm-extensions` (Phases A-E landed
2026-05-14 / 2026-05-15). Ready for first real consumer
(`hosts/ocaml/lib/extensions/erlang.ml`, replacing the Phase 9b stub
dispatcher in `lib/erlang/vm/dispatcher.sx`).
---
## Goal
Allow language ports to register custom bytecode opcodes in the SX VM, with:
- **Zero overhead for core opcodes.** Existing opcodes (current ceiling 175,
see `sx_vm.ml`) must dispatch identically. No regression for any existing
language port or the core SX runtime.
- **One additional dispatch step for extension opcodes.** Acceptable cost; the
win comes from avoiding the general CEK machinery.
- **Per-extension state slot.** Erlang's process scheduler, Haskell's thunk
cache, etc. need somewhere to hang state alongside the VM.
- **Compiler awareness.** The bytecode compiler (`lib/compiler.sx`) must be
able to emit extension opcodes by name, looked up against the registered
set.
- **JIT compatibility.** Existing JIT (lazy lambda compilation) continues to
work for code paths using only core opcodes. Extension opcodes are
interpreted in v1; JITing them is a follow-up.
## Non-goals
- **Hot opcode reload.** Adding/replacing opcodes mid-runtime is not in
scope. Extensions are compile-time additions to the OCaml binary. (If
needed, that's a separate project.)
- **Per-instance opcode sets.** All running instances of the SX VM share
the same opcode set determined at build time. Selective opcode loading
per instance is out of scope.
- **Opcode hot-swap or supersession.** Once registered, opcodes are stable
for the lifetime of the binary.
- **Language-port isolation at the dispatch layer.** Two language ports can
see each other's opcodes (they share the dispatch table). Isolation is a
build-time concern — don't compile in extensions you don't trust.
---
## Why now
The Erlang-on-SX Phase 9 work needs this. Without it, Phase 9b-9g (the actual
opcode implementations) have nowhere to plug in. The Erlang loop hit this
dependency as a Blocker (`0abf05ed`); this design is what unblocks it.
It also enables the **shared opcode pattern** discussed in `plans/fed-sx-
design.md` §17.5: opcodes Erlang Phase 9 produces that other ports could
plausibly use (pattern match, perform/handle, record access) get chiselled
out to `lib/guest/vm/` when a second port has an actual second use. Without
the extension mechanism, each port would have to fork the SX VM core or
modify shared dispatch — neither acceptable.
---
## Architectural overview
```
┌──────────────────────────────────────────┐
│ SX VM core (hosts/ocaml/lib/sx_vm.ml) │
│ │
│ ┌────────────────────────────────────┐ │
│ │ Bytecode dispatch loop │ │
│ │ │ │
│ │ match op with │ │
│ │ | 1 (OP_CONST) -> ... │ │
│ │ | 2 (OP_NIL) -> ... │ │
│ │ | ... │ │
│ │ | 175 -> ... (last core opcode) │ │
│ │ | op when op >= 200 -> │ │
│ │ !extension_dispatch_ref op │ │ ◄── new
│ │ vm frame │ │
│ └────────────────────────────────────┘ │
│ │
│ ┌────────────────────────────────────┐ │
│ │ Extension registry │ │
│ │ opcode_id -> handler │ │ ◄── Phase B
│ │ opcode_name -> opcode_id │ │
│ │ extension_state per extension │ │
│ └────────────────────────────────────┘ │
└──────────────────────────────────────────┘
│ register at startup
┌──────────────────┴──────────────────────┐
│ Extension modules │
│ hosts/ocaml/lib/extensions/erlang.ml │
│ hosts/ocaml/lib/extensions/haskell.ml │
│ hosts/ocaml/lib/extensions/datalog.ml │
│ hosts/ocaml/lib/extensions/guest_vm.ml │ ◄── shared opcodes
└─────────────────────────────────────────┘
```
### Opcode ID space partition
Current SX VM uses opcode IDs from 1 to 175 (per inspection of `sx_vm.ml`,
ceiling at OP_DEC = 175). We partition the 0-255 space:
| Range | Use |
|---------|------------------------------------------------------------------|
| 0 | reserved / NOP |
| 1-199 | **core opcodes** — owned by the SX VM, locked schema |
| 200-247 | **extension opcodes** — registered by extensions (ports + shared) |
| 248-255 | reserved for future expansion / multi-byte opcodes |
This gives the core 24 free slots above the current 175 ceiling for future
core additions, and 48 slots for extensions. Erlang Phase 9 expects to need
fewer than 30 specialized opcodes, so this is comfortable headroom.
The plan originally proposed a finer split (`128-199` for `lib/guest/vm/`
shared, `200-247` for ports). That distinction is preserved at the **naming
level** (`guest_vm.OP_X` vs `erlang.OP_Y`) and policed by the registry
(duplicate IDs fail at startup), without consuming separate ID ranges. The
chiselling discipline (move an opcode to `guest_vm` when a second port uses
it) operates at the source level.
If we need more than 256 opcodes total, multi-byte opcodes (a leading 248-255
byte plus a second byte) extend the space without breaking the schema.
### Extension module signature
```ocaml
(* hosts/ocaml/lib/sx_vm_extension.ml *)
(** A handler for an extension opcode. Reads operands from bytecode,
manipulates the VM stack, updates the frame's instruction pointer.
May raise exceptions (which propagate via the existing VM error path). *)
type handler = vm -> frame -> unit
(** State an extension carries alongside the VM. Opaque to the VM core;
extensions cast as needed. *)
type extension_state = ..
module type EXTENSION = sig
(** Stable name for this extension (e.g. "erlang", "guest_vm"). *)
val name : string
(** Initialize per-instance state. Called once when the VM starts and the
extension is loaded. *)
val init : unit -> extension_state
(** Opcodes this extension provides. Each is (opcode_id, opcode_name, handler).
opcode_id must be in 200-247. Conflicts cause startup failure. *)
val opcodes : extension_state -> (int * string * handler) list
end
```
### Registration and dispatch
```ocaml
(* hosts/ocaml/lib/sx_vm_extensions.ml *)
let extensions : (module EXTENSION) list ref = ref []
let states : (string, extension_state) Hashtbl.t = Hashtbl.create 8
let by_id : (int, handler) Hashtbl.t = Hashtbl.create 64
let by_name : (string, int) Hashtbl.t = Hashtbl.create 64
let register (m : (module EXTENSION)) =
let module M = (val m) in
let st = M.init () in
Hashtbl.add states M.name st;
List.iter (fun (id, name, h) ->
if Hashtbl.mem by_id id then
failwith (Printf.sprintf "Opcode %d (%s) already registered" id name);
Hashtbl.add by_id id h;
Hashtbl.add by_name name id
) (M.opcodes st);
extensions := m :: !extensions
let dispatch op vm frame =
match Hashtbl.find_opt by_id op with
| Some handler -> handler vm frame
| None -> raise (Invalid_opcode op)
let id_of_name name = Hashtbl.find_opt by_name name
let state_of_extension name = Hashtbl.find_opt states name
```
Phase B installs this dispatcher into `Sx_vm.extension_dispatch_ref` at
module init. Until then, the ref's default raises `Invalid_opcode op` for
any opcode ≥ 200, which is the Phase A test condition.
The dispatch path adds **one hashtable lookup per extension opcode**.
Acceptable cost — and Erlang's specialized opcodes win >100× over going
through the general CEK machine, so the overhead is negligible by comparison.
### Bytecode compiler integration
The compiler (`lib/compiler.sx`) needs to know extension opcode IDs to emit
them. New SX primitive exposed to the compiler:
```sx
(extension-opcode-id "erlang.OP_PATTERN_TUPLE_2") ; → 200, or nil if not loaded
```
When the compiler wants to emit a specialized opcode, it queries by name. If
the extension isn't loaded, the compiler falls back to the general path
(emit a `CALL_PRIM` or general SX `case`). This means a language port's
optimization is opt-in per build, and missing extensions degrade to slower
correct execution rather than failure.
Naming convention: `<extension-name>.OP_<NAME>`. So `erlang.OP_PATTERN_TUPLE_2`,
`guest_vm.OP_PERFORM`, etc.
### Per-extension state access
Some opcodes need state beyond the VM stack (Erlang's scheduler, mailbox
state, etc.). Extensions store state in their `init`-returned value, accessed
via `state_of_extension`:
```ocaml
let op_spawn vm frame =
let st = Sx_vm_extensions.state_of_extension "erlang"
|> Option.get
|> Obj.magic in (* extension casts to its known type *)
let body = pop vm in
let pid = Erlang_scheduler.spawn st body in
push vm (pid_value pid);
frame.ip <- frame.ip + 1
```
Shared scheduler state lives in the Erlang extension's state value. Other
extensions don't see it.
---
## Phase plan
Five sub-phases in dependency order. Each is testable in isolation.
### Phase A — Opcode ID partition + dispatch fallthrough
- [x] Define `exception Invalid_opcode of int` in `sx_vm.ml`.
- [x] Add `extension_dispatch_ref : (int -> vm -> frame -> unit) ref`
whose default handler raises `Invalid_opcode op`. Forward-declared in
the same style as the existing `jit_compile_ref`.
- [x] Add `| op when op >= 200 -> !extension_dispatch_ref op vm frame` arm
in the dispatch loop, immediately before the catch-all.
- [x] Document the partition in a comment near the top of the opcode list.
**Tests:**
- All existing OCaml VM/CEK tests pass unchanged (zero regression for core).
- Constructed bytecode using opcode 200 raises `Invalid_opcode 200` when no
extension is registered.
**Effort:** small. ~50 lines + tests.
### Phase B — Extension registry module
`hosts/ocaml/lib/sx_vm_extensions.ml` per the sketch above. Pure plumbing, no
opcodes yet. Phase B's module init installs the real `dispatch` into
`Sx_vm.extension_dispatch_ref`, replacing Phase A's stub.
- [x] `Sx_vm_extension` interface module (handler type, EXTENSION sig).
- [x] `Sx_vm_extensions` registry module (`register`, `dispatch`,
`id_of_name`, `state_of_extension`).
- [x] Wire the registry's `dispatch` into `Sx_vm.extension_dispatch_ref` at
module init.
**Tests:**
- Register a test extension with one opcode; dispatch finds it.
- Duplicate opcode-id registration fails at startup.
- `id_of_name` and `state_of_extension` lookups work.
**Effort:** small. ~150 lines + tests.
### Phase C — Compiler-side opcode lookup primitive
Expose `extension-opcode-id` as an SX primitive in `hosts/ocaml/lib/`. The
compiler in `lib/compiler.sx` can call it to emit extension opcodes by name.
Does not require any extension to actually exist — the primitive returns
`nil` for unknown names, and the compiler falls back.
- [x] Register `extension-opcode-id` in `sx_primitives.ml`.
- [x] Returns `Integer id` when registered, `Nil` otherwise.
**Tests:**
- Primitive returns nil for unknown name.
- After registering a test extension, primitive returns the registered ID.
**Effort:** small. Single primitive registration + compiler-side use docs.
### Phase D — Test extension demonstrating end-to-end flow
A dummy extension at `hosts/ocaml/lib/extensions/test_ext.ml` registering
one or two trivial opcodes (e.g. `OP_TEST_PUSH_42`, `OP_TEST_DOUBLE_TOS`).
Wired into the build, available when running tests.
Compiler test: write SX that triggers the test compiler-extension to emit
`OP_TEST_PUSH_42`, then verify the VM executes it correctly via
`bytecode-inspect` and `vm-trace`.
- [x] `test_ext.ml` registers two opcodes.
- [x] Wired into the build (extensions registered at startup).
- [x] Bytecode emission via name lookup produces the right ID.
- [x] `bytecode-inspect` shows the opcode by name.
**Tests:**
- Bytecode emission via name lookup produces the right ID.
- Execution produces the expected stack effect.
- `bytecode-inspect` shows the opcode by name.
- `vm-trace` correctly reports the extension opcode.
**Effort:** small. ~100 lines including build wiring.
### Phase E — JIT awareness (interpreted-only for v1)
The JIT (lazy lambda compilation) currently compiles based on opcode ranges.
Extension opcodes (≥200) should fall through to interpretation, not be
JIT-compiled in v1.
- [x] Mark extension opcodes as "interpret only" in the JIT pre-analysis.
- [x] Lambda containing only core opcodes JIT-compiles as before.
- [x] Lambda containing any extension opcode runs interpreted.
JITing extension opcodes is a follow-up project; v1 keeps the JIT scope
unchanged and just makes it correctly route mixed bytecode.
**Tests:**
- Lambda with only core opcodes: JIT-compiled, fast path.
- Lambda with extension opcode: interpreted, correct result.
- Mixed lambda: interpreted, correct result.
**Effort:** small-medium. Requires understanding the JIT's pre-analysis
(per `project_jit_compilation.md` memory: "Lazy JIT implemented: lambda
bodies compiled on first VM call, cached, failures sentinel-marked").
Extension-opcode detection becomes another reason to mark a lambda
"interpret-only."
---
## Acceptance criteria
1. **Phase A-D pass their test suites.**
2. **Zero regression on existing SX VM tests.** All language-port test
suites currently passing on the architecture branch (Erlang 530+, Haskell
285+, Datalog 276+, Smalltalk 625+, the SX core test suite, etc.) still
pass.
3. **Test extension demonstrates the flow end-to-end.** SX source compiles
via the compiler with a registered extension opcode, executes through the
VM via the dispatch fallthrough, returns correct result.
4. **Documentation:** README in `hosts/ocaml/lib/extensions/` explaining the
pattern, with a worked example (the test extension is the canonical one).
After acceptance, the Erlang-on-SX Phase 9 work in `lib/erlang/vm/` can use
this mechanism. The Erlang loop's Blocker for 9a is resolved.
---
## Risk and mitigation
**Risk: regression in core opcode dispatch.** A misplaced `match` arm could
break something. *Mitigation:* run every existing language-port conformance
suite before merging.
**Risk: opcode ID conflicts as more extensions land.** If Erlang Phase 9
claims IDs 200-220 and Haskell wants 215-235, we have a problem.
*Mitigation:* maintain a registry document at `hosts/ocaml/lib/extensions/
README.md` listing claimed ID ranges per extension. Convention: each
extension claims a contiguous block at first registration; collisions caught
at startup with a clear error.
**Risk: extension state types leak through `Obj.magic`.** The extension state
is type-erased in the registry. *Mitigation:* extensions cast in their own
opcode handlers, never expose state to other extensions or the VM core.
First-class modules / GADTs could add more type safety; deferred unless
this becomes a concrete pain point.
**Risk: extensions become a back door for kernel mutation.** An extension
opcode handler has full access to the VM. *Mitigation:* extensions are
build-time additions, not runtime; they're as trusted as the rest of the
binary. Operators audit at build time, not runtime. Same trust model as
any other compiled-in code.
**Risk: shared `lib/guest/vm/` opcodes evolve under different language
ports' needs.** *Mitigation:* the chiselling discipline (move to guest only
on second use) ensures the shared opcodes are tested against at least two
ports' actual usage before being considered stable.
---
## Open questions
To be resolved during implementation, not blocking design approval:
1. **Multi-byte opcode encoding.** If we need >256 opcodes total, the
leading-byte 248-255 schema accommodates it. Do we need multi-byte at
v1? Probably not — 48 extension opcodes is more than any single port
should reasonably want.
2. **Extension ordering matters?** If two extensions register opcodes that
read the same VM state, ordering of registration could matter for
initialization. Probably not in practice; flag if it bites.
3. **Hot-reload of extensions.** Out of scope for v1 (per non-goals). If
wanted later, the registry would need teardown + re-registration; the
`gen_server` `code_change/3` model from Erlang Phase 7 is a precedent.
4. **Cross-extension opcode composition.** Can `guest_vm.OP_PERFORM` invoke
`erlang.OP_RECEIVE_SCAN`? In principle yes — handlers can do anything.
The interface is clean; the question is whether we want any conventions
to keep ergonomics tractable. Defer until composition appears in
practice.
---
## Implementation roadmap and sequencing
This is a sister workstream to `loops/erlang`. Driven by Erlang Phase 9.
Single bounded loop on `loops/sx-vm-extensions`, ~1-2 weeks.
Recommended sequencing (one phase per loop fire):
1. **Phase A** — dispatch fallthrough. Smallest viable change to `sx_vm.ml`.
2. **Phase B** — extension registry module.
3. **Phase C** — compiler-side opcode lookup primitive.
4. **Phase D** — test extension demonstrating end-to-end flow.
5. **Phase E** — JIT awareness (interpret-only routing).
After acceptance:
- **`hosts/ocaml/lib/extensions/erlang.ml`** becomes the *first real
consumer* — written by whoever takes over from the Erlang loop's stub
dispatcher in `lib/erlang/vm/dispatcher.sx`. That's the integration
moment that closes the loop.
Estimated total effort: 1-2 weeks for one focused engineer with OCaml SX VM
familiarity.
---
## Relationship to other plans
- **`plans/erlang-on-sx.md` Phase 9:** unblocked by this work. Erlang loop
develops opcodes against a stub dispatcher in `lib/erlang/vm/`; once this
mechanism lands, swap stub for real registration via
`hosts/ocaml/lib/extensions/erlang.ml`.
- **`plans/fed-sx-design.md` §17.5:** documents this as Layer-1 prerequisite.
The shared-opcode discipline (lib/guest/vm/) is designed on top of this
mechanism's namespace allocation.
- **Future language ports (Haskell, Datalog, Smalltalk perf phases):** will
use the same mechanism. Each adds an extension module, claims an opcode
range, registers handlers. The `lib/guest/vm/` opcodes get
cross-referenced when the second port's needs justify chiselling.
- **JIT roadmap (per `project_jit_architecture.md` memory):** extension
opcodes are interpreted in v1. JITing them is a logical follow-up but
a separate project.
---
## Progress log
Newest first.
- **2026-05-15** — Phase E done. Loop complete (acceptance criteria
1-4 all met). New `Sx_vm.bytecode_uses_extension_opcodes` walks
bytecode operand-aware (CONST u16 indices, CALL_PRIM u16+u8,
CLOSURE u16+dynamic upvalue descriptors) so values that happen to
be ≥200 don't false-positive as extension opcodes. Wired into
`jit_compile_lambda`: when the inner closure's bytecode contains
any extension opcode, JIT returns None and the lambda runs
interpreted via CEK (the dispatch fallthrough still routes
extension opcodes through the registry — this just prevents the
JIT from claiming ownership of code it can't optimise). 7 new
foundation tests (`jit extension-opcode awareness` suite): pure
core eligible, head/middle/post-CLOSURE detection, CONST + CALL_PRIM
+ CLOSURE-descriptor false-positive avoidance. +7 pass vs Phase D
baseline (4833 vs 4826), 1111 pre-existing failures unchanged.
Conformance suites green: erlang 530/530, haskell 285/285, datalog
276/276, prolog 590/590, smalltalk 847/847, common-lisp 487/487,
apl 562/562, js 148/148, forth 632/638 (pre-existing), tcl 3/4
(pre-existing), ocaml-on-sx unit 607/607.
Loop done. Hand-off: the Erlang loop's Phase 9b stub dispatcher in
`lib/erlang/vm/dispatcher.sx` can now be replaced with a real
`hosts/ocaml/lib/extensions/erlang.ml` consumer.
- **2026-05-15** — Phase D done. New `hosts/ocaml/lib/extensions/` subtree
wired into the `sx` library via `(include_subdirs unqualified)`.
`extensions/test_ext.ml` is the canonical worked example: two
operand-less opcodes (`test_ext.OP_TEST_PUSH_42` = 220,
`test_ext.OP_TEST_DOUBLE_TOS` = 221) carrying `TestExtState` (an
invocation counter that exercises the per-extension state slot).
`extensions/README.md` documents the registration pattern, opcode-ID
range conventions, and naming rules.
`Sx_vm.opcode_name` now consults `extension_opcode_name_ref` (forward
ref) so disassembly shows extension opcodes by name instead of
`UNKNOWN_n`. Registry maintains `name_of_id_table` (reverse of
`by_name`) and installs the lookup at module init alongside the
dispatch ref. 5 new foundation tests (`extensions/test_ext` suite):
`extension-opcode-id` finds OP_TEST_PUSH_42, end-to-end bytecode runs
to 84, disassemble shows opcode names, unregistered ext opcodes still
fall back to UNKNOWN_n, per-extension state counter increments.
+5 pass vs Phase C baseline (4826 vs 4821), 1111 pre-existing failures
unchanged. Conformance suites green: erlang 530/530, haskell 285/285,
datalog 276/276, prolog 590/590, smalltalk 847/847, common-lisp
487/487, apl 562/562, js 148/148, forth 632/638 (pre-existing), tcl
3/4 (pre-existing), ocaml-on-sx unit 607/607.
- **2026-05-15** — Phase C done. `extension-opcode-id` SX primitive
registered from `sx_vm_extensions.ml` module init (avoids the
`sx_primitives ↔ sx_vm` cycle by registering downstream of both).
Accepts a string or symbol; returns `Integer id` for registered
opcode names, `Nil` for unknown — so a missing extension at compile
time degrades to a fallback rather than failure. 5 new foundation
tests (`extension-opcode-id primitive` suite): registered lookup,
unknown → nil, symbol arg, zero-arg rejection, integer-arg
rejection. +5 pass vs Phase B baseline (4821 vs 4816), 1111
pre-existing failures unchanged. Conformance suites green: erlang
530/530, haskell 285/285, datalog 276/276, prolog 590/590, smalltalk
847/847, common-lisp 487/487, apl 562/562, js 148/148, forth 632/638
(pre-existing), tcl 3/4 (pre-existing), ocaml-on-sx unit 607/607.
- **2026-05-14** — Phase B done. Added `hosts/ocaml/lib/sx_vm_extension.ml`
(interface: `handler` type, `extension_state` extensible variant,
`EXTENSION` module type) and `sx_vm_extensions.ml` (registry: `register`,
`dispatch`, `id_of_name`, `state_of_extension`, `_reset_for_tests`).
`let () = install_dispatch ()` at module init replaces Phase A's stub
with the real registry dispatch — Phase A behavior preserved (empty
registry still raises `Invalid_opcode` for unregistered ops). Registry
rejects opcode IDs outside 200-247, duplicate IDs, duplicate names, and
duplicate extension names. 9 new foundation tests (`vm-extension-registry`
suite): id_of_name resolve+miss, state_of_extension resolve+miss,
end-to-end VM dispatch (push 42), opcode composition (push 42 → double
→ 84), duplicate-id / out-of-range / duplicate-name rejection. +9 pass
vs Phase A baseline (4816 vs 4807), 1111 pre-existing failures unchanged.
Conformance suites green: erlang 530/530, haskell 285/285, datalog
276/276, prolog 590/590, smalltalk 847/847, common-lisp 487/487, apl
562/562, js 148/148, forth 632/638 (pre-existing), tcl 3/4 (pre-existing),
ocaml-on-sx unit 607/607.
- **2026-05-14** — Phase A done. Added `Invalid_opcode of int` exception,
`extension_dispatch_ref` (default raises `Invalid_opcode op`), and the
`| op when op >= 200 -> !extension_dispatch_ref op vm frame` arm before the
catch-all in `sx_vm.ml`. Partition comment documents 1-199 core / 200-247
extensions / 248-255 reserved (current core ceiling is OP_DEC = 175).
4 new foundation tests (3 × Invalid_opcode for opcodes 200/224/247, 1 ×
Eval_error for opcode 199 to pin the threshold). Foundation 64/64;
full OCaml test suite +4 pass vs baseline (4807 vs 4803), 1111 pre-existing
failures unchanged. Conformance suites green: erlang 530/530, haskell
285/285, datalog 276/276, prolog 590/590, smalltalk 847/847, common-lisp
305/305, apl 562/562, js 148/148, forth 632/638 (pre-existing), tcl 3/4
(pre-existing), ocaml-on-sx unit 607/607. (Lua 0/16 and ocaml-conformance
baseline programs not exercised — pre-existing scoreboard state and
multi-hour runtime respectively.)