Merge loops/sx-vm-extensions into architecture: hosts/ocaml VM opcode extension mechanism
5 phases (A-E) per plans/sx-vm-opcode-extension.md:
- A: Sx_vm dispatch fallthrough for opcodes ≥200 + Invalid_opcode + extension_dispatch_ref
- B: Sx_vm_extension interface + Sx_vm_extensions registry (register / dispatch /
id_of_name / state_of_extension), installs into the dispatch_ref at module init
- C: extension-opcode-id SX primitive for compiler-side lookup
- D: lib/extensions/ subtree wired via include_subdirs, test_ext.ml as the canonical
worked example, opcode_name forward-ref so disassemble shows ext opcodes by name
- E: bytecode_uses_extension_opcodes scanner + JIT skip path so lambdas containing
extension opcodes run interpreted via CEK
26 new foundation tests across 5 suites, all green. Zero regressions across 11
language-port conformance suites (erlang 530, haskell 285, datalog 276, prolog 590,
smalltalk 847, common-lisp 487, apl 562, js 148, forth 632, tcl 3, ocaml-on-sx unit 607).
Hand-off: lib/erlang/vm/dispatcher.sx (Phase 9b stub) can now be replaced with a real
hosts/ocaml/lib/extensions/erlang.ml consumer.
This commit is contained in:
@@ -67,6 +67,14 @@ let rec deep_equal a b =
|
|||||||
| NativeFn _, NativeFn _ -> a == b
|
| NativeFn _, NativeFn _ -> a == b
|
||||||
| _ -> false
|
| _ -> false
|
||||||
|
|
||||||
|
(* ====================================================================== *)
|
||||||
|
(* Test extensions for the VM extension registry suite (Phase B) *)
|
||||||
|
(* ====================================================================== *)
|
||||||
|
|
||||||
|
(* Extend the extensible variant from sx_vm_extension.ml so the test
|
||||||
|
extensions below can carry their own private state. *)
|
||||||
|
type Sx_vm_extension.extension_state += TestRegState of int ref
|
||||||
|
|
||||||
(* ====================================================================== *)
|
(* ====================================================================== *)
|
||||||
(* Build evaluator environment with test platform functions *)
|
(* Build evaluator environment with test platform functions *)
|
||||||
(* ====================================================================== *)
|
(* ====================================================================== *)
|
||||||
@@ -1282,7 +1290,399 @@ let run_foundation_tests () =
|
|||||||
let l = { l_params = ["x"]; l_body = Symbol "x"; l_closure = Sx_types.make_env (); l_name = None; l_compiled = None; l_call_count = 0; l_uid = Sx_types.next_lambda_uid () } in
|
let l = { l_params = ["x"]; l_body = Symbol "x"; l_closure = Sx_types.make_env (); l_name = None; l_compiled = None; l_call_count = 0; l_uid = Sx_types.next_lambda_uid () } in
|
||||||
assert_true "is_lambda" (Bool (Sx_types.is_lambda (Lambda l)));
|
assert_true "is_lambda" (Bool (Sx_types.is_lambda (Lambda l)));
|
||||||
ignore (Sx_types.set_lambda_name (Lambda l) "my-fn");
|
ignore (Sx_types.set_lambda_name (Lambda l) "my-fn");
|
||||||
assert_eq "lambda name mutated" (String "my-fn") (lambda_name (Lambda l))
|
assert_eq "lambda name mutated" (String "my-fn") (lambda_name (Lambda l));
|
||||||
|
|
||||||
|
Printf.printf "\nSuite: vm-extension-dispatch\n";
|
||||||
|
let make_bc op = ({
|
||||||
|
vc_arity = 0; vc_rest_arity = -1; vc_locals = 0;
|
||||||
|
vc_bytecode = [| op |]; vc_constants = [||];
|
||||||
|
vc_bytecode_list = None; vc_constants_list = None;
|
||||||
|
} : Sx_types.vm_code) in
|
||||||
|
let expect_invalid_opcode label op =
|
||||||
|
let globals = Hashtbl.create 1 in
|
||||||
|
try
|
||||||
|
let _ = Sx_vm.execute_module (make_bc op) globals in
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: %s — expected Invalid_opcode, got a result\n" label
|
||||||
|
with
|
||||||
|
| Sx_vm.Invalid_opcode n when n = op ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: %s\n" label
|
||||||
|
| exn ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: %s — unexpected: %s\n" label (Printexc.to_string exn)
|
||||||
|
in
|
||||||
|
expect_invalid_opcode "opcode 200 raises Invalid_opcode 200" 200;
|
||||||
|
expect_invalid_opcode "opcode 224 raises Invalid_opcode 224" 224;
|
||||||
|
expect_invalid_opcode "opcode 247 raises Invalid_opcode 247" 247;
|
||||||
|
(* Opcode 199 sits just below the extension threshold — should fall to the
|
||||||
|
catch-all (Eval_error), proving the threshold is at 200, not 199. *)
|
||||||
|
let globals = Hashtbl.create 1 in
|
||||||
|
(try
|
||||||
|
let _ = Sx_vm.execute_module (make_bc 199) globals in
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: opcode 199 — expected Eval_error, got a result\n"
|
||||||
|
with
|
||||||
|
| Sx_vm.Invalid_opcode _ ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: opcode 199 routed to extension dispatch (threshold wrong)\n"
|
||||||
|
| Sx_types.Eval_error _ ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: opcode 199 stays in core (catch-all)\n"
|
||||||
|
| exn ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: opcode 199 — unexpected: %s\n" (Printexc.to_string exn));
|
||||||
|
|
||||||
|
Printf.printf "\nSuite: vm-extension-registry\n";
|
||||||
|
(* Sx_vm_extensions self-installs its dispatcher at module init. Reset
|
||||||
|
the registry so prior loaded extensions don't interfere with this
|
||||||
|
test. *)
|
||||||
|
Sx_vm_extensions._reset_for_tests ();
|
||||||
|
let module TestExt : Sx_vm_extension.EXTENSION = struct
|
||||||
|
let name = "test_reg"
|
||||||
|
let init () = TestRegState (ref 0)
|
||||||
|
let opcodes _st = [
|
||||||
|
(210, "test_reg.OP_PUSH_42", (fun vm _frame ->
|
||||||
|
Sx_vm.push vm (Sx_types.Integer 42)));
|
||||||
|
(211, "test_reg.OP_DOUBLE_TOS", (fun vm _frame ->
|
||||||
|
let v = Sx_vm.pop vm in
|
||||||
|
match v with
|
||||||
|
| Sx_types.Integer n -> Sx_vm.push vm (Sx_types.Integer (n * 2))
|
||||||
|
| _ -> failwith "OP_DOUBLE_TOS: not an integer"));
|
||||||
|
]
|
||||||
|
end in
|
||||||
|
Sx_vm_extensions.register (module TestExt);
|
||||||
|
|
||||||
|
(match Sx_vm_extensions.id_of_name "test_reg.OP_PUSH_42" with
|
||||||
|
| Some 210 ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: id_of_name resolves opcode\n"
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: id_of_name: got %s\n"
|
||||||
|
(match other with Some n -> string_of_int n | None -> "None"));
|
||||||
|
|
||||||
|
(match Sx_vm_extensions.id_of_name "nonexistent.OP" with
|
||||||
|
| None ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: id_of_name returns None for unknown\n"
|
||||||
|
| Some _ ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: id_of_name should return None for unknown\n");
|
||||||
|
|
||||||
|
(match Sx_vm_extensions.state_of_extension "test_reg" with
|
||||||
|
| Some (TestRegState _) ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: state_of_extension returns extension state\n"
|
||||||
|
| _ ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: state_of_extension lookup\n");
|
||||||
|
|
||||||
|
(match Sx_vm_extensions.state_of_extension "nonexistent" with
|
||||||
|
| None ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: state_of_extension None for unknown\n"
|
||||||
|
| Some _ ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: state_of_extension should be None\n");
|
||||||
|
|
||||||
|
(* End-to-end dispatch through the VM. Bytecode runs OP_PUSH_42 then
|
||||||
|
OP_RETURN (50); execute_module pops the result. *)
|
||||||
|
let make_bc_seq bytes = ({
|
||||||
|
vc_arity = 0; vc_rest_arity = -1; vc_locals = 0;
|
||||||
|
vc_bytecode = bytes; vc_constants = [||];
|
||||||
|
vc_bytecode_list = None; vc_constants_list = None;
|
||||||
|
} : Sx_types.vm_code) in
|
||||||
|
(let globals = Hashtbl.create 1 in
|
||||||
|
try
|
||||||
|
match Sx_vm.execute_module (make_bc_seq [| 210; 50 |]) globals with
|
||||||
|
| Integer 42 ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: dispatch routes opcode 210 -> push 42\n"
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: dispatch opcode 210: got %s\n"
|
||||||
|
(Sx_types.inspect other)
|
||||||
|
with exn ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: dispatch opcode 210 raised: %s\n"
|
||||||
|
(Printexc.to_string exn));
|
||||||
|
|
||||||
|
(* Compose two extension opcodes: PUSH_42 then DOUBLE_TOS then RETURN.
|
||||||
|
Verifies that successive extension dispatches share VM state. *)
|
||||||
|
(let globals = Hashtbl.create 1 in
|
||||||
|
try
|
||||||
|
match Sx_vm.execute_module (make_bc_seq [| 210; 211; 50 |]) globals with
|
||||||
|
| Integer 84 ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: extension opcodes compose (42 -> 84)\n"
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: composed opcodes: got %s\n"
|
||||||
|
(Sx_types.inspect other)
|
||||||
|
with exn ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: composed opcodes raised: %s\n"
|
||||||
|
(Printexc.to_string exn));
|
||||||
|
|
||||||
|
(* Duplicate opcode-id detection. *)
|
||||||
|
let module DupExt : Sx_vm_extension.EXTENSION = struct
|
||||||
|
let name = "dup_check"
|
||||||
|
let init () = TestRegState (ref 0)
|
||||||
|
let opcodes _st = [
|
||||||
|
(210, "dup_check.OP_X", (fun _vm _frame -> ()));
|
||||||
|
]
|
||||||
|
end in
|
||||||
|
(try
|
||||||
|
Sx_vm_extensions.register (module DupExt);
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: duplicate opcode id should have raised\n"
|
||||||
|
with Failure _ ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: duplicate opcode id rejected\n");
|
||||||
|
|
||||||
|
(* Out-of-range opcode-id detection. *)
|
||||||
|
let module OutExt : Sx_vm_extension.EXTENSION = struct
|
||||||
|
let name = "out_of_range"
|
||||||
|
let init () = TestRegState (ref 0)
|
||||||
|
let opcodes _st = [
|
||||||
|
(300, "out_of_range.OP_X", (fun _vm _frame -> ()));
|
||||||
|
]
|
||||||
|
end in
|
||||||
|
(try
|
||||||
|
Sx_vm_extensions.register (module OutExt);
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: out-of-range opcode should have raised\n"
|
||||||
|
with Failure _ ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: out-of-range opcode rejected\n");
|
||||||
|
|
||||||
|
(* Duplicate extension-name detection. *)
|
||||||
|
let module SameNameExt : Sx_vm_extension.EXTENSION = struct
|
||||||
|
let name = "test_reg" (* same as TestExt above *)
|
||||||
|
let init () = TestRegState (ref 0)
|
||||||
|
let opcodes _st = []
|
||||||
|
end in
|
||||||
|
(try
|
||||||
|
Sx_vm_extensions.register (module SameNameExt);
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: duplicate extension name should have raised\n"
|
||||||
|
with Failure _ ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: duplicate extension name rejected\n");
|
||||||
|
|
||||||
|
Printf.printf "\nSuite: extension-opcode-id primitive\n";
|
||||||
|
let prim = Hashtbl.find Sx_primitives.primitives "extension-opcode-id" in
|
||||||
|
|
||||||
|
(* Known opcode (registered by TestExt above). *)
|
||||||
|
(match prim [String "test_reg.OP_PUSH_42"] with
|
||||||
|
| Integer 210 ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: primitive returns Integer for registered opcode\n"
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: registered opcode lookup: got %s\n"
|
||||||
|
(Sx_types.inspect other));
|
||||||
|
|
||||||
|
(* Unknown opcode → Nil. *)
|
||||||
|
(match prim [String "nonexistent.OP_X"] with
|
||||||
|
| Nil ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: primitive returns nil for unknown opcode\n"
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: unknown opcode lookup: got %s\n"
|
||||||
|
(Sx_types.inspect other));
|
||||||
|
|
||||||
|
(* Symbol arg also accepted (compilers may pass quoted symbols). *)
|
||||||
|
(match prim [Symbol "test_reg.OP_DOUBLE_TOS"] with
|
||||||
|
| Integer 211 ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: primitive accepts Symbol args\n"
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: symbol arg: got %s\n" (Sx_types.inspect other));
|
||||||
|
|
||||||
|
(* Wrong arity / type raises Eval_error. *)
|
||||||
|
(try
|
||||||
|
let _ = prim [] in
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: zero args should have raised\n"
|
||||||
|
with Sx_types.Eval_error _ ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: zero args rejected\n");
|
||||||
|
|
||||||
|
(try
|
||||||
|
let _ = prim [Integer 42] in
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: integer arg should have raised\n"
|
||||||
|
with Sx_types.Eval_error _ ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: integer arg rejected\n");
|
||||||
|
|
||||||
|
Printf.printf "\nSuite: extensions/test_ext (canonical extension)\n";
|
||||||
|
(* Phase D: the real test extension lives at lib/extensions/test_ext.ml.
|
||||||
|
Register it on top of the inline test_reg from earlier suites — the
|
||||||
|
two use disjoint opcode IDs (210/211 vs 220/221) so they coexist. *)
|
||||||
|
Test_ext.register ();
|
||||||
|
|
||||||
|
(* Lookup via the public primitive should now find OP_TEST_PUSH_42. *)
|
||||||
|
(match prim [String "test_ext.OP_TEST_PUSH_42"] with
|
||||||
|
| Integer 220 ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: extension-opcode-id finds test_ext.OP_TEST_PUSH_42\n"
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: opcode lookup: got %s\n" (Sx_types.inspect other));
|
||||||
|
|
||||||
|
(* End-to-end: PUSH_42 + DOUBLE_TOS + RETURN. *)
|
||||||
|
(let globals = Hashtbl.create 1 in
|
||||||
|
try
|
||||||
|
match Sx_vm.execute_module (make_bc_seq [| 220; 221; 50 |]) globals with
|
||||||
|
| Integer 84 ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: extensions/test_ext bytecode executes (84)\n"
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: test_ext bytecode result: got %s\n"
|
||||||
|
(Sx_types.inspect other)
|
||||||
|
with exn ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: test_ext bytecode raised: %s\n"
|
||||||
|
(Printexc.to_string exn));
|
||||||
|
|
||||||
|
(* Disassembly: opcode_name should resolve 220/221 via the registry,
|
||||||
|
not fall back to UNKNOWN_220 / UNKNOWN_221. disassemble returns a
|
||||||
|
Dict; the instruction list lives at key "bytecode". *)
|
||||||
|
(let code = make_bc_seq [| 220; 221; 50 |] in
|
||||||
|
let dis = Sx_vm.disassemble code in
|
||||||
|
let entries = match dis with
|
||||||
|
| Dict d -> (match Hashtbl.find_opt d "bytecode" with
|
||||||
|
| Some (List es) -> es
|
||||||
|
| _ -> [])
|
||||||
|
| _ -> []
|
||||||
|
in
|
||||||
|
let names = List.filter_map (fun entry -> match entry with
|
||||||
|
| Dict d ->
|
||||||
|
(match Hashtbl.find_opt d "opcode" with
|
||||||
|
| Some (String name) -> Some name
|
||||||
|
| _ -> None)
|
||||||
|
| _ -> None) entries
|
||||||
|
in
|
||||||
|
let has name = List.mem name names in
|
||||||
|
if has "test_ext.OP_TEST_PUSH_42" && has "test_ext.OP_TEST_DOUBLE_TOS" then begin
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: disassemble shows extension opcode names\n"
|
||||||
|
end else begin
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: disassemble names: [%s]\n" (String.concat ", " names)
|
||||||
|
end);
|
||||||
|
|
||||||
|
(* Sanity: opcode_name on an unregistered extension opcode still
|
||||||
|
returns UNKNOWN_n. Pick 230 — out of test_ext's range. *)
|
||||||
|
(match Sx_vm.opcode_name 230 with
|
||||||
|
| "UNKNOWN_230" ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: unregistered ext opcode falls back to UNKNOWN_n\n"
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: opcode_name 230: got %s\n" other);
|
||||||
|
|
||||||
|
(* Per-extension state: invocation_count should reflect the two opcodes
|
||||||
|
that ran in the dispatch test above. *)
|
||||||
|
(match Test_ext.invocation_count () with
|
||||||
|
| Some n when n >= 2 ->
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: extension state recorded %d invocations\n" n
|
||||||
|
| other ->
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: invocation_count: %s\n"
|
||||||
|
(match other with Some n -> string_of_int n | None -> "None"));
|
||||||
|
|
||||||
|
Printf.printf "\nSuite: jit extension-opcode awareness\n";
|
||||||
|
let scan = Sx_vm.bytecode_uses_extension_opcodes in
|
||||||
|
let no_consts = [||] in
|
||||||
|
|
||||||
|
(* Pure core ops: scan reports false. *)
|
||||||
|
(* OP_TRUE OP_RETURN *)
|
||||||
|
if not (scan [| 3; 50 |] no_consts) then begin
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: pure core bytecode is JIT-eligible\n"
|
||||||
|
end else begin
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: pure core bytecode flagged as extension\n"
|
||||||
|
end;
|
||||||
|
|
||||||
|
(* Extension opcode anywhere → true. *)
|
||||||
|
if scan [| 220; 50 |] no_consts then begin
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: extension opcode detected at head\n"
|
||||||
|
end else begin
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: extension opcode at head missed\n"
|
||||||
|
end;
|
||||||
|
|
||||||
|
(* Mixed: core + extension → true. *)
|
||||||
|
if scan [| 3; 220; 50 |] no_consts then begin
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: extension opcode detected after core ops\n"
|
||||||
|
end else begin
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: extension opcode after core ops missed\n"
|
||||||
|
end;
|
||||||
|
|
||||||
|
(* Operand bytes ≥200 must NOT trigger. CONST u16 with index 220
|
||||||
|
into a synthetic constant pool — the operand is 220 (lo) 0 (hi),
|
||||||
|
not an opcode. The pool entry at 220 is irrelevant for the scan. *)
|
||||||
|
let big_consts = Array.make 256 Nil in
|
||||||
|
if not (scan [| 1; 220; 0; 50 |] big_consts) then begin
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: CONST operand ≥200 not a false positive\n"
|
||||||
|
end else begin
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: CONST operand ≥200 false-positives as ext op\n"
|
||||||
|
end;
|
||||||
|
|
||||||
|
(* CALL_PRIM has 3 operand bytes (u16 + u8); all ≥200 should not
|
||||||
|
trigger. *)
|
||||||
|
if not (scan [| 52; 220; 200; 200; 50 |] big_consts) then begin
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: CALL_PRIM operands ≥200 not a false positive\n"
|
||||||
|
end else begin
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: CALL_PRIM operands ≥200 false-positive\n"
|
||||||
|
end;
|
||||||
|
|
||||||
|
(* CLOSURE with upvalue descriptors: scan must skip the 2 + 2*n
|
||||||
|
dynamic operand bytes. Build a synthetic constant pool with a
|
||||||
|
Dict at index 0 declaring upvalue-count 1, descriptors that are
|
||||||
|
≥200 — the scan should skip them and not trigger.
|
||||||
|
|
||||||
|
Bytecode layout: CLOSURE 0 0 desc_is_local desc_index RETURN
|
||||||
|
op lo hi 210 220 50
|
||||||
|
With upvalue-count = 1, scan must advance past the 2-byte CLOSURE
|
||||||
|
operand AND the 2 descriptor bytes (210, 220), landing on RETURN. *)
|
||||||
|
let cl_consts = Array.make 1 Nil in
|
||||||
|
let dict = Hashtbl.create 1 in
|
||||||
|
Hashtbl.replace dict "upvalue-count" (Integer 1);
|
||||||
|
cl_consts.(0) <- Dict dict;
|
||||||
|
if not (scan [| 51; 0; 0; 210; 220; 50 |] cl_consts) then begin
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: CLOSURE upvalue descriptors ≥200 skipped\n"
|
||||||
|
end else begin
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: CLOSURE upvalue descriptors false-positive\n"
|
||||||
|
end;
|
||||||
|
|
||||||
|
(* Sanity: opcode after CLOSURE+descriptors that IS an extension
|
||||||
|
opcode triggers correctly. *)
|
||||||
|
if scan [| 51; 0; 0; 210; 220; 221; 50 |] cl_consts then begin
|
||||||
|
incr pass_count;
|
||||||
|
Printf.printf " PASS: extension opcode after CLOSURE detected\n"
|
||||||
|
end else begin
|
||||||
|
incr fail_count;
|
||||||
|
Printf.printf " FAIL: extension opcode after CLOSURE missed\n"
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
(* ====================================================================== *)
|
(* ====================================================================== *)
|
||||||
|
|||||||
@@ -2,3 +2,7 @@
|
|||||||
(name sx)
|
(name sx)
|
||||||
(wrapped false)
|
(wrapped false)
|
||||||
(libraries re re.pcre unix))
|
(libraries re re.pcre unix))
|
||||||
|
|
||||||
|
; Pull in extension modules from lib/extensions/ (test_ext.ml, etc).
|
||||||
|
; See plans/sx-vm-opcode-extension.md.
|
||||||
|
(include_subdirs unqualified)
|
||||||
|
|||||||
71
hosts/ocaml/lib/extensions/README.md
Normal file
71
hosts/ocaml/lib/extensions/README.md
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
# SX VM extensions
|
||||||
|
|
||||||
|
Each `*.ml` file here is a VM extension — a first-class OCaml module that
|
||||||
|
registers specialized bytecode opcodes with `Sx_vm_extensions`. See
|
||||||
|
[`plans/sx-vm-opcode-extension.md`](../../../../plans/sx-vm-opcode-extension.md)
|
||||||
|
for the design.
|
||||||
|
|
||||||
|
## Pattern
|
||||||
|
|
||||||
|
```ocaml
|
||||||
|
(* lib/extensions/myport.ml *)
|
||||||
|
open Sx_types
|
||||||
|
|
||||||
|
type Sx_vm_extension.extension_state += MyportState of { ... }
|
||||||
|
|
||||||
|
module M : Sx_vm_extension.EXTENSION = struct
|
||||||
|
let name = "myport"
|
||||||
|
let init () = MyportState { ... }
|
||||||
|
let opcodes _st = [
|
||||||
|
(id, "myport.OP_NAME", handler);
|
||||||
|
...
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
let register () = Sx_vm_extensions.register (module M)
|
||||||
|
```
|
||||||
|
|
||||||
|
Then call `Myport.register ()` once at startup from any binary that
|
||||||
|
should have the extension loaded.
|
||||||
|
|
||||||
|
## Opcode-ID allocation
|
||||||
|
|
||||||
|
Range 200-247 (per `Sx_vm_extensions.extension_min` /
|
||||||
|
`extension_max`). Conventions:
|
||||||
|
|
||||||
|
| Range | Use |
|
||||||
|
|---------|-------------------------------------------------------------------------|
|
||||||
|
| 200-209 | reserved for `lib/guest/vm/` shared opcodes (chiselled out on 2nd use) |
|
||||||
|
| 210-219 | inline test extensions defined in `bin/run_tests.ml` |
|
||||||
|
| 220-229 | this directory's `test_ext` (the canonical template) |
|
||||||
|
| 230-247 | first-come-first-served by language ports (Erlang first) |
|
||||||
|
|
||||||
|
When a port claims a contiguous block, document it in the table above.
|
||||||
|
The registry rejects collisions at startup with a loud error — there is
|
||||||
|
no silent shadowing.
|
||||||
|
|
||||||
|
## Naming
|
||||||
|
|
||||||
|
Always prefix opcode names with the extension name plus a dot:
|
||||||
|
`myport.OP_<NAME>`. The prefix is a hard convention so that multiple
|
||||||
|
extensions can share the global opcode-name namespace cleanly.
|
||||||
|
|
||||||
|
## State
|
||||||
|
|
||||||
|
`extension_state` is an extensible variant. Add your case (e.g.
|
||||||
|
`MyportState of { ... }`) at the top of your file, return it from
|
||||||
|
`init`, and pattern-match it inside your handlers. Other extensions
|
||||||
|
cannot see your state — the variant case is private to your module.
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
`test_ext.ml` is the canonical worked example. `bin/run_tests.ml`
|
||||||
|
calls `Test_ext.register ()`, then drives bytecode that exercises the
|
||||||
|
opcodes end-to-end (push, double, dispatch, disassemble, invocation
|
||||||
|
counter). Mirror this shape when adding a real port's extension.
|
||||||
|
|
||||||
|
## Build wiring
|
||||||
|
|
||||||
|
`lib/dune` has `(include_subdirs unqualified)`, so any `.ml` you drop
|
||||||
|
in here is automatically part of the `sx` library. Module name follows
|
||||||
|
the filename verbatim (`test_ext.ml` → `Test_ext`).
|
||||||
67
hosts/ocaml/lib/extensions/test_ext.ml
Normal file
67
hosts/ocaml/lib/extensions/test_ext.ml
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
(** {1 [test_ext] — canonical example VM extension}
|
||||||
|
|
||||||
|
A minimal extension demonstrating the registration pattern from
|
||||||
|
[plans/sx-vm-opcode-extension.md]. The opcode IDs (220, 221) sit at
|
||||||
|
the top of the extension range, well clear of anything a real
|
||||||
|
language port would claim.
|
||||||
|
|
||||||
|
Two operand-less opcodes:
|
||||||
|
|
||||||
|
- [test_ext.OP_TEST_PUSH_42] (220) — pushes the integer 42.
|
||||||
|
- [test_ext.OP_TEST_DOUBLE_TOS] (221) — pops the integer on TOS,
|
||||||
|
pushes 2× it.
|
||||||
|
|
||||||
|
These are the smallest stack manipulations that prove the extension
|
||||||
|
mechanism wires through end-to-end (registry → dispatch → human-
|
||||||
|
readable disassembly). Real ports (Erlang Phase 9, future Haskell
|
||||||
|
perf phases) replace this template with their own opcode set.
|
||||||
|
|
||||||
|
Loading: [Test_ext.register ()] adds the extension to
|
||||||
|
[Sx_vm_extensions]. Run-time binaries that want the test opcodes
|
||||||
|
available call this once at startup. Unit tests in
|
||||||
|
[bin/run_tests.ml] do exactly that. *)
|
||||||
|
|
||||||
|
open Sx_types
|
||||||
|
|
||||||
|
(** Per-instance state for [test_ext]. Counts how many times the
|
||||||
|
handlers ran — purely so the extension has *some* state, exercising
|
||||||
|
the [extension_state] machinery. *)
|
||||||
|
type Sx_vm_extension.extension_state += TestExtState of {
|
||||||
|
mutable invocations : int;
|
||||||
|
}
|
||||||
|
|
||||||
|
module M : Sx_vm_extension.EXTENSION = struct
|
||||||
|
let name = "test_ext"
|
||||||
|
let init () = TestExtState { invocations = 0 }
|
||||||
|
|
||||||
|
let opcodes st =
|
||||||
|
let bump () = match st with
|
||||||
|
| TestExtState s -> s.invocations <- s.invocations + 1
|
||||||
|
| _ -> ()
|
||||||
|
in
|
||||||
|
[
|
||||||
|
(220, "test_ext.OP_TEST_PUSH_42",
|
||||||
|
(fun vm _frame -> bump (); Sx_vm.push vm (Integer 42)));
|
||||||
|
|
||||||
|
(221, "test_ext.OP_TEST_DOUBLE_TOS",
|
||||||
|
(fun vm _frame ->
|
||||||
|
bump ();
|
||||||
|
let v = Sx_vm.pop vm in
|
||||||
|
match v with
|
||||||
|
| Integer n -> Sx_vm.push vm (Integer (n * 2))
|
||||||
|
| _ -> raise (Eval_error
|
||||||
|
"test_ext.OP_TEST_DOUBLE_TOS: TOS is not an integer")));
|
||||||
|
]
|
||||||
|
end
|
||||||
|
|
||||||
|
(** Register [test_ext] in [Sx_vm_extensions]. Idempotent only by
|
||||||
|
failing loudly — calling twice raises [Failure]. Binaries call this
|
||||||
|
once at startup; tests may [_reset_for_tests] then re-register. *)
|
||||||
|
let register () = Sx_vm_extensions.register (module M : Sx_vm_extension.EXTENSION)
|
||||||
|
|
||||||
|
(** Read the invocation counter from the live registry state. Returns
|
||||||
|
[None] if [register] hasn't been called yet. *)
|
||||||
|
let invocation_count () =
|
||||||
|
match Sx_vm_extensions.state_of_extension "test_ext" with
|
||||||
|
| Some (TestExtState s) -> Some s.invocations
|
||||||
|
| _ -> None
|
||||||
@@ -44,6 +44,11 @@ type vm = {
|
|||||||
ip past OP_PERFORM, stack ready for a result push). *)
|
ip past OP_PERFORM, stack ready for a result push). *)
|
||||||
exception VmSuspended of value * vm
|
exception VmSuspended of value * vm
|
||||||
|
|
||||||
|
(** Raised by the extension dispatch fallthrough when an opcode in the
|
||||||
|
extension range (≥ 200) is encountered with no handler registered.
|
||||||
|
Carries the offending opcode id. See plans/sx-vm-opcode-extension.md. *)
|
||||||
|
exception Invalid_opcode of int
|
||||||
|
|
||||||
(* Register the VM suspension converter so sx_runtime.sx_apply_cek can
|
(* Register the VM suspension converter so sx_runtime.sx_apply_cek can
|
||||||
catch VmSuspended and convert it to CekPerformRequest without a
|
catch VmSuspended and convert it to CekPerformRequest without a
|
||||||
direct dependency on this module. *)
|
direct dependency on this module. *)
|
||||||
@@ -57,6 +62,21 @@ let () = Sx_types._convert_vm_suspension := (fun exn ->
|
|||||||
let jit_compile_ref : (lambda -> (string, value) Hashtbl.t -> vm_closure option) ref =
|
let jit_compile_ref : (lambda -> (string, value) Hashtbl.t -> vm_closure option) ref =
|
||||||
ref (fun _ _ -> None)
|
ref (fun _ _ -> None)
|
||||||
|
|
||||||
|
(** Forward reference for extension opcode dispatch — Phase B installs the
|
||||||
|
real registry's dispatch function here at module init. Until then, any
|
||||||
|
opcode in the extension range raises [Invalid_opcode]. Same forward-ref
|
||||||
|
pattern as [jit_compile_ref] above; keeps [Sx_vm_extensions] free to
|
||||||
|
depend on [Sx_vm]'s [vm] / [frame] types without a cycle. *)
|
||||||
|
let extension_dispatch_ref : (int -> vm -> frame -> unit) ref =
|
||||||
|
ref (fun op _vm _frame -> raise (Invalid_opcode op))
|
||||||
|
|
||||||
|
(** Forward reference for extension opcode → name lookup, used by
|
||||||
|
[opcode_name] / [disassemble] for human-readable disassembly. The
|
||||||
|
registry installs a real lookup at module init; default returns
|
||||||
|
[None] (then [opcode_name] falls back to "UNKNOWN_n"). *)
|
||||||
|
let extension_opcode_name_ref : (int -> string option) ref =
|
||||||
|
ref (fun _ -> None)
|
||||||
|
|
||||||
(* JIT threshold and counters live in Sx_types so primitives can read them
|
(* JIT threshold and counters live in Sx_types so primitives can read them
|
||||||
without creating a sx_primitives → sx_vm dependency cycle. *)
|
without creating a sx_primitives → sx_vm dependency cycle. *)
|
||||||
|
|
||||||
@@ -875,6 +895,15 @@ and run vm =
|
|||||||
let request = pop vm in
|
let request = pop vm in
|
||||||
raise (VmSuspended (request, vm))
|
raise (VmSuspended (request, vm))
|
||||||
|
|
||||||
|
(* ---- Extension dispatch fallthrough ----
|
||||||
|
Opcode partition (see plans/sx-vm-opcode-extension.md):
|
||||||
|
0 reserved / NOP
|
||||||
|
1-199 core opcodes (current ceiling 175 = OP_DEC)
|
||||||
|
200-247 extension opcodes (registered via Sx_vm_extensions)
|
||||||
|
248-255 reserved for future expansion / multi-byte
|
||||||
|
Any opcode ≥ 200 routes through the extension registry. *)
|
||||||
|
| op when op >= 200 -> !extension_dispatch_ref op vm frame
|
||||||
|
|
||||||
| opcode ->
|
| opcode ->
|
||||||
raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d"
|
raise (Eval_error (Printf.sprintf "VM: unknown opcode %d at ip=%d"
|
||||||
opcode (frame.ip - 1)))
|
opcode (frame.ip - 1)))
|
||||||
@@ -1027,6 +1056,62 @@ let _jit_is_broken_name n =
|
|||||||
|| n = "hs-repeat-while" || n = "hs-repeat-until"
|
|| n = "hs-repeat-while" || n = "hs-repeat-until"
|
||||||
|| n = "hs-for-each" || n = "hs-put!"
|
|| n = "hs-for-each" || n = "hs-put!"
|
||||||
|
|
||||||
|
(** Scan bytecode for any extension opcode (≥ 200, the registry's
|
||||||
|
[Sx_vm_extensions.extension_min]). Walks operand bytes correctly
|
||||||
|
so values that happen to be ≥200 (e.g. a CONST u16 index pointing
|
||||||
|
into a large pool) do not trigger false positives. CLOSURE's
|
||||||
|
dynamic upvalue descriptors are read from the constant pool entry
|
||||||
|
at the same index it pushes.
|
||||||
|
|
||||||
|
Used by [jit_compile_lambda] (Phase E of the opcode-extension
|
||||||
|
plan): a lambda whose compiled body contains any extension opcode
|
||||||
|
is routed through interpretation rather than JIT. Extensions
|
||||||
|
interpret their opcodes via the registry; the JIT does not
|
||||||
|
currently know how to compile them.
|
||||||
|
|
||||||
|
Operand-size logic mirrors [opcode_operand_size] (which is defined
|
||||||
|
later, in the disassembly section); inlined here so this helper can
|
||||||
|
sit before [jit_compile_lambda] in the file. *)
|
||||||
|
let bytecode_uses_extension_opcodes (bc : int array) (consts : value array) =
|
||||||
|
let core_operand_size = function
|
||||||
|
| 1 | 20 | 21 | 64 | 65 | 128 -> 2 (* u16 *)
|
||||||
|
| 16 | 17 | 18 | 19 | 48 | 49 | 144 -> 1 (* u8 *)
|
||||||
|
| 32 | 33 | 34 | 35 -> 2 (* i16 *)
|
||||||
|
| 52 -> 3 (* CALL_PRIM: u16 + u8 *)
|
||||||
|
| _ -> 0
|
||||||
|
in
|
||||||
|
let len = Array.length bc in
|
||||||
|
let ip = ref 0 in
|
||||||
|
let found = ref false in
|
||||||
|
while not !found && !ip < len do
|
||||||
|
let op = bc.(!ip) in
|
||||||
|
if op >= 200 then found := true
|
||||||
|
else begin
|
||||||
|
ip := !ip + 1;
|
||||||
|
let extra = match op with
|
||||||
|
| 51 (* CLOSURE *) when !ip + 1 < len ->
|
||||||
|
let lo = bc.(!ip) in
|
||||||
|
let hi = bc.(!ip + 1) in
|
||||||
|
let idx = lo lor (hi lsl 8) in
|
||||||
|
let uv_count =
|
||||||
|
if idx < Array.length consts then
|
||||||
|
(match consts.(idx) with
|
||||||
|
| Dict d ->
|
||||||
|
(match Hashtbl.find_opt d "upvalue-count" with
|
||||||
|
| Some (Integer n) -> n
|
||||||
|
| Some (Number n) -> int_of_float n
|
||||||
|
| _ -> 0)
|
||||||
|
| _ -> 0)
|
||||||
|
else 0
|
||||||
|
in
|
||||||
|
2 + uv_count * 2
|
||||||
|
| _ -> core_operand_size op
|
||||||
|
in
|
||||||
|
ip := !ip + extra
|
||||||
|
end
|
||||||
|
done;
|
||||||
|
!found
|
||||||
|
|
||||||
let jit_compile_lambda (l : lambda) globals =
|
let jit_compile_lambda (l : lambda) globals =
|
||||||
let fn_name = match l.l_name with Some n -> n | None -> "<anon>" in
|
let fn_name = match l.l_name with Some n -> n | None -> "<anon>" in
|
||||||
if !_jit_compiling then (
|
if !_jit_compiling then (
|
||||||
@@ -1089,8 +1174,18 @@ let jit_compile_lambda (l : lambda) globals =
|
|||||||
if idx < Array.length outer_code.vc_constants then
|
if idx < Array.length outer_code.vc_constants then
|
||||||
let inner_val = outer_code.vc_constants.(idx) in
|
let inner_val = outer_code.vc_constants.(idx) in
|
||||||
let code = code_from_value inner_val in
|
let code = code_from_value inner_val in
|
||||||
Some { vm_code = code; vm_upvalues = [||];
|
(* Phase E: if the inner lambda's bytecode contains any
|
||||||
vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure }
|
extension opcode (≥200), skip JIT and let the lambda run
|
||||||
|
interpreted via CEK. Extension opcodes dispatch correctly
|
||||||
|
through the VM's registry fallthrough, but the JIT has no
|
||||||
|
knowledge of them and shouldn't claim ownership. *)
|
||||||
|
if bytecode_uses_extension_opcodes code.vc_bytecode code.vc_constants then begin
|
||||||
|
Printf.eprintf "[jit] SKIP %s: bytecode uses extension opcodes (interpret-only in v1)\n%!"
|
||||||
|
fn_name;
|
||||||
|
None
|
||||||
|
end else
|
||||||
|
Some { vm_code = code; vm_upvalues = [||];
|
||||||
|
vm_name = l.l_name; vm_env_ref = effective_globals; vm_closure_env = Some l.l_closure }
|
||||||
else begin
|
else begin
|
||||||
Printf.eprintf "[jit] FAIL %s: closure index %d out of bounds (pool=%d)\n%!"
|
Printf.eprintf "[jit] FAIL %s: closure index %d out of bounds (pool=%d)\n%!"
|
||||||
fn_name idx (Array.length outer_code.vc_constants);
|
fn_name idx (Array.length outer_code.vc_constants);
|
||||||
@@ -1200,7 +1295,12 @@ let opcode_name = function
|
|||||||
| 164 -> "EQ" | 165 -> "LT" | 166 -> "GT" | 167 -> "NOT"
|
| 164 -> "EQ" | 165 -> "LT" | 166 -> "GT" | 167 -> "NOT"
|
||||||
| 168 -> "LEN" | 169 -> "FIRST" | 170 -> "REST" | 171 -> "NTH"
|
| 168 -> "LEN" | 169 -> "FIRST" | 170 -> "REST" | 171 -> "NTH"
|
||||||
| 172 -> "CONS" | 173 -> "NEG" | 174 -> "INC" | 175 -> "DEC"
|
| 172 -> "CONS" | 173 -> "NEG" | 174 -> "INC" | 175 -> "DEC"
|
||||||
| n -> Printf.sprintf "UNKNOWN_%d" n
|
| n ->
|
||||||
|
(* Extension opcodes (≥200) get their human-readable name from the
|
||||||
|
registry; defaults to UNKNOWN_n if the extension isn't loaded. *)
|
||||||
|
(match !extension_opcode_name_ref n with
|
||||||
|
| Some name -> name
|
||||||
|
| None -> Printf.sprintf "UNKNOWN_%d" n)
|
||||||
|
|
||||||
(** Number of extra operand bytes consumed by each opcode.
|
(** Number of extra operand bytes consumed by each opcode.
|
||||||
Returns (format, total_bytes) where format describes the operand types. *)
|
Returns (format, total_bytes) where format describes the operand types. *)
|
||||||
|
|||||||
48
hosts/ocaml/lib/sx_vm_extension.ml
Normal file
48
hosts/ocaml/lib/sx_vm_extension.ml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
(** {1 VM extension interface}
|
||||||
|
|
||||||
|
Type definitions for VM bytecode extensions. See
|
||||||
|
[plans/sx-vm-opcode-extension.md].
|
||||||
|
|
||||||
|
An extension is a first-class module of type [EXTENSION]: it has a
|
||||||
|
stable [name], an [init] that returns its private state, and an
|
||||||
|
[opcodes] function that lists the opcodes it provides.
|
||||||
|
|
||||||
|
Opcode handlers receive the live [vm] and the active [frame]. They
|
||||||
|
read operands via [Sx_vm.read_u8] / [read_u16], manipulate the stack
|
||||||
|
via [push] / [pop] / [peek], and update the frame's [ip] as needed. *)
|
||||||
|
|
||||||
|
(** A handler for an extension opcode. Reads operands from bytecode,
|
||||||
|
manipulates the VM stack, updates the frame's instruction pointer.
|
||||||
|
May raise exceptions (which propagate via the existing VM error path). *)
|
||||||
|
type handler = Sx_vm.vm -> Sx_vm.frame -> unit
|
||||||
|
|
||||||
|
(** State an extension carries alongside the VM. Opaque to the VM core;
|
||||||
|
extensions extend this with their own constructor and cast as needed.
|
||||||
|
|
||||||
|
Extensible variant — extensions add cases:
|
||||||
|
{[
|
||||||
|
type Sx_vm_extension.extension_state +=
|
||||||
|
| ErlangState of erlang_scheduler
|
||||||
|
]} *)
|
||||||
|
type extension_state = ..
|
||||||
|
|
||||||
|
(** An extension is a first-class module of this signature. *)
|
||||||
|
module type EXTENSION = sig
|
||||||
|
(** Stable name for this extension (e.g. ["erlang"], ["guest_vm"]).
|
||||||
|
Used as the lookup key in the registry and as the prefix for opcode
|
||||||
|
names ([erlang.OP_PATTERN_TUPLE_2] etc). *)
|
||||||
|
val name : string
|
||||||
|
|
||||||
|
(** Initialize per-instance state. Called once when [register] is
|
||||||
|
invoked on this extension. *)
|
||||||
|
val init : unit -> extension_state
|
||||||
|
|
||||||
|
(** Opcodes this extension provides. Each is
|
||||||
|
[(opcode_id, opcode_name, handler)].
|
||||||
|
|
||||||
|
[opcode_id] must be in the range 200-247 (the extension partition;
|
||||||
|
see the partition comment at the top of [Sx_vm]'s dispatch loop).
|
||||||
|
Conflicts with already-registered opcodes cause [register] to
|
||||||
|
fail. *)
|
||||||
|
val opcodes : extension_state -> (int * string * handler) list
|
||||||
|
end
|
||||||
120
hosts/ocaml/lib/sx_vm_extensions.ml
Normal file
120
hosts/ocaml/lib/sx_vm_extensions.ml
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
(** {1 VM extension registry}
|
||||||
|
|
||||||
|
Holds the live registry of extension opcodes and installs the
|
||||||
|
[dispatch] function into [Sx_vm.extension_dispatch_ref] at module
|
||||||
|
init time, replacing Phase A's stub.
|
||||||
|
|
||||||
|
See [plans/sx-vm-opcode-extension.md] and [Sx_vm_extension] for the
|
||||||
|
extension interface. *)
|
||||||
|
|
||||||
|
open Sx_vm_extension
|
||||||
|
|
||||||
|
(** The opcode range an extension is allowed to claim.
|
||||||
|
Mirrors the partition comment in [Sx_vm]. *)
|
||||||
|
let extension_min = 200
|
||||||
|
let extension_max = 247
|
||||||
|
|
||||||
|
(** opcode_id → handler *)
|
||||||
|
let by_id : (int, handler) Hashtbl.t = Hashtbl.create 64
|
||||||
|
|
||||||
|
(** opcode_name → opcode_id *)
|
||||||
|
let by_name : (string, int) Hashtbl.t = Hashtbl.create 64
|
||||||
|
|
||||||
|
(** opcode_id → opcode_name (reverse of [by_name]; used by
|
||||||
|
[Sx_vm.opcode_name] for disassembly). *)
|
||||||
|
let name_of_id_table : (int, string) Hashtbl.t = Hashtbl.create 64
|
||||||
|
|
||||||
|
(** extension_name → state *)
|
||||||
|
let states : (string, extension_state) Hashtbl.t = Hashtbl.create 8
|
||||||
|
|
||||||
|
(** Registered extension names, newest first. *)
|
||||||
|
let extensions : string list ref = ref []
|
||||||
|
|
||||||
|
(** Dispatch an extension opcode to its registered handler. Raises
|
||||||
|
[Sx_vm.Invalid_opcode] if no handler is registered for [op]. *)
|
||||||
|
let dispatch op vm frame =
|
||||||
|
match Hashtbl.find_opt by_id op with
|
||||||
|
| Some handler -> handler vm frame
|
||||||
|
| None -> raise (Sx_vm.Invalid_opcode op)
|
||||||
|
|
||||||
|
(** Register an extension. Fails if the extension name is already
|
||||||
|
registered, or if any opcode_id is outside the extension range or
|
||||||
|
collides with an already-registered opcode. *)
|
||||||
|
let register (m : (module EXTENSION)) =
|
||||||
|
let module M = (val m) in
|
||||||
|
if Hashtbl.mem states M.name then
|
||||||
|
failwith (Printf.sprintf
|
||||||
|
"Sx_vm_extensions: extension %S already registered" M.name);
|
||||||
|
let st = M.init () in
|
||||||
|
let ops = M.opcodes st in
|
||||||
|
List.iter (fun (id, opname, _h) ->
|
||||||
|
if id < extension_min || id > extension_max then
|
||||||
|
failwith (Printf.sprintf
|
||||||
|
"Sx_vm_extensions: opcode %d (%s) outside extension range %d-%d"
|
||||||
|
id opname extension_min extension_max);
|
||||||
|
if Hashtbl.mem by_id id then
|
||||||
|
failwith (Printf.sprintf
|
||||||
|
"Sx_vm_extensions: opcode %d (%s) already registered" id opname);
|
||||||
|
if Hashtbl.mem by_name opname then
|
||||||
|
failwith (Printf.sprintf
|
||||||
|
"Sx_vm_extensions: opcode name %S already registered" opname)
|
||||||
|
) ops;
|
||||||
|
Hashtbl.add states M.name st;
|
||||||
|
List.iter (fun (id, opname, h) ->
|
||||||
|
Hashtbl.add by_id id h;
|
||||||
|
Hashtbl.add by_name opname id;
|
||||||
|
Hashtbl.add name_of_id_table id opname
|
||||||
|
) ops;
|
||||||
|
extensions := M.name :: !extensions
|
||||||
|
|
||||||
|
(** Look up the opcode_id for an opcode_name. Returns [None] if no
|
||||||
|
extension provides that opcode. *)
|
||||||
|
let id_of_name name = Hashtbl.find_opt by_name name
|
||||||
|
|
||||||
|
(** Look up the opcode_name for an opcode_id. Returns [None] if no
|
||||||
|
extension provides that opcode. Used by disassembly. *)
|
||||||
|
let name_of_id id = Hashtbl.find_opt name_of_id_table id
|
||||||
|
|
||||||
|
(** Look up the state of an extension by name. Returns [None] if the
|
||||||
|
extension is not registered. *)
|
||||||
|
let state_of_extension name = Hashtbl.find_opt states name
|
||||||
|
|
||||||
|
(** Names of all registered extensions, newest first. *)
|
||||||
|
let registered_extensions () = !extensions
|
||||||
|
|
||||||
|
(** Test-only: clear the registry. Used by unit tests to isolate
|
||||||
|
extensions between test cases. The dispatch_ref is left in place. *)
|
||||||
|
let _reset_for_tests () =
|
||||||
|
Hashtbl.clear by_id;
|
||||||
|
Hashtbl.clear by_name;
|
||||||
|
Hashtbl.clear name_of_id_table;
|
||||||
|
Hashtbl.clear states;
|
||||||
|
extensions := []
|
||||||
|
|
||||||
|
(** Install our [dispatch] into [Sx_vm.extension_dispatch_ref] and our
|
||||||
|
[name_of_id] into [Sx_vm.extension_opcode_name_ref], replacing
|
||||||
|
the Phase A stubs. Idempotent. Called automatically at module init. *)
|
||||||
|
let install_dispatch () =
|
||||||
|
Sx_vm.extension_dispatch_ref := dispatch;
|
||||||
|
Sx_vm.extension_opcode_name_ref := name_of_id
|
||||||
|
|
||||||
|
let () = install_dispatch ()
|
||||||
|
|
||||||
|
(** Compiler-side opcode lookup: register the [extension-opcode-id]
|
||||||
|
primitive. Compilers ([lib/compiler.sx]) call this to emit
|
||||||
|
extension opcodes by name. Returns [Integer id] when registered,
|
||||||
|
[Nil] otherwise — so missing extensions degrade to a fallback
|
||||||
|
rather than failure. *)
|
||||||
|
let () =
|
||||||
|
Sx_primitives.register "extension-opcode-id" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Sx_types.String name] ->
|
||||||
|
(match id_of_name name with
|
||||||
|
| Some id -> Sx_types.Integer id
|
||||||
|
| None -> Sx_types.Nil)
|
||||||
|
| [Sx_types.Symbol name] ->
|
||||||
|
(match id_of_name name with
|
||||||
|
| Some id -> Sx_types.Integer id
|
||||||
|
| None -> Sx_types.Nil)
|
||||||
|
| _ -> raise (Sx_types.Eval_error
|
||||||
|
"extension-opcode-id: expected one string or symbol"))
|
||||||
86
plans/agent-briefings/sx-vm-extensions-loop.md
Normal file
86
plans/agent-briefings/sx-vm-extensions-loop.md
Normal file
@@ -0,0 +1,86 @@
|
|||||||
|
# sx-vm-extensions loop agent
|
||||||
|
|
||||||
|
Role: drives `plans/sx-vm-opcode-extension.md` to completion. One phase per
|
||||||
|
fire (A → B → C → D → E). Bounded loop — after Phase E acceptance, the loop
|
||||||
|
is done.
|
||||||
|
|
||||||
|
```
|
||||||
|
description: sx-vm-extensions queue loop
|
||||||
|
subagent_type: general-purpose
|
||||||
|
run_in_background: true
|
||||||
|
isolation: worktree (already on loops/sx-vm-extensions)
|
||||||
|
```
|
||||||
|
|
||||||
|
## What this loop is for
|
||||||
|
|
||||||
|
Mechanism in `hosts/ocaml/lib/` that lets language ports register specialized
|
||||||
|
bytecode opcodes without modifying the SX VM core. Direct prerequisite for
|
||||||
|
**erlang-on-sx Phase 9** (the BEAM analog) and a structural enabler for any
|
||||||
|
future language port that wants performance-critical opcodes.
|
||||||
|
|
||||||
|
## The queue
|
||||||
|
|
||||||
|
Per `plans/sx-vm-opcode-extension.md`, in order:
|
||||||
|
|
||||||
|
- **Phase A** — Opcode ID partition + dispatch fallthrough in `sx_vm.ml`.
|
||||||
|
Add `Invalid_opcode of int` exception, `extension_dispatch_ref`, the
|
||||||
|
`| op when op >= 200 -> !extension_dispatch_ref op vm frame` arm, and a
|
||||||
|
partition comment near the opcode list.
|
||||||
|
- **Phase B** — Extension registry module (`sx_vm_extensions.ml`).
|
||||||
|
`register`, `dispatch`, `id_of_name`, `state_of_extension`. Wire dispatch
|
||||||
|
into Phase A's ref at module init.
|
||||||
|
- **Phase C** — Compiler-side opcode lookup primitive (`extension-opcode-id`).
|
||||||
|
- **Phase D** — Test extension at `hosts/ocaml/lib/extensions/test_ext.ml`,
|
||||||
|
end-to-end SX → bytecode → VM dispatch flow.
|
||||||
|
- **Phase E** — JIT awareness: extension opcodes mark a lambda as
|
||||||
|
interpret-only.
|
||||||
|
|
||||||
|
## Per-fire workflow (hard)
|
||||||
|
|
||||||
|
1. Read `plans/sx-vm-opcode-extension.md` — find the first un-ticked phase.
|
||||||
|
2. Implement the phase (only files in `hosts/ocaml/**` and the plan file).
|
||||||
|
3. Build via `sx_build target=ocaml`.
|
||||||
|
4. Run regression: every existing language-port conformance suite plus
|
||||||
|
the OCaml unit tests. The list lives at `lib/<lang>/conformance.sh` —
|
||||||
|
13 suites at last count (apl, common-lisp, datalog, erlang, forth, guest,
|
||||||
|
haskell, js, lua, ocaml, prolog, smalltalk, tcl).
|
||||||
|
5. If green, commit (short factual message — `vm-ext: phase A — dispatch
|
||||||
|
fallthrough` style).
|
||||||
|
6. Tick the `[ ]` for the completed phase in the plan, append one dated
|
||||||
|
line to the Progress log (newest first).
|
||||||
|
7. Stop. Wait for the next fire.
|
||||||
|
|
||||||
|
## Ground rules (hard)
|
||||||
|
|
||||||
|
- **Scope:** only `hosts/ocaml/**` and `plans/sx-vm-opcode-extension.md`.
|
||||||
|
Do **not** edit `lib/<lang>/**`, `spec/**`, `shared/**`, or any other
|
||||||
|
language port's tests.
|
||||||
|
- **One phase per fire.** Don't combine phases even if a phase looks small.
|
||||||
|
The point of the loop is incremental commits.
|
||||||
|
- **Commit locally only.** Do **not** push. Do **not** touch `main`.
|
||||||
|
- **Worktree:** you are on `loops/sx-vm-extensions` in
|
||||||
|
`/root/rose-ash-loops/sx-vm-extensions`.
|
||||||
|
- **OCaml SX VM gotchas:**
|
||||||
|
- `vm` and `frame` types are defined in `sx_vm.ml`, not `sx_types.ml`.
|
||||||
|
Forward refs (like the existing `jit_compile_ref` pattern) are how
|
||||||
|
sibling modules avoid circular dependency.
|
||||||
|
- Current core opcode ceiling is 175 (OP_DEC). The extension threshold
|
||||||
|
is 200, leaving 24 spare slots for future core opcodes.
|
||||||
|
- JIT compilation is lazy per-lambda. See `project_jit_compilation.md`
|
||||||
|
in memory for the cache + sentinel pattern.
|
||||||
|
- **SX edits:** `sx-tree` MCP tools only (none expected for this loop, but
|
||||||
|
if needed).
|
||||||
|
- **OCaml edits:** Edit/Write tools are fine — these aren't `.sx` files.
|
||||||
|
|
||||||
|
## Done condition
|
||||||
|
|
||||||
|
Phase E acceptance: all 13 (or however many exist at the time) language-port
|
||||||
|
conformance suites pass, OCaml unit tests pass, the test extension from
|
||||||
|
Phase D demonstrates end-to-end flow including JIT routing. Loop is
|
||||||
|
complete; mark and stop.
|
||||||
|
|
||||||
|
## After acceptance
|
||||||
|
|
||||||
|
Hand off to the Erlang loop: `hosts/ocaml/lib/extensions/erlang.ml` becomes
|
||||||
|
the first real consumer, written against this mechanism instead of the
|
||||||
|
Phase 9b stub dispatcher in `lib/erlang/vm/dispatcher.sx`.
|
||||||
555
plans/sx-vm-opcode-extension.md
Normal file
555
plans/sx-vm-opcode-extension.md
Normal file
@@ -0,0 +1,555 @@
|
|||||||
|
# SX VM Opcode Extension Mechanism
|
||||||
|
|
||||||
|
Mechanism in `hosts/ocaml/lib/` that lets language ports register specialized
|
||||||
|
bytecode opcodes without modifying the SX VM core. Direct prerequisite for
|
||||||
|
**erlang-on-sx Phase 9** (the BEAM analog) and a structural enabler for any
|
||||||
|
future language port that wants performance-critical opcodes.
|
||||||
|
|
||||||
|
Reference: `plans/erlang-on-sx.md` Phase 9, `plans/fed-sx-design.md` §17.5,
|
||||||
|
`hosts/ocaml/lib/sx_vm.ml` (current VM).
|
||||||
|
|
||||||
|
Status: **complete** on `loops/sx-vm-extensions` (Phases A-E landed
|
||||||
|
2026-05-14 / 2026-05-15). Ready for first real consumer
|
||||||
|
(`hosts/ocaml/lib/extensions/erlang.ml`, replacing the Phase 9b stub
|
||||||
|
dispatcher in `lib/erlang/vm/dispatcher.sx`).
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Goal
|
||||||
|
|
||||||
|
Allow language ports to register custom bytecode opcodes in the SX VM, with:
|
||||||
|
|
||||||
|
- **Zero overhead for core opcodes.** Existing opcodes (current ceiling 175,
|
||||||
|
see `sx_vm.ml`) must dispatch identically. No regression for any existing
|
||||||
|
language port or the core SX runtime.
|
||||||
|
- **One additional dispatch step for extension opcodes.** Acceptable cost; the
|
||||||
|
win comes from avoiding the general CEK machinery.
|
||||||
|
- **Per-extension state slot.** Erlang's process scheduler, Haskell's thunk
|
||||||
|
cache, etc. need somewhere to hang state alongside the VM.
|
||||||
|
- **Compiler awareness.** The bytecode compiler (`lib/compiler.sx`) must be
|
||||||
|
able to emit extension opcodes by name, looked up against the registered
|
||||||
|
set.
|
||||||
|
- **JIT compatibility.** Existing JIT (lazy lambda compilation) continues to
|
||||||
|
work for code paths using only core opcodes. Extension opcodes are
|
||||||
|
interpreted in v1; JITing them is a follow-up.
|
||||||
|
|
||||||
|
## Non-goals
|
||||||
|
|
||||||
|
- **Hot opcode reload.** Adding/replacing opcodes mid-runtime is not in
|
||||||
|
scope. Extensions are compile-time additions to the OCaml binary. (If
|
||||||
|
needed, that's a separate project.)
|
||||||
|
- **Per-instance opcode sets.** All running instances of the SX VM share
|
||||||
|
the same opcode set determined at build time. Selective opcode loading
|
||||||
|
per instance is out of scope.
|
||||||
|
- **Opcode hot-swap or supersession.** Once registered, opcodes are stable
|
||||||
|
for the lifetime of the binary.
|
||||||
|
- **Language-port isolation at the dispatch layer.** Two language ports can
|
||||||
|
see each other's opcodes (they share the dispatch table). Isolation is a
|
||||||
|
build-time concern — don't compile in extensions you don't trust.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Why now
|
||||||
|
|
||||||
|
The Erlang-on-SX Phase 9 work needs this. Without it, Phase 9b-9g (the actual
|
||||||
|
opcode implementations) have nowhere to plug in. The Erlang loop hit this
|
||||||
|
dependency as a Blocker (`0abf05ed`); this design is what unblocks it.
|
||||||
|
|
||||||
|
It also enables the **shared opcode pattern** discussed in `plans/fed-sx-
|
||||||
|
design.md` §17.5: opcodes Erlang Phase 9 produces that other ports could
|
||||||
|
plausibly use (pattern match, perform/handle, record access) get chiselled
|
||||||
|
out to `lib/guest/vm/` when a second port has an actual second use. Without
|
||||||
|
the extension mechanism, each port would have to fork the SX VM core or
|
||||||
|
modify shared dispatch — neither acceptable.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architectural overview
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────────────────────────────────┐
|
||||||
|
│ SX VM core (hosts/ocaml/lib/sx_vm.ml) │
|
||||||
|
│ │
|
||||||
|
│ ┌────────────────────────────────────┐ │
|
||||||
|
│ │ Bytecode dispatch loop │ │
|
||||||
|
│ │ │ │
|
||||||
|
│ │ match op with │ │
|
||||||
|
│ │ | 1 (OP_CONST) -> ... │ │
|
||||||
|
│ │ | 2 (OP_NIL) -> ... │ │
|
||||||
|
│ │ | ... │ │
|
||||||
|
│ │ | 175 -> ... (last core opcode) │ │
|
||||||
|
│ │ | op when op >= 200 -> │ │
|
||||||
|
│ │ !extension_dispatch_ref op │ │ ◄── new
|
||||||
|
│ │ vm frame │ │
|
||||||
|
│ └────────────────────────────────────┘ │
|
||||||
|
│ │
|
||||||
|
│ ┌────────────────────────────────────┐ │
|
||||||
|
│ │ Extension registry │ │
|
||||||
|
│ │ opcode_id -> handler │ │ ◄── Phase B
|
||||||
|
│ │ opcode_name -> opcode_id │ │
|
||||||
|
│ │ extension_state per extension │ │
|
||||||
|
│ └────────────────────────────────────┘ │
|
||||||
|
└──────────────────────────────────────────┘
|
||||||
|
▲
|
||||||
|
│ register at startup
|
||||||
|
┌──────────────────┴──────────────────────┐
|
||||||
|
│ Extension modules │
|
||||||
|
│ hosts/ocaml/lib/extensions/erlang.ml │
|
||||||
|
│ hosts/ocaml/lib/extensions/haskell.ml │
|
||||||
|
│ hosts/ocaml/lib/extensions/datalog.ml │
|
||||||
|
│ hosts/ocaml/lib/extensions/guest_vm.ml │ ◄── shared opcodes
|
||||||
|
└─────────────────────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
### Opcode ID space partition
|
||||||
|
|
||||||
|
Current SX VM uses opcode IDs from 1 to 175 (per inspection of `sx_vm.ml`,
|
||||||
|
ceiling at OP_DEC = 175). We partition the 0-255 space:
|
||||||
|
|
||||||
|
| Range | Use |
|
||||||
|
|---------|------------------------------------------------------------------|
|
||||||
|
| 0 | reserved / NOP |
|
||||||
|
| 1-199 | **core opcodes** — owned by the SX VM, locked schema |
|
||||||
|
| 200-247 | **extension opcodes** — registered by extensions (ports + shared) |
|
||||||
|
| 248-255 | reserved for future expansion / multi-byte opcodes |
|
||||||
|
|
||||||
|
This gives the core 24 free slots above the current 175 ceiling for future
|
||||||
|
core additions, and 48 slots for extensions. Erlang Phase 9 expects to need
|
||||||
|
fewer than 30 specialized opcodes, so this is comfortable headroom.
|
||||||
|
|
||||||
|
The plan originally proposed a finer split (`128-199` for `lib/guest/vm/`
|
||||||
|
shared, `200-247` for ports). That distinction is preserved at the **naming
|
||||||
|
level** (`guest_vm.OP_X` vs `erlang.OP_Y`) and policed by the registry
|
||||||
|
(duplicate IDs fail at startup), without consuming separate ID ranges. The
|
||||||
|
chiselling discipline (move an opcode to `guest_vm` when a second port uses
|
||||||
|
it) operates at the source level.
|
||||||
|
|
||||||
|
If we need more than 256 opcodes total, multi-byte opcodes (a leading 248-255
|
||||||
|
byte plus a second byte) extend the space without breaking the schema.
|
||||||
|
|
||||||
|
### Extension module signature
|
||||||
|
|
||||||
|
```ocaml
|
||||||
|
(* hosts/ocaml/lib/sx_vm_extension.ml *)
|
||||||
|
|
||||||
|
(** A handler for an extension opcode. Reads operands from bytecode,
|
||||||
|
manipulates the VM stack, updates the frame's instruction pointer.
|
||||||
|
May raise exceptions (which propagate via the existing VM error path). *)
|
||||||
|
type handler = vm -> frame -> unit
|
||||||
|
|
||||||
|
(** State an extension carries alongside the VM. Opaque to the VM core;
|
||||||
|
extensions cast as needed. *)
|
||||||
|
type extension_state = ..
|
||||||
|
|
||||||
|
module type EXTENSION = sig
|
||||||
|
(** Stable name for this extension (e.g. "erlang", "guest_vm"). *)
|
||||||
|
val name : string
|
||||||
|
|
||||||
|
(** Initialize per-instance state. Called once when the VM starts and the
|
||||||
|
extension is loaded. *)
|
||||||
|
val init : unit -> extension_state
|
||||||
|
|
||||||
|
(** Opcodes this extension provides. Each is (opcode_id, opcode_name, handler).
|
||||||
|
opcode_id must be in 200-247. Conflicts cause startup failure. *)
|
||||||
|
val opcodes : extension_state -> (int * string * handler) list
|
||||||
|
end
|
||||||
|
```
|
||||||
|
|
||||||
|
### Registration and dispatch
|
||||||
|
|
||||||
|
```ocaml
|
||||||
|
(* hosts/ocaml/lib/sx_vm_extensions.ml *)
|
||||||
|
|
||||||
|
let extensions : (module EXTENSION) list ref = ref []
|
||||||
|
let states : (string, extension_state) Hashtbl.t = Hashtbl.create 8
|
||||||
|
let by_id : (int, handler) Hashtbl.t = Hashtbl.create 64
|
||||||
|
let by_name : (string, int) Hashtbl.t = Hashtbl.create 64
|
||||||
|
|
||||||
|
let register (m : (module EXTENSION)) =
|
||||||
|
let module M = (val m) in
|
||||||
|
let st = M.init () in
|
||||||
|
Hashtbl.add states M.name st;
|
||||||
|
List.iter (fun (id, name, h) ->
|
||||||
|
if Hashtbl.mem by_id id then
|
||||||
|
failwith (Printf.sprintf "Opcode %d (%s) already registered" id name);
|
||||||
|
Hashtbl.add by_id id h;
|
||||||
|
Hashtbl.add by_name name id
|
||||||
|
) (M.opcodes st);
|
||||||
|
extensions := m :: !extensions
|
||||||
|
|
||||||
|
let dispatch op vm frame =
|
||||||
|
match Hashtbl.find_opt by_id op with
|
||||||
|
| Some handler -> handler vm frame
|
||||||
|
| None -> raise (Invalid_opcode op)
|
||||||
|
|
||||||
|
let id_of_name name = Hashtbl.find_opt by_name name
|
||||||
|
let state_of_extension name = Hashtbl.find_opt states name
|
||||||
|
```
|
||||||
|
|
||||||
|
Phase B installs this dispatcher into `Sx_vm.extension_dispatch_ref` at
|
||||||
|
module init. Until then, the ref's default raises `Invalid_opcode op` for
|
||||||
|
any opcode ≥ 200, which is the Phase A test condition.
|
||||||
|
|
||||||
|
The dispatch path adds **one hashtable lookup per extension opcode**.
|
||||||
|
Acceptable cost — and Erlang's specialized opcodes win >100× over going
|
||||||
|
through the general CEK machine, so the overhead is negligible by comparison.
|
||||||
|
|
||||||
|
### Bytecode compiler integration
|
||||||
|
|
||||||
|
The compiler (`lib/compiler.sx`) needs to know extension opcode IDs to emit
|
||||||
|
them. New SX primitive exposed to the compiler:
|
||||||
|
|
||||||
|
```sx
|
||||||
|
(extension-opcode-id "erlang.OP_PATTERN_TUPLE_2") ; → 200, or nil if not loaded
|
||||||
|
```
|
||||||
|
|
||||||
|
When the compiler wants to emit a specialized opcode, it queries by name. If
|
||||||
|
the extension isn't loaded, the compiler falls back to the general path
|
||||||
|
(emit a `CALL_PRIM` or general SX `case`). This means a language port's
|
||||||
|
optimization is opt-in per build, and missing extensions degrade to slower
|
||||||
|
correct execution rather than failure.
|
||||||
|
|
||||||
|
Naming convention: `<extension-name>.OP_<NAME>`. So `erlang.OP_PATTERN_TUPLE_2`,
|
||||||
|
`guest_vm.OP_PERFORM`, etc.
|
||||||
|
|
||||||
|
### Per-extension state access
|
||||||
|
|
||||||
|
Some opcodes need state beyond the VM stack (Erlang's scheduler, mailbox
|
||||||
|
state, etc.). Extensions store state in their `init`-returned value, accessed
|
||||||
|
via `state_of_extension`:
|
||||||
|
|
||||||
|
```ocaml
|
||||||
|
let op_spawn vm frame =
|
||||||
|
let st = Sx_vm_extensions.state_of_extension "erlang"
|
||||||
|
|> Option.get
|
||||||
|
|> Obj.magic in (* extension casts to its known type *)
|
||||||
|
let body = pop vm in
|
||||||
|
let pid = Erlang_scheduler.spawn st body in
|
||||||
|
push vm (pid_value pid);
|
||||||
|
frame.ip <- frame.ip + 1
|
||||||
|
```
|
||||||
|
|
||||||
|
Shared scheduler state lives in the Erlang extension's state value. Other
|
||||||
|
extensions don't see it.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Phase plan
|
||||||
|
|
||||||
|
Five sub-phases in dependency order. Each is testable in isolation.
|
||||||
|
|
||||||
|
### Phase A — Opcode ID partition + dispatch fallthrough
|
||||||
|
|
||||||
|
- [x] Define `exception Invalid_opcode of int` in `sx_vm.ml`.
|
||||||
|
- [x] Add `extension_dispatch_ref : (int -> vm -> frame -> unit) ref`
|
||||||
|
whose default handler raises `Invalid_opcode op`. Forward-declared in
|
||||||
|
the same style as the existing `jit_compile_ref`.
|
||||||
|
- [x] Add `| op when op >= 200 -> !extension_dispatch_ref op vm frame` arm
|
||||||
|
in the dispatch loop, immediately before the catch-all.
|
||||||
|
- [x] Document the partition in a comment near the top of the opcode list.
|
||||||
|
|
||||||
|
**Tests:**
|
||||||
|
- All existing OCaml VM/CEK tests pass unchanged (zero regression for core).
|
||||||
|
- Constructed bytecode using opcode 200 raises `Invalid_opcode 200` when no
|
||||||
|
extension is registered.
|
||||||
|
|
||||||
|
**Effort:** small. ~50 lines + tests.
|
||||||
|
|
||||||
|
### Phase B — Extension registry module
|
||||||
|
|
||||||
|
`hosts/ocaml/lib/sx_vm_extensions.ml` per the sketch above. Pure plumbing, no
|
||||||
|
opcodes yet. Phase B's module init installs the real `dispatch` into
|
||||||
|
`Sx_vm.extension_dispatch_ref`, replacing Phase A's stub.
|
||||||
|
|
||||||
|
- [x] `Sx_vm_extension` interface module (handler type, EXTENSION sig).
|
||||||
|
- [x] `Sx_vm_extensions` registry module (`register`, `dispatch`,
|
||||||
|
`id_of_name`, `state_of_extension`).
|
||||||
|
- [x] Wire the registry's `dispatch` into `Sx_vm.extension_dispatch_ref` at
|
||||||
|
module init.
|
||||||
|
|
||||||
|
**Tests:**
|
||||||
|
- Register a test extension with one opcode; dispatch finds it.
|
||||||
|
- Duplicate opcode-id registration fails at startup.
|
||||||
|
- `id_of_name` and `state_of_extension` lookups work.
|
||||||
|
|
||||||
|
**Effort:** small. ~150 lines + tests.
|
||||||
|
|
||||||
|
### Phase C — Compiler-side opcode lookup primitive
|
||||||
|
|
||||||
|
Expose `extension-opcode-id` as an SX primitive in `hosts/ocaml/lib/`. The
|
||||||
|
compiler in `lib/compiler.sx` can call it to emit extension opcodes by name.
|
||||||
|
|
||||||
|
Does not require any extension to actually exist — the primitive returns
|
||||||
|
`nil` for unknown names, and the compiler falls back.
|
||||||
|
|
||||||
|
- [x] Register `extension-opcode-id` in `sx_primitives.ml`.
|
||||||
|
- [x] Returns `Integer id` when registered, `Nil` otherwise.
|
||||||
|
|
||||||
|
**Tests:**
|
||||||
|
- Primitive returns nil for unknown name.
|
||||||
|
- After registering a test extension, primitive returns the registered ID.
|
||||||
|
|
||||||
|
**Effort:** small. Single primitive registration + compiler-side use docs.
|
||||||
|
|
||||||
|
### Phase D — Test extension demonstrating end-to-end flow
|
||||||
|
|
||||||
|
A dummy extension at `hosts/ocaml/lib/extensions/test_ext.ml` registering
|
||||||
|
one or two trivial opcodes (e.g. `OP_TEST_PUSH_42`, `OP_TEST_DOUBLE_TOS`).
|
||||||
|
Wired into the build, available when running tests.
|
||||||
|
|
||||||
|
Compiler test: write SX that triggers the test compiler-extension to emit
|
||||||
|
`OP_TEST_PUSH_42`, then verify the VM executes it correctly via
|
||||||
|
`bytecode-inspect` and `vm-trace`.
|
||||||
|
|
||||||
|
- [x] `test_ext.ml` registers two opcodes.
|
||||||
|
- [x] Wired into the build (extensions registered at startup).
|
||||||
|
- [x] Bytecode emission via name lookup produces the right ID.
|
||||||
|
- [x] `bytecode-inspect` shows the opcode by name.
|
||||||
|
|
||||||
|
**Tests:**
|
||||||
|
- Bytecode emission via name lookup produces the right ID.
|
||||||
|
- Execution produces the expected stack effect.
|
||||||
|
- `bytecode-inspect` shows the opcode by name.
|
||||||
|
- `vm-trace` correctly reports the extension opcode.
|
||||||
|
|
||||||
|
**Effort:** small. ~100 lines including build wiring.
|
||||||
|
|
||||||
|
### Phase E — JIT awareness (interpreted-only for v1)
|
||||||
|
|
||||||
|
The JIT (lazy lambda compilation) currently compiles based on opcode ranges.
|
||||||
|
Extension opcodes (≥200) should fall through to interpretation, not be
|
||||||
|
JIT-compiled in v1.
|
||||||
|
|
||||||
|
- [x] Mark extension opcodes as "interpret only" in the JIT pre-analysis.
|
||||||
|
- [x] Lambda containing only core opcodes JIT-compiles as before.
|
||||||
|
- [x] Lambda containing any extension opcode runs interpreted.
|
||||||
|
|
||||||
|
JITing extension opcodes is a follow-up project; v1 keeps the JIT scope
|
||||||
|
unchanged and just makes it correctly route mixed bytecode.
|
||||||
|
|
||||||
|
**Tests:**
|
||||||
|
- Lambda with only core opcodes: JIT-compiled, fast path.
|
||||||
|
- Lambda with extension opcode: interpreted, correct result.
|
||||||
|
- Mixed lambda: interpreted, correct result.
|
||||||
|
|
||||||
|
**Effort:** small-medium. Requires understanding the JIT's pre-analysis
|
||||||
|
(per `project_jit_compilation.md` memory: "Lazy JIT implemented: lambda
|
||||||
|
bodies compiled on first VM call, cached, failures sentinel-marked").
|
||||||
|
Extension-opcode detection becomes another reason to mark a lambda
|
||||||
|
"interpret-only."
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Acceptance criteria
|
||||||
|
|
||||||
|
1. **Phase A-D pass their test suites.**
|
||||||
|
2. **Zero regression on existing SX VM tests.** All language-port test
|
||||||
|
suites currently passing on the architecture branch (Erlang 530+, Haskell
|
||||||
|
285+, Datalog 276+, Smalltalk 625+, the SX core test suite, etc.) still
|
||||||
|
pass.
|
||||||
|
3. **Test extension demonstrates the flow end-to-end.** SX source compiles
|
||||||
|
via the compiler with a registered extension opcode, executes through the
|
||||||
|
VM via the dispatch fallthrough, returns correct result.
|
||||||
|
4. **Documentation:** README in `hosts/ocaml/lib/extensions/` explaining the
|
||||||
|
pattern, with a worked example (the test extension is the canonical one).
|
||||||
|
|
||||||
|
After acceptance, the Erlang-on-SX Phase 9 work in `lib/erlang/vm/` can use
|
||||||
|
this mechanism. The Erlang loop's Blocker for 9a is resolved.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Risk and mitigation
|
||||||
|
|
||||||
|
**Risk: regression in core opcode dispatch.** A misplaced `match` arm could
|
||||||
|
break something. *Mitigation:* run every existing language-port conformance
|
||||||
|
suite before merging.
|
||||||
|
|
||||||
|
**Risk: opcode ID conflicts as more extensions land.** If Erlang Phase 9
|
||||||
|
claims IDs 200-220 and Haskell wants 215-235, we have a problem.
|
||||||
|
*Mitigation:* maintain a registry document at `hosts/ocaml/lib/extensions/
|
||||||
|
README.md` listing claimed ID ranges per extension. Convention: each
|
||||||
|
extension claims a contiguous block at first registration; collisions caught
|
||||||
|
at startup with a clear error.
|
||||||
|
|
||||||
|
**Risk: extension state types leak through `Obj.magic`.** The extension state
|
||||||
|
is type-erased in the registry. *Mitigation:* extensions cast in their own
|
||||||
|
opcode handlers, never expose state to other extensions or the VM core.
|
||||||
|
First-class modules / GADTs could add more type safety; deferred unless
|
||||||
|
this becomes a concrete pain point.
|
||||||
|
|
||||||
|
**Risk: extensions become a back door for kernel mutation.** An extension
|
||||||
|
opcode handler has full access to the VM. *Mitigation:* extensions are
|
||||||
|
build-time additions, not runtime; they're as trusted as the rest of the
|
||||||
|
binary. Operators audit at build time, not runtime. Same trust model as
|
||||||
|
any other compiled-in code.
|
||||||
|
|
||||||
|
**Risk: shared `lib/guest/vm/` opcodes evolve under different language
|
||||||
|
ports' needs.** *Mitigation:* the chiselling discipline (move to guest only
|
||||||
|
on second use) ensures the shared opcodes are tested against at least two
|
||||||
|
ports' actual usage before being considered stable.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Open questions
|
||||||
|
|
||||||
|
To be resolved during implementation, not blocking design approval:
|
||||||
|
|
||||||
|
1. **Multi-byte opcode encoding.** If we need >256 opcodes total, the
|
||||||
|
leading-byte 248-255 schema accommodates it. Do we need multi-byte at
|
||||||
|
v1? Probably not — 48 extension opcodes is more than any single port
|
||||||
|
should reasonably want.
|
||||||
|
2. **Extension ordering matters?** If two extensions register opcodes that
|
||||||
|
read the same VM state, ordering of registration could matter for
|
||||||
|
initialization. Probably not in practice; flag if it bites.
|
||||||
|
3. **Hot-reload of extensions.** Out of scope for v1 (per non-goals). If
|
||||||
|
wanted later, the registry would need teardown + re-registration; the
|
||||||
|
`gen_server` `code_change/3` model from Erlang Phase 7 is a precedent.
|
||||||
|
4. **Cross-extension opcode composition.** Can `guest_vm.OP_PERFORM` invoke
|
||||||
|
`erlang.OP_RECEIVE_SCAN`? In principle yes — handlers can do anything.
|
||||||
|
The interface is clean; the question is whether we want any conventions
|
||||||
|
to keep ergonomics tractable. Defer until composition appears in
|
||||||
|
practice.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Implementation roadmap and sequencing
|
||||||
|
|
||||||
|
This is a sister workstream to `loops/erlang`. Driven by Erlang Phase 9.
|
||||||
|
Single bounded loop on `loops/sx-vm-extensions`, ~1-2 weeks.
|
||||||
|
|
||||||
|
Recommended sequencing (one phase per loop fire):
|
||||||
|
|
||||||
|
1. **Phase A** — dispatch fallthrough. Smallest viable change to `sx_vm.ml`.
|
||||||
|
2. **Phase B** — extension registry module.
|
||||||
|
3. **Phase C** — compiler-side opcode lookup primitive.
|
||||||
|
4. **Phase D** — test extension demonstrating end-to-end flow.
|
||||||
|
5. **Phase E** — JIT awareness (interpret-only routing).
|
||||||
|
|
||||||
|
After acceptance:
|
||||||
|
|
||||||
|
- **`hosts/ocaml/lib/extensions/erlang.ml`** becomes the *first real
|
||||||
|
consumer* — written by whoever takes over from the Erlang loop's stub
|
||||||
|
dispatcher in `lib/erlang/vm/dispatcher.sx`. That's the integration
|
||||||
|
moment that closes the loop.
|
||||||
|
|
||||||
|
Estimated total effort: 1-2 weeks for one focused engineer with OCaml SX VM
|
||||||
|
familiarity.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Relationship to other plans
|
||||||
|
|
||||||
|
- **`plans/erlang-on-sx.md` Phase 9:** unblocked by this work. Erlang loop
|
||||||
|
develops opcodes against a stub dispatcher in `lib/erlang/vm/`; once this
|
||||||
|
mechanism lands, swap stub for real registration via
|
||||||
|
`hosts/ocaml/lib/extensions/erlang.ml`.
|
||||||
|
- **`plans/fed-sx-design.md` §17.5:** documents this as Layer-1 prerequisite.
|
||||||
|
The shared-opcode discipline (lib/guest/vm/) is designed on top of this
|
||||||
|
mechanism's namespace allocation.
|
||||||
|
- **Future language ports (Haskell, Datalog, Smalltalk perf phases):** will
|
||||||
|
use the same mechanism. Each adds an extension module, claims an opcode
|
||||||
|
range, registers handlers. The `lib/guest/vm/` opcodes get
|
||||||
|
cross-referenced when the second port's needs justify chiselling.
|
||||||
|
- **JIT roadmap (per `project_jit_architecture.md` memory):** extension
|
||||||
|
opcodes are interpreted in v1. JITing them is a logical follow-up but
|
||||||
|
a separate project.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Progress log
|
||||||
|
|
||||||
|
Newest first.
|
||||||
|
|
||||||
|
- **2026-05-15** — Phase E done. Loop complete (acceptance criteria
|
||||||
|
1-4 all met). New `Sx_vm.bytecode_uses_extension_opcodes` walks
|
||||||
|
bytecode operand-aware (CONST u16 indices, CALL_PRIM u16+u8,
|
||||||
|
CLOSURE u16+dynamic upvalue descriptors) so values that happen to
|
||||||
|
be ≥200 don't false-positive as extension opcodes. Wired into
|
||||||
|
`jit_compile_lambda`: when the inner closure's bytecode contains
|
||||||
|
any extension opcode, JIT returns None and the lambda runs
|
||||||
|
interpreted via CEK (the dispatch fallthrough still routes
|
||||||
|
extension opcodes through the registry — this just prevents the
|
||||||
|
JIT from claiming ownership of code it can't optimise). 7 new
|
||||||
|
foundation tests (`jit extension-opcode awareness` suite): pure
|
||||||
|
core eligible, head/middle/post-CLOSURE detection, CONST + CALL_PRIM
|
||||||
|
+ CLOSURE-descriptor false-positive avoidance. +7 pass vs Phase D
|
||||||
|
baseline (4833 vs 4826), 1111 pre-existing failures unchanged.
|
||||||
|
Conformance suites green: erlang 530/530, haskell 285/285, datalog
|
||||||
|
276/276, prolog 590/590, smalltalk 847/847, common-lisp 487/487,
|
||||||
|
apl 562/562, js 148/148, forth 632/638 (pre-existing), tcl 3/4
|
||||||
|
(pre-existing), ocaml-on-sx unit 607/607.
|
||||||
|
|
||||||
|
Loop done. Hand-off: the Erlang loop's Phase 9b stub dispatcher in
|
||||||
|
`lib/erlang/vm/dispatcher.sx` can now be replaced with a real
|
||||||
|
`hosts/ocaml/lib/extensions/erlang.ml` consumer.
|
||||||
|
|
||||||
|
- **2026-05-15** — Phase D done. New `hosts/ocaml/lib/extensions/` subtree
|
||||||
|
wired into the `sx` library via `(include_subdirs unqualified)`.
|
||||||
|
`extensions/test_ext.ml` is the canonical worked example: two
|
||||||
|
operand-less opcodes (`test_ext.OP_TEST_PUSH_42` = 220,
|
||||||
|
`test_ext.OP_TEST_DOUBLE_TOS` = 221) carrying `TestExtState` (an
|
||||||
|
invocation counter that exercises the per-extension state slot).
|
||||||
|
`extensions/README.md` documents the registration pattern, opcode-ID
|
||||||
|
range conventions, and naming rules.
|
||||||
|
|
||||||
|
`Sx_vm.opcode_name` now consults `extension_opcode_name_ref` (forward
|
||||||
|
ref) so disassembly shows extension opcodes by name instead of
|
||||||
|
`UNKNOWN_n`. Registry maintains `name_of_id_table` (reverse of
|
||||||
|
`by_name`) and installs the lookup at module init alongside the
|
||||||
|
dispatch ref. 5 new foundation tests (`extensions/test_ext` suite):
|
||||||
|
`extension-opcode-id` finds OP_TEST_PUSH_42, end-to-end bytecode runs
|
||||||
|
to 84, disassemble shows opcode names, unregistered ext opcodes still
|
||||||
|
fall back to UNKNOWN_n, per-extension state counter increments.
|
||||||
|
+5 pass vs Phase C baseline (4826 vs 4821), 1111 pre-existing failures
|
||||||
|
unchanged. Conformance suites green: erlang 530/530, haskell 285/285,
|
||||||
|
datalog 276/276, prolog 590/590, smalltalk 847/847, common-lisp
|
||||||
|
487/487, apl 562/562, js 148/148, forth 632/638 (pre-existing), tcl
|
||||||
|
3/4 (pre-existing), ocaml-on-sx unit 607/607.
|
||||||
|
|
||||||
|
- **2026-05-15** — Phase C done. `extension-opcode-id` SX primitive
|
||||||
|
registered from `sx_vm_extensions.ml` module init (avoids the
|
||||||
|
`sx_primitives ↔ sx_vm` cycle by registering downstream of both).
|
||||||
|
Accepts a string or symbol; returns `Integer id` for registered
|
||||||
|
opcode names, `Nil` for unknown — so a missing extension at compile
|
||||||
|
time degrades to a fallback rather than failure. 5 new foundation
|
||||||
|
tests (`extension-opcode-id primitive` suite): registered lookup,
|
||||||
|
unknown → nil, symbol arg, zero-arg rejection, integer-arg
|
||||||
|
rejection. +5 pass vs Phase B baseline (4821 vs 4816), 1111
|
||||||
|
pre-existing failures unchanged. Conformance suites green: erlang
|
||||||
|
530/530, haskell 285/285, datalog 276/276, prolog 590/590, smalltalk
|
||||||
|
847/847, common-lisp 487/487, apl 562/562, js 148/148, forth 632/638
|
||||||
|
(pre-existing), tcl 3/4 (pre-existing), ocaml-on-sx unit 607/607.
|
||||||
|
|
||||||
|
- **2026-05-14** — Phase B done. Added `hosts/ocaml/lib/sx_vm_extension.ml`
|
||||||
|
(interface: `handler` type, `extension_state` extensible variant,
|
||||||
|
`EXTENSION` module type) and `sx_vm_extensions.ml` (registry: `register`,
|
||||||
|
`dispatch`, `id_of_name`, `state_of_extension`, `_reset_for_tests`).
|
||||||
|
`let () = install_dispatch ()` at module init replaces Phase A's stub
|
||||||
|
with the real registry dispatch — Phase A behavior preserved (empty
|
||||||
|
registry still raises `Invalid_opcode` for unregistered ops). Registry
|
||||||
|
rejects opcode IDs outside 200-247, duplicate IDs, duplicate names, and
|
||||||
|
duplicate extension names. 9 new foundation tests (`vm-extension-registry`
|
||||||
|
suite): id_of_name resolve+miss, state_of_extension resolve+miss,
|
||||||
|
end-to-end VM dispatch (push 42), opcode composition (push 42 → double
|
||||||
|
→ 84), duplicate-id / out-of-range / duplicate-name rejection. +9 pass
|
||||||
|
vs Phase A baseline (4816 vs 4807), 1111 pre-existing failures unchanged.
|
||||||
|
Conformance suites green: erlang 530/530, haskell 285/285, datalog
|
||||||
|
276/276, prolog 590/590, smalltalk 847/847, common-lisp 487/487, apl
|
||||||
|
562/562, js 148/148, forth 632/638 (pre-existing), tcl 3/4 (pre-existing),
|
||||||
|
ocaml-on-sx unit 607/607.
|
||||||
|
|
||||||
|
- **2026-05-14** — Phase A done. Added `Invalid_opcode of int` exception,
|
||||||
|
`extension_dispatch_ref` (default raises `Invalid_opcode op`), and the
|
||||||
|
`| op when op >= 200 -> !extension_dispatch_ref op vm frame` arm before the
|
||||||
|
catch-all in `sx_vm.ml`. Partition comment documents 1-199 core / 200-247
|
||||||
|
extensions / 248-255 reserved (current core ceiling is OP_DEC = 175).
|
||||||
|
4 new foundation tests (3 × Invalid_opcode for opcodes 200/224/247, 1 ×
|
||||||
|
Eval_error for opcode 199 to pin the threshold). Foundation 64/64;
|
||||||
|
full OCaml test suite +4 pass vs baseline (4807 vs 4803), 1111 pre-existing
|
||||||
|
failures unchanged. Conformance suites green: erlang 530/530, haskell
|
||||||
|
285/285, datalog 276/276, prolog 590/590, smalltalk 847/847, common-lisp
|
||||||
|
305/305, apl 562/562, js 148/148, forth 632/638 (pre-existing), tcl 3/4
|
||||||
|
(pre-existing), ocaml-on-sx unit 607/607. (Lua 0/16 and ocaml-conformance
|
||||||
|
baseline programs not exercised — pre-existing scoreboard state and
|
||||||
|
multi-hour runtime respectively.)
|
||||||
|
|
||||||
Reference in New Issue
Block a user