From 4dfccc244d3b731059be02dcc3bbd6a0f0104bfc Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 15 May 2026 01:05:30 +0000 Subject: [PATCH] =?UTF-8?q?vm-ext:=20phase=20D=20=E2=80=94=20extensions/?= =?UTF-8?q?=20subtree=20+=20test=5Fext=20+=20opcode=5Fname=20lookup?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lib/extensions/ becomes the new home for VM extensions, wired in via (include_subdirs unqualified). README documents the registration pattern, opcode-ID range conventions (200-209 guest_vm, 210-219 inline test, 220-229 test_ext, 230-247 ports), and naming rules. extensions/test_ext.ml is the canonical worked example — two operand-less opcodes (220 push 42, 221 double TOS) carrying a per- extension state slot (TestExtState invocation counter). Test_ext.register called from run_tests.ml at the start of the Phase D suite, on top of the inline test_reg from earlier suites (disjoint opcode IDs). Sx_vm.opcode_name now consults extension_opcode_name_ref (forward ref in the same style as extension_dispatch_ref), so disassemble shows extension opcodes by name instead of UNKNOWN_n. Registry maintains name_of_id_table and installs the lookup at module init. Tests: 5 new foundation cases — primitive resolves test_ext name, end-to-end bytecode (push + double + return → 84), disassemble shows "test_ext.OP_TEST_PUSH_42" / "test_ext.OP_TEST_DOUBLE_TOS", unregistered ext opcodes still fall back to UNKNOWN_n, invocation counter records the two dispatches. +5 pass vs Phase C baseline, no regressions across 11 conformance suites. --- hosts/ocaml/bin/run_tests.ml | 81 +++++++++++++++++++++++++- hosts/ocaml/lib/dune | 4 ++ hosts/ocaml/lib/extensions/README.md | 71 ++++++++++++++++++++++ hosts/ocaml/lib/extensions/test_ext.ml | 67 +++++++++++++++++++++ hosts/ocaml/lib/sx_vm.ml | 14 ++++- hosts/ocaml/lib/sx_vm_extensions.ml | 20 +++++-- 6 files changed, 251 insertions(+), 6 deletions(-) create mode 100644 hosts/ocaml/lib/extensions/README.md create mode 100644 hosts/ocaml/lib/extensions/test_ext.ml diff --git a/hosts/ocaml/bin/run_tests.ml b/hosts/ocaml/bin/run_tests.ml index 4995b507..4145a56a 100644 --- a/hosts/ocaml/bin/run_tests.ml +++ b/hosts/ocaml/bin/run_tests.ml @@ -1518,7 +1518,86 @@ let run_foundation_tests () = Printf.printf " FAIL: integer arg should have raised\n" with Sx_types.Eval_error _ -> incr pass_count; - Printf.printf " PASS: integer arg rejected\n") + Printf.printf " PASS: integer arg rejected\n"); + + Printf.printf "\nSuite: extensions/test_ext (canonical extension)\n"; + (* Phase D: the real test extension lives at lib/extensions/test_ext.ml. + Register it on top of the inline test_reg from earlier suites — the + two use disjoint opcode IDs (210/211 vs 220/221) so they coexist. *) + Test_ext.register (); + + (* Lookup via the public primitive should now find OP_TEST_PUSH_42. *) + (match prim [String "test_ext.OP_TEST_PUSH_42"] with + | Integer 220 -> + incr pass_count; + Printf.printf " PASS: extension-opcode-id finds test_ext.OP_TEST_PUSH_42\n" + | other -> + incr fail_count; + Printf.printf " FAIL: opcode lookup: got %s\n" (Sx_types.inspect other)); + + (* End-to-end: PUSH_42 + DOUBLE_TOS + RETURN. *) + (let globals = Hashtbl.create 1 in + try + match Sx_vm.execute_module (make_bc_seq [| 220; 221; 50 |]) globals with + | Integer 84 -> + incr pass_count; + Printf.printf " PASS: extensions/test_ext bytecode executes (84)\n" + | other -> + incr fail_count; + Printf.printf " FAIL: test_ext bytecode result: got %s\n" + (Sx_types.inspect other) + with exn -> + incr fail_count; + Printf.printf " FAIL: test_ext bytecode raised: %s\n" + (Printexc.to_string exn)); + + (* Disassembly: opcode_name should resolve 220/221 via the registry, + not fall back to UNKNOWN_220 / UNKNOWN_221. disassemble returns a + Dict; the instruction list lives at key "bytecode". *) + (let code = make_bc_seq [| 220; 221; 50 |] in + let dis = Sx_vm.disassemble code in + let entries = match dis with + | Dict d -> (match Hashtbl.find_opt d "bytecode" with + | Some (List es) -> es + | _ -> []) + | _ -> [] + in + let names = List.filter_map (fun entry -> match entry with + | Dict d -> + (match Hashtbl.find_opt d "opcode" with + | Some (String name) -> Some name + | _ -> None) + | _ -> None) entries + in + let has name = List.mem name names in + if has "test_ext.OP_TEST_PUSH_42" && has "test_ext.OP_TEST_DOUBLE_TOS" then begin + incr pass_count; + Printf.printf " PASS: disassemble shows extension opcode names\n" + end else begin + incr fail_count; + Printf.printf " FAIL: disassemble names: [%s]\n" (String.concat ", " names) + end); + + (* Sanity: opcode_name on an unregistered extension opcode still + returns UNKNOWN_n. Pick 230 — out of test_ext's range. *) + (match Sx_vm.opcode_name 230 with + | "UNKNOWN_230" -> + incr pass_count; + Printf.printf " PASS: unregistered ext opcode falls back to UNKNOWN_n\n" + | other -> + incr fail_count; + Printf.printf " FAIL: opcode_name 230: got %s\n" other); + + (* Per-extension state: invocation_count should reflect the two opcodes + that ran in the dispatch test above. *) + (match Test_ext.invocation_count () with + | Some n when n >= 2 -> + incr pass_count; + Printf.printf " PASS: extension state recorded %d invocations\n" n + | other -> + incr fail_count; + Printf.printf " FAIL: invocation_count: %s\n" + (match other with Some n -> string_of_int n | None -> "None")) (* ====================================================================== *) diff --git a/hosts/ocaml/lib/dune b/hosts/ocaml/lib/dune index 0a5bf1a7..c20b5306 100644 --- a/hosts/ocaml/lib/dune +++ b/hosts/ocaml/lib/dune @@ -2,3 +2,7 @@ (name sx) (wrapped false) (libraries re re.pcre unix)) + +; Pull in extension modules from lib/extensions/ (test_ext.ml, etc). +; See plans/sx-vm-opcode-extension.md. +(include_subdirs unqualified) diff --git a/hosts/ocaml/lib/extensions/README.md b/hosts/ocaml/lib/extensions/README.md new file mode 100644 index 00000000..20ffff72 --- /dev/null +++ b/hosts/ocaml/lib/extensions/README.md @@ -0,0 +1,71 @@ +# SX VM extensions + +Each `*.ml` file here is a VM extension — a first-class OCaml module that +registers specialized bytecode opcodes with `Sx_vm_extensions`. See +[`plans/sx-vm-opcode-extension.md`](../../../../plans/sx-vm-opcode-extension.md) +for the design. + +## Pattern + +```ocaml +(* lib/extensions/myport.ml *) +open Sx_types + +type Sx_vm_extension.extension_state += MyportState of { ... } + +module M : Sx_vm_extension.EXTENSION = struct + let name = "myport" + let init () = MyportState { ... } + let opcodes _st = [ + (id, "myport.OP_NAME", handler); + ... + ] +end + +let register () = Sx_vm_extensions.register (module M) +``` + +Then call `Myport.register ()` once at startup from any binary that +should have the extension loaded. + +## Opcode-ID allocation + +Range 200-247 (per `Sx_vm_extensions.extension_min` / +`extension_max`). Conventions: + +| Range | Use | +|---------|-------------------------------------------------------------------------| +| 200-209 | reserved for `lib/guest/vm/` shared opcodes (chiselled out on 2nd use) | +| 210-219 | inline test extensions defined in `bin/run_tests.ml` | +| 220-229 | this directory's `test_ext` (the canonical template) | +| 230-247 | first-come-first-served by language ports (Erlang first) | + +When a port claims a contiguous block, document it in the table above. +The registry rejects collisions at startup with a loud error — there is +no silent shadowing. + +## Naming + +Always prefix opcode names with the extension name plus a dot: +`myport.OP_`. The prefix is a hard convention so that multiple +extensions can share the global opcode-name namespace cleanly. + +## State + +`extension_state` is an extensible variant. Add your case (e.g. +`MyportState of { ... }`) at the top of your file, return it from +`init`, and pattern-match it inside your handlers. Other extensions +cannot see your state — the variant case is private to your module. + +## Testing + +`test_ext.ml` is the canonical worked example. `bin/run_tests.ml` +calls `Test_ext.register ()`, then drives bytecode that exercises the +opcodes end-to-end (push, double, dispatch, disassemble, invocation +counter). Mirror this shape when adding a real port's extension. + +## Build wiring + +`lib/dune` has `(include_subdirs unqualified)`, so any `.ml` you drop +in here is automatically part of the `sx` library. Module name follows +the filename verbatim (`test_ext.ml` → `Test_ext`). diff --git a/hosts/ocaml/lib/extensions/test_ext.ml b/hosts/ocaml/lib/extensions/test_ext.ml new file mode 100644 index 00000000..22906c85 --- /dev/null +++ b/hosts/ocaml/lib/extensions/test_ext.ml @@ -0,0 +1,67 @@ +(** {1 [test_ext] — canonical example VM extension} + + A minimal extension demonstrating the registration pattern from + [plans/sx-vm-opcode-extension.md]. The opcode IDs (220, 221) sit at + the top of the extension range, well clear of anything a real + language port would claim. + + Two operand-less opcodes: + + - [test_ext.OP_TEST_PUSH_42] (220) — pushes the integer 42. + - [test_ext.OP_TEST_DOUBLE_TOS] (221) — pops the integer on TOS, + pushes 2× it. + + These are the smallest stack manipulations that prove the extension + mechanism wires through end-to-end (registry → dispatch → human- + readable disassembly). Real ports (Erlang Phase 9, future Haskell + perf phases) replace this template with their own opcode set. + + Loading: [Test_ext.register ()] adds the extension to + [Sx_vm_extensions]. Run-time binaries that want the test opcodes + available call this once at startup. Unit tests in + [bin/run_tests.ml] do exactly that. *) + +open Sx_types + +(** Per-instance state for [test_ext]. Counts how many times the + handlers ran — purely so the extension has *some* state, exercising + the [extension_state] machinery. *) +type Sx_vm_extension.extension_state += TestExtState of { + mutable invocations : int; +} + +module M : Sx_vm_extension.EXTENSION = struct + let name = "test_ext" + let init () = TestExtState { invocations = 0 } + + let opcodes st = + let bump () = match st with + | TestExtState s -> s.invocations <- s.invocations + 1 + | _ -> () + in + [ + (220, "test_ext.OP_TEST_PUSH_42", + (fun vm _frame -> bump (); Sx_vm.push vm (Integer 42))); + + (221, "test_ext.OP_TEST_DOUBLE_TOS", + (fun vm _frame -> + bump (); + let v = Sx_vm.pop vm in + match v with + | Integer n -> Sx_vm.push vm (Integer (n * 2)) + | _ -> raise (Eval_error + "test_ext.OP_TEST_DOUBLE_TOS: TOS is not an integer"))); + ] +end + +(** Register [test_ext] in [Sx_vm_extensions]. Idempotent only by + failing loudly — calling twice raises [Failure]. Binaries call this + once at startup; tests may [_reset_for_tests] then re-register. *) +let register () = Sx_vm_extensions.register (module M : Sx_vm_extension.EXTENSION) + +(** Read the invocation counter from the live registry state. Returns + [None] if [register] hasn't been called yet. *) +let invocation_count () = + match Sx_vm_extensions.state_of_extension "test_ext" with + | Some (TestExtState s) -> Some s.invocations + | _ -> None diff --git a/hosts/ocaml/lib/sx_vm.ml b/hosts/ocaml/lib/sx_vm.ml index 752c1acc..5654f630 100644 --- a/hosts/ocaml/lib/sx_vm.ml +++ b/hosts/ocaml/lib/sx_vm.ml @@ -70,6 +70,13 @@ let jit_compile_ref : (lambda -> (string, value) Hashtbl.t -> vm_closure option) let extension_dispatch_ref : (int -> vm -> frame -> unit) ref = ref (fun op _vm _frame -> raise (Invalid_opcode op)) +(** Forward reference for extension opcode → name lookup, used by + [opcode_name] / [disassemble] for human-readable disassembly. The + registry installs a real lookup at module init; default returns + [None] (then [opcode_name] falls back to "UNKNOWN_n"). *) +let extension_opcode_name_ref : (int -> string option) ref = + ref (fun _ -> None) + (* JIT threshold and counters live in Sx_types so primitives can read them without creating a sx_primitives → sx_vm dependency cycle. *) @@ -1214,7 +1221,12 @@ let opcode_name = function | 164 -> "EQ" | 165 -> "LT" | 166 -> "GT" | 167 -> "NOT" | 168 -> "LEN" | 169 -> "FIRST" | 170 -> "REST" | 171 -> "NTH" | 172 -> "CONS" | 173 -> "NEG" | 174 -> "INC" | 175 -> "DEC" - | n -> Printf.sprintf "UNKNOWN_%d" n + | n -> + (* Extension opcodes (≥200) get their human-readable name from the + registry; defaults to UNKNOWN_n if the extension isn't loaded. *) + (match !extension_opcode_name_ref n with + | Some name -> name + | None -> Printf.sprintf "UNKNOWN_%d" n) (** Number of extra operand bytes consumed by each opcode. Returns (format, total_bytes) where format describes the operand types. *) diff --git a/hosts/ocaml/lib/sx_vm_extensions.ml b/hosts/ocaml/lib/sx_vm_extensions.ml index e301b57c..e222622f 100644 --- a/hosts/ocaml/lib/sx_vm_extensions.ml +++ b/hosts/ocaml/lib/sx_vm_extensions.ml @@ -20,6 +20,10 @@ let by_id : (int, handler) Hashtbl.t = Hashtbl.create 64 (** opcode_name → opcode_id *) let by_name : (string, int) Hashtbl.t = Hashtbl.create 64 +(** opcode_id → opcode_name (reverse of [by_name]; used by + [Sx_vm.opcode_name] for disassembly). *) +let name_of_id_table : (int, string) Hashtbl.t = Hashtbl.create 64 + (** extension_name → state *) let states : (string, extension_state) Hashtbl.t = Hashtbl.create 8 @@ -58,7 +62,8 @@ let register (m : (module EXTENSION)) = Hashtbl.add states M.name st; List.iter (fun (id, opname, h) -> Hashtbl.add by_id id h; - Hashtbl.add by_name opname id + Hashtbl.add by_name opname id; + Hashtbl.add name_of_id_table id opname ) ops; extensions := M.name :: !extensions @@ -66,6 +71,10 @@ let register (m : (module EXTENSION)) = extension provides that opcode. *) let id_of_name name = Hashtbl.find_opt by_name name +(** Look up the opcode_name for an opcode_id. Returns [None] if no + extension provides that opcode. Used by disassembly. *) +let name_of_id id = Hashtbl.find_opt name_of_id_table id + (** Look up the state of an extension by name. Returns [None] if the extension is not registered. *) let state_of_extension name = Hashtbl.find_opt states name @@ -78,13 +87,16 @@ let registered_extensions () = !extensions let _reset_for_tests () = Hashtbl.clear by_id; Hashtbl.clear by_name; + Hashtbl.clear name_of_id_table; Hashtbl.clear states; extensions := [] -(** Install our [dispatch] into [Sx_vm.extension_dispatch_ref], replacing - the Phase A stub. Idempotent. Called automatically at module init. *) +(** Install our [dispatch] into [Sx_vm.extension_dispatch_ref] and our + [name_of_id] into [Sx_vm.extension_opcode_name_ref], replacing + the Phase A stubs. Idempotent. Called automatically at module init. *) let install_dispatch () = - Sx_vm.extension_dispatch_ref := dispatch + Sx_vm.extension_dispatch_ref := dispatch; + Sx_vm.extension_opcode_name_ref := name_of_id let () = install_dispatch ()