Files
rose-ash/hosts/ocaml/bootstrap.py
giles dc7aa709bd review quick-wins: JIT gate, crash guards, crit-2 signal-return, regen repair
Server (sx_server.ml):
- HTTP mode: JIT hook now opt-in via SX_SERVING_JIT, matching epoch mode
  (was unconditional — live serving-JIT miscompiles J1/J2/J3 de-risked)
- command channel: malformed/non-ASCII line returns an error response
  instead of killing the shared process (C1/C1b)
- response cache: soft error pages no longer cached (S4);
  http_render_page returns (html, is_error)

Kernel spec + regen:
- crit-2: signal-return frame stored the saved kont under :f but the reader
  looked up "saved-kont" — handler value became the whole program's result
  and the covering test passed vacuously. Fixed; raise-continuable now also
  resumes at the raise site (rest-k, not unwound-k), mirroring signal-condition
- quasiquote: R7RS longhand unquote-splicing aliased to splice-unquote
  (used to serialize literally — silent zero-splice)
- guard: re-raise sentinel gensym'd per execution (was forgeable by any
  (list '__guard-reraise__ x) value)
- do: IIFE-head form no longer misparses as a Scheme do-loop
- render: area/base/embed/param/track added to HTML_TAGS (were void-only
  and rendered as Undefined symbol)
- REGEN REPAIR: checked-in sx_ref.ml carried hand-written additions that
  every regeneration silently lost (let-values/define-values/delay/
  delay-force registrations, AdtValue define-type) plus 5 regen blockers
  (arrow-name mangling, 3-arg get, &rest defines, HO-position helper refs,
  transpiler prim-table gaps). Moved into bootstrap.py FIXUPS/skips and the
  transpiler prim table — regen is now reproducible, compiles, and tests
  at baseline (CI Dockerfile.test steps 3-4 could not previously have
  produced a compiling kernel)

Primitives:
- contains?: dict key-check arm per its spec doc
- expt: promotes to float on int63 overflow ((expt 2 100) returned 0)
- mcp_tree parity with sx_primitives: get (Integer indices + 3-arg default),
  split (literal substring, was char-class — the historical gotcha lived
  here), empty? on ""/{}, contains?, equal?, keyword-name, char-code
  (Integer), parse-number (Integer-aware)

Python/docs:
- shared/sx/boundary.py: dead validation now logs a one-time WARNING instead
  of silently no-oping (full revival gated: tier-1 declarations deleted and
  SX_BOUNDARY_STRICT=1 is live in production compose)
- CLAUDE.md: canonical reference now points at spec/*.sx; island authoring
  rules corrected (let IS sequential, bodies ARE implicit begin)

Verification: full suite 5762 passed / 274 failed — fail set byte-identical
to the pre-change baseline (273 in-progress hs-* + pre-existing r7rs radix
shadow). All repros verified fixed on both the native binary and the rebuilt
WASM browser kernel. Review findings: /tmp/sx-review/*.md

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-07-03 13:49:43 +00:00

392 lines
15 KiB
Python

#!/usr/bin/env python3
"""
Bootstrap compiler: SX spec -> OCaml.
Loads the SX-to-OCaml transpiler (transpiler.sx), feeds it the spec files,
and produces sx_ref.ml — the transpiled evaluator as native OCaml.
Usage:
python3 hosts/ocaml/bootstrap.py --output hosts/ocaml/lib/sx_ref.ml
"""
from __future__ import annotations
import os
import sys
_HERE = os.path.dirname(os.path.abspath(__file__))
_PROJECT = os.path.abspath(os.path.join(_HERE, "..", ".."))
sys.path.insert(0, _PROJECT)
from shared.sx.parser import parse_all
from shared.sx.types import Symbol
def extract_defines(source: str) -> list[tuple[str, list]]:
"""Parse .sx source, return list of (name, define-expr) for top-level defines."""
exprs = parse_all(source)
defines = []
for expr in exprs:
if isinstance(expr, list) and expr and isinstance(expr[0], Symbol):
if expr[0].name == "define":
name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1])
defines.append((name, expr))
return defines
# OCaml preamble — opens and runtime helpers
PREAMBLE = """\
(* sx_ref.ml — Auto-generated from SX spec by hosts/ocaml/bootstrap.py *)
(* Do not edit — regenerate with: python3 hosts/ocaml/bootstrap.py *)
[@@@warning "-26-27"]
open Sx_types
open Sx_runtime
(* Trampoline — forward ref, resolved after eval_expr is defined. *)
let trampoline_fn : (value -> value) ref = ref (fun v -> v)
let trampoline v = !trampoline_fn v
(* Step limit for timeout detection — set to 0 to disable *)
let step_limit : int ref = ref 0
let step_count : int ref = ref 0
(* === Mutable globals — backing refs for transpiler's !_ref / _ref := === *)
let _strict_ref = ref (Bool false)
let _prim_param_types_ref = ref Nil
let _last_error_kont_ref = ref Nil
let _protocol_registry_ = Dict (Hashtbl.create 0)
"""
# OCaml fixups — wire up trampoline + iterative CEK run + JIT hook
FIXUPS = """\
(* Wire up trampoline to resolve thunks via the CEK machine *)
let () = trampoline_fn := (fun v ->
match v with
| Thunk (expr, env) -> eval_expr expr (Env env)
| _ -> v)
(* Wire up the primitives trampoline so call_any in HO forms resolves Thunks *)
let () = Sx_primitives._sx_trampoline_fn := !trampoline_fn
(* Override recursive cek_run with iterative loop.
On error, capture the kont from the last state for comp-trace. *)
let cek_run_iterative state =
let s = ref state in
(try
while not (match cek_terminal_p !s with Bool true -> true | _ -> false)
&& not (match cek_suspended_p !s with Bool true -> true | _ -> false) do
s := cek_step !s
done;
(match cek_suspended_p !s with
| Bool true ->
(match !_cek_io_suspend_hook with
| Some hook -> hook !s
| None -> raise (Eval_error "IO suspension in non-IO context"))
| _ -> cek_value !s)
with Eval_error msg ->
_last_error_kont_ref := cek_kont !s;
raise (Eval_error msg))
(* Collect component trace from a kont value *)
let collect_comp_trace kont =
let trace = ref [] in
let k = ref kont in
while (match !k with List (_::_) -> true | _ -> false) do
(match !k with
| List (frame :: rest) ->
(match frame with
| CekFrame f when f.cf_type = "comp-trace" ->
let name = match f.cf_name with String s -> s | _ -> "?" in
let file = match f.cf_env with String s -> s | Nil -> "" | _ -> "" in
trace := (name, file) :: !trace
| Dict d when (match Hashtbl.find_opt d "type" with Some (String "comp-trace") -> true | _ -> false) ->
let name = match Hashtbl.find_opt d "name" with Some (String s) -> s | _ -> "?" in
let file = match Hashtbl.find_opt d "file" with Some (String s) -> s | _ -> "" in
trace := (name, file) :: !trace
| _ -> ());
k := List rest
| _ -> k := List [])
done;
List.rev !trace
(* Format a comp-trace into a human-readable string *)
let format_comp_trace trace =
match trace with
| [] -> ""
| entries ->
let lines = List.mapi (fun i (name, file) ->
let prefix = if i = 0 then " in " else " called from " in
if file = "" then prefix ^ "~" ^ name
else prefix ^ "~" ^ name ^ " (" ^ file ^ ")"
) entries in
"\n" ^ String.concat "\n" lines
(* Enhance an error message with component trace *)
let enhance_error_with_trace msg =
let trace = collect_comp_trace !_last_error_kont_ref in
_last_error_kont_ref := Nil;
msg ^ (format_comp_trace trace)
(* Hand-written sf_define_type — skipped from transpile because the spec uses
&rest params and empty-dict literals that the transpiler can't emit cleanly.
Implements: (define-type Name (Ctor1 f1 f2) (Ctor2 f3) ...)
Creates constructor fns, Name?/Ctor? predicates, Ctor-field accessors,
and records ctors in *adt-registry*. *)
let sf_define_type args env_val =
let items = (match args with List l -> l | _ -> []) in
let type_sym = List.nth items 0 in
let type_name = value_to_string type_sym in
let ctor_specs = List.tl items in
let env_has_v k = sx_truthy (env_has env_val (String k)) in
let env_bind_v k v = ignore (env_bind env_val (String k) v) in
let env_get_v k = env_get env_val (String k) in
if not (env_has_v "*adt-registry*") then
env_bind_v "*adt-registry*" (Dict (Hashtbl.create 8));
let registry = env_get_v "*adt-registry*" in
let ctor_names = List.map (fun spec ->
(match spec with List (sym :: _) -> String (value_to_string sym) | _ -> Nil)
) ctor_specs in
(match registry with Dict d -> Hashtbl.replace d type_name (List ctor_names) | _ -> ());
env_bind_v (type_name ^ "?")
(NativeFn (type_name ^ "?", fun pargs ->
(match pargs with
| [v] ->
(match v with
| AdtValue a -> Bool (a.av_type = type_name)
| _ -> Bool false)
| _ -> Bool false)));
List.iter (fun spec ->
(match spec with
| List (sym :: fields) ->
let cn = value_to_string sym in
let field_names = List.map value_to_string fields in
let arity = List.length fields in
env_bind_v cn
(NativeFn (cn, fun ctor_args ->
if List.length ctor_args <> arity then
raise (Eval_error (Printf.sprintf "%s: expected %d args, got %d"
cn arity (List.length ctor_args)))
else
AdtValue {
av_type = type_name;
av_ctor = cn;
av_fields = Array.of_list ctor_args;
}));
env_bind_v (cn ^ "?")
(NativeFn (cn ^ "?", fun pargs ->
(match pargs with
| [v] ->
(match v with
| AdtValue a -> Bool (a.av_ctor = cn)
| _ -> Bool false)
| _ -> Bool false)));
List.iteri (fun idx fname ->
env_bind_v (cn ^ "-" ^ fname)
(NativeFn (cn ^ "-" ^ fname, fun pargs ->
(match pargs with
| [v] ->
(match v with
| AdtValue a ->
if idx < Array.length a.av_fields then a.av_fields.(idx)
else raise (Eval_error (cn ^ "-" ^ fname ^ ": index out of bounds"))
| _ -> raise (Eval_error (cn ^ "-" ^ fname ^ ": not an ADT")))
| _ -> raise (Eval_error (cn ^ "-" ^ fname ^ ": expected 1 arg")))))
) field_names
| _ -> ())
) ctor_specs;
Nil
(* Register special forms via custom_special_forms so the CEK dispatch finds
them. The top-level (register-special-form! ...) calls in spec/evaluator.sx
are not defines and therefore are not transpiled; we wire them up here.
let-values/define-values/delay/delay-force point at the TRANSPILED spec
functions (sf_let_values etc.) — these registrations were previously
hand-appended to the generated sx_ref.ml and were silently lost on every
regeneration (2026-07 review). *)
let () = ignore (register_special_form (String "define-type")
(NativeFn ("define-type", fun call_args ->
match call_args with
| [args; env] -> sf_define_type args env
| _ -> Nil)))
let () = ignore (register_special_form (String "let-values")
(NativeFn ("let-values", fun call_args ->
match call_args with
| [args; env] -> sf_let_values args env
| _ -> Nil)))
let () = ignore (register_special_form (String "define-values")
(NativeFn ("define-values", fun call_args ->
match call_args with
| [args; env] -> sf_define_values args env
| _ -> Nil)))
let () = ignore (register_special_form (String "delay")
(NativeFn ("delay", fun call_args ->
match call_args with
| [args; env] -> sf_delay args env
| _ -> Nil)))
let () = ignore (register_special_form (String "delay-force")
(NativeFn ("delay-force", fun call_args ->
match call_args with
| [args; env] -> sf_delay_force args env
| _ -> Nil)))
"""
def compile_spec_to_ml(spec_dir: str | None = None) -> str:
"""Compile the SX spec to OCaml source."""
import tempfile
from shared.sx.ocaml_sync import OcamlSync
from shared.sx.parser import serialize
if spec_dir is None:
spec_dir = os.path.join(_PROJECT, "spec")
# Load the transpiler into OCaml kernel
bridge = OcamlSync()
transpiler_path = os.path.join(_HERE, "transpiler.sx")
bridge.load(transpiler_path)
# Spec files to transpile (in dependency order)
# stdlib.sx functions are already registered as OCaml primitives —
# only the evaluator needs transpilation.
sx_files = [
("evaluator.sx", "evaluator (frames + eval + CEK)"),
]
parts = [PREAMBLE]
for filename, label in sx_files:
filepath = os.path.join(spec_dir, filename)
if not os.path.exists(filepath):
print(f"Warning: {filepath} not found, skipping", file=sys.stderr)
continue
with open(filepath) as f:
src = f.read()
defines = extract_defines(src)
# Skip defines provided by preamble, fixups, or already-registered primitives
# Skip: preamble-provided, math primitives, and stdlib functions
# that use loop/named-let (transpiler can't handle those yet)
skip = {"trampoline", "ceil", "floor", "round", "abs", "min", "max",
"debug-log", "debug_log", "range", "chunk-every", "zip-pairs",
"string-contains?", "starts-with?", "ends-with?",
"string-replace", "trim", "split", "index-of",
"pad-left", "pad-right", "char-at", "substring",
# sf-define-type uses &rest + empty-dict literals that the transpiler
# can't emit as valid OCaml; hand-written implementation in FIXUPS.
"sf-define-type",
# Arrow-named portability shims for non-OCaml hosts. All three
# are registered natively in sx_primitives.ml, and the
# transpiler has no mangling rule for '>' in identifiers
# (emits invalid OCaml like `string_>symbol`).
"string->symbol", "symbol->string", "integer->char",
# values/in-range use &rest, which the transpiler can't emit
# (see sf-define-type above). in-range is registered natively;
# values has never been in the compiled kernel — bound only in
# run_tests.ml (2026-07 review F-7); making it a real kernel
# primitive is a tracked follow-up.
"values", "in-range", "build-range",
# zero-arg guest shim; the native gensym (with optional
# prefix) is registered in sx_primitives.ml
"gensym"}
defines = [(n, e) for n, e in defines if n not in skip]
# Deduplicate — keep last definition for each name (CEK overrides tree-walk)
seen = {}
for i, (n, e) in enumerate(defines):
seen[n] = i
defines = [(n, e) for i, (n, e) in enumerate(defines) if seen[n] == i]
# Build the defines list and known names for the transpiler
defines_list = [[name, expr] for name, expr in defines]
known_names = [name for name, _ in defines]
# Serialize defines + known names to temp file, load into kernel
defines_sx = serialize(defines_list)
known_sx = serialize(known_names)
with tempfile.NamedTemporaryFile(mode="w", suffix=".sx", delete=False) as tmp:
tmp.write(f"(define _defines \'{defines_sx})\n")
tmp.write(f"(define _known_defines \'{known_sx})\n")
tmp_path = tmp.name
try:
bridge.load(tmp_path)
finally:
os.unlink(tmp_path)
# Call ml-translate-file — emits as single let rec block
result = bridge.eval("(ml-translate-file _defines)")
parts.append(f"\n(* === Transpiled from {label} === *)\n")
parts.append(result)
bridge.stop()
parts.append(FIXUPS)
output = "\n".join(parts)
# Mutable globals (*strict*, *prim-param-types*) are now handled by
# the transpiler directly — it emits !_ref for reads, _ref := for writes.
import re
# Remove `and _protocol_registry_ = (Dict ...)` from the let rec block —
# it's defined in the preamble as a top-level let, and Hashtbl.create
# is not allowed as a let rec right-hand side.
output = re.sub(
r'\n\(\* \*protocol-registry\*.*?\nand _protocol_registry_ =\n \(Dict \(Hashtbl\.create 0\)\)\n',
'\n',
output
)
# Patch transpiled cek_run to invoke _cek_io_suspend_hook on suspension
# instead of unconditionally raising Eval_error. This is the fix for the
# tree-walk eval_expr path: sf_letrec init exprs / non-last body exprs,
# macro bodies, qq_expand, dynamic-wind / scope / provide bodies all use
# `trampoline (eval_expr ...)` and were swallowing CEK suspensions as
# "IO suspension in non-IO context" errors. With the hook, the suspension
# propagates as VmSuspended to the outer driver (browser callFn / server
# eval_expr_io). When the hook is unset (pure-CEK harness), the legacy
# error is preserved as the fallback.
output = re.sub(
r'\(raise \(Eval_error \(value_to_str \(String "IO suspension in non-IO context"\)\)\)\)',
'(match !_cek_io_suspend_hook with Some hook -> hook final | None -> '
'(raise (Eval_error (value_to_str (String "IO suspension in non-IO context")))))',
output,
count=1,
)
return output
def main():
import argparse
parser = argparse.ArgumentParser(description="Bootstrap SX spec -> OCaml")
parser.add_argument(
"--output", "-o",
default=None,
help="Output file (default: stdout)",
)
args = parser.parse_args()
result = compile_spec_to_ml()
if args.output:
with open(args.output, "w") as f:
f.write(result)
size = os.path.getsize(args.output)
print(f"Wrote {args.output} ({size} bytes)", file=sys.stderr)
else:
print(result)
if __name__ == "__main__":
main()