#!/usr/bin/env python3 """ Bootstrap compiler: SX spec -> OCaml. Loads the SX-to-OCaml transpiler (transpiler.sx), feeds it the spec files, and produces sx_ref.ml — the transpiled evaluator as native OCaml. Usage: python3 hosts/ocaml/bootstrap.py --output hosts/ocaml/lib/sx_ref.ml """ from __future__ import annotations import os import sys _HERE = os.path.dirname(os.path.abspath(__file__)) _PROJECT = os.path.abspath(os.path.join(_HERE, "..", "..")) sys.path.insert(0, _PROJECT) from shared.sx.parser import parse_all from shared.sx.types import Symbol def extract_defines(source: str) -> list[tuple[str, list]]: """Parse .sx source, return list of (name, define-expr) for top-level defines.""" exprs = parse_all(source) defines = [] for expr in exprs: if isinstance(expr, list) and expr and isinstance(expr[0], Symbol): if expr[0].name == "define": name = expr[1].name if isinstance(expr[1], Symbol) else str(expr[1]) defines.append((name, expr)) return defines # OCaml preamble — opens and runtime helpers PREAMBLE = """\ (* sx_ref.ml — Auto-generated from SX spec by hosts/ocaml/bootstrap.py *) (* Do not edit — regenerate with: python3 hosts/ocaml/bootstrap.py *) [@@@warning "-26-27"] open Sx_types open Sx_runtime (* Trampoline — forward ref, resolved after eval_expr is defined. *) let trampoline_fn : (value -> value) ref = ref (fun v -> v) let trampoline v = !trampoline_fn v (* === Mutable state for strict mode === *) (* These are defined as top-level refs because the transpiler cannot handle global set! mutation (it creates local refs that shadow the global). *) let _strict_ref = ref (Bool false) let _prim_param_types_ref = ref Nil (* JIT call hook — cek_call checks this before CEK dispatch for named lambdas. Registered by sx_server.ml after compiler loads. Tests run with hook = None (pure CEK, no compilation dependency). *) let jit_call_hook : (value -> value list -> value option) option ref = ref None (* Component trace — captures kont from last CEK error for diagnostics *) let _last_error_kont : value ref = ref Nil """ # OCaml fixups — wire up trampoline + iterative CEK run + JIT hook FIXUPS = """\ (* Wire up trampoline to resolve thunks via the CEK machine *) let () = trampoline_fn := (fun v -> match v with | Thunk (expr, env) -> eval_expr expr (Env env) | _ -> v) (* Wire up the primitives trampoline so call_any in HO forms resolves Thunks *) let () = Sx_primitives._sx_trampoline_fn := !trampoline_fn (* Override recursive cek_run with iterative loop. On error, capture the kont from the last state for comp-trace. *) let cek_run_iterative state = let s = ref state in (try while not (match cek_terminal_p !s with Bool true -> true | _ -> false) do s := cek_step !s done; cek_value !s with Eval_error msg -> _last_error_kont := cek_kont !s; raise (Eval_error msg)) (* Collect component trace from a kont value *) let collect_comp_trace kont = let trace = ref [] in let k = ref kont in while (match !k with List (_::_) -> true | _ -> false) do (match !k with | List (frame :: rest) -> (match frame with | CekFrame f when f.cf_type = "comp-trace" -> let name = match f.cf_name with String s -> s | _ -> "?" in let file = match f.cf_env with String s -> s | Nil -> "" | _ -> "" in trace := (name, file) :: !trace | Dict d when (match Hashtbl.find_opt d "type" with Some (String "comp-trace") -> true | _ -> false) -> let name = match Hashtbl.find_opt d "name" with Some (String s) -> s | _ -> "?" in let file = match Hashtbl.find_opt d "file" with Some (String s) -> s | _ -> "" in trace := (name, file) :: !trace | _ -> ()); k := List rest | _ -> k := List []) done; List.rev !trace (* Format a comp-trace into a human-readable string *) let format_comp_trace trace = match trace with | [] -> "" | entries -> let lines = List.mapi (fun i (name, file) -> let prefix = if i = 0 then " in " else " called from " in if file = "" then prefix ^ "~" ^ name else prefix ^ "~" ^ name ^ " (" ^ file ^ ")" ) entries in "\n" ^ String.concat "\n" lines (* Enhance an error message with component trace *) let enhance_error_with_trace msg = let trace = collect_comp_trace !_last_error_kont in _last_error_kont := Nil; msg ^ (format_comp_trace trace) """ def compile_spec_to_ml(spec_dir: str | None = None) -> str: """Compile the SX spec to OCaml source.""" import tempfile from shared.sx.ocaml_sync import OcamlSync from shared.sx.parser import serialize if spec_dir is None: spec_dir = os.path.join(_PROJECT, "spec") # Load the transpiler into OCaml kernel bridge = OcamlSync() transpiler_path = os.path.join(_HERE, "transpiler.sx") bridge.load(transpiler_path) # Spec files to transpile (in dependency order) # stdlib.sx functions are already registered as OCaml primitives — # only the evaluator needs transpilation. sx_files = [ ("evaluator.sx", "evaluator (frames + eval + CEK)"), ] parts = [PREAMBLE] for filename, label in sx_files: filepath = os.path.join(spec_dir, filename) if not os.path.exists(filepath): print(f"Warning: {filepath} not found, skipping", file=sys.stderr) continue with open(filepath) as f: src = f.read() defines = extract_defines(src) # Skip defines provided by preamble, fixups, or already-registered primitives # Skip: preamble-provided, math primitives, and stdlib functions # that use loop/named-let (transpiler can't handle those yet) skip = {"trampoline", "ceil", "floor", "round", "abs", "min", "max", "debug-log", "debug_log", "range", "chunk-every", "zip-pairs", "string-contains?", "starts-with?", "ends-with?", "string-replace", "trim", "split", "index-of", "pad-left", "pad-right", "char-at", "substring"} defines = [(n, e) for n, e in defines if n not in skip] # Deduplicate — keep last definition for each name (CEK overrides tree-walk) seen = {} for i, (n, e) in enumerate(defines): seen[n] = i defines = [(n, e) for i, (n, e) in enumerate(defines) if seen[n] == i] # Build the defines list and known names for the transpiler defines_list = [[name, expr] for name, expr in defines] known_names = [name for name, _ in defines] # Serialize defines + known names to temp file, load into kernel defines_sx = serialize(defines_list) known_sx = serialize(known_names) with tempfile.NamedTemporaryFile(mode="w", suffix=".sx", delete=False) as tmp: tmp.write(f"(define _defines \'{defines_sx})\n") tmp.write(f"(define _known_defines \'{known_sx})\n") tmp_path = tmp.name try: bridge.load(tmp_path) finally: os.unlink(tmp_path) # Call ml-translate-file — emits as single let rec block result = bridge.eval("(ml-translate-file _defines)") parts.append(f"\n(* === Transpiled from {label} === *)\n") parts.append(result) bridge.stop() parts.append(FIXUPS) output = "\n".join(parts) # Post-process: fix mutable globals that the transpiler can't handle. # The transpiler emits local refs for set! targets within functions, # but top-level globals (*strict*, *prim-param-types*) need to use # the pre-declared refs from the preamble. import re # Fix *strict*: use _strict_ref instead of immutable let rec binding output = re.sub( r'and _strict_ =\n \(Bool false\)', 'and _strict_ = !_strict_ref', output, ) # Fix set-strict!: use _strict_ref instead of local ref output = re.sub( r'and set_strict_b val\' =\n let _strict_ = ref Nil in \(_strict_ := val\'; Nil\)', "and set_strict_b val' =\n _strict_ref := val'; Nil", output, ) # Fix *prim-param-types*: use _prim_param_types_ref output = re.sub( r'and _prim_param_types_ =\n Nil', 'and _prim_param_types_ = !_prim_param_types_ref', output, ) # Fix set-prim-param-types!: use _prim_param_types_ref output = re.sub( r'and set_prim_param_types_b types =\n let _prim_param_types_ = ref Nil in \(_prim_param_types_ := types; Nil\)', "and set_prim_param_types_b types =\n _prim_param_types_ref := types; Nil", output, ) # Fix all runtime reads of _strict_ and _prim_param_types_ to deref # the mutable refs instead of using the stale let-rec bindings. # This is needed because let-rec value bindings capture initial values. # Use regex with word boundary to avoid replacing _strict_ref with # !_strict_refref. def fix_mutable_reads(text): lines = text.split('\n') fixed = [] for line in lines: # Skip the definition lines stripped = line.strip() if stripped.startswith('and _strict_ =') or stripped.startswith('and _prim_param_types_ ='): fixed.append(line) continue # Replace _strict_ as a standalone identifier only (not inside # other names like set_strict_b). Match when preceded by space, # paren, or start-of-line, and followed by space, paren, or ;. line = re.sub(r'(?<=[ (])_strict_(?=[ );])', '!_strict_ref', line) line = re.sub(r'(?<=[ (])_prim_param_types_(?=[ );])', '!_prim_param_types_ref', line) fixed.append(line) return '\n'.join(fixed) output = fix_mutable_reads(output) # Fix cek_call: the spec passes (make-env) as the env arg to # continue_with_call, but the transpiler evaluates make-env at # transpile time (it's a primitive), producing Dict instead of Env. output = output.replace( "((Dict (Hashtbl.create 0))) (a) ((List []))", "(Env (Sx_types.make_env ())) (a) ((List []))", ) # Inject JIT dispatch + &rest handling into continue_with_call's lambda branch. # Replace the entire lambda binding + make_cek_state section. cwc_lambda_old = ( 'else (if sx_truthy ((is_lambda (f))) then ' '(let params = (lambda_params (f)) in let local = (env_merge ((lambda_closure (f))) (env)) in ' '(if sx_truthy ((prim_call ">" [(len (args)); (len (params))])) then ' '(raise (Eval_error (value_to_str (String (sx_str [' '(let _or = (lambda_name (f)) in if sx_truthy _or then _or else (String "lambda")); ' '(String " expects "); (len (params)); (String " args, got "); (len (args))])))))' ' else (let () = ignore ((List.iter (fun pair -> ignore (' '(env_bind local (sx_to_string (first (pair))) (nth (pair) ((Number 1.0))))))' ' (sx_to_list (prim_call "zip" [params; args])); Nil)) in ' '(let () = ignore ((List.iter (fun p -> ignore ((env_bind local (sx_to_string p) Nil)))' ' (sx_to_list (prim_call "slice" [params; (len (args))])); Nil)) in ' '(make_cek_state ((lambda_body (f))) (local) (kont))))))' ) cwc_lambda_new = ( 'else (if sx_truthy ((is_lambda (f))) then ' '(let params = (lambda_params (f)) in let local = (env_merge ((lambda_closure (f))) (env)) in ' '(if not (bind_lambda_with_rest params args local) then begin ' 'let pl = sx_to_list params and al = sx_to_list args in ' 'if List.length al > List.length pl then ' 'raise (Eval_error (Printf.sprintf "%s expects %d args, got %d" ' '(match lambda_name f with String s -> s | _ -> "lambda") ' '(List.length pl) (List.length al))); ' 'List.iter (fun pair -> ignore (env_bind local (sx_to_string (first pair)) (nth pair (Number 1.0)))) ' '(sx_to_list (prim_call "zip" [params; args])); ' 'List.iter (fun p -> ignore (env_bind local (sx_to_string p) Nil)) ' '(sx_to_list (prim_call "slice" [params; len args])) end; ' '(match !jit_call_hook, f with ' '| Some hook, Lambda l when l.l_name <> None -> ' 'let args_list = match args with List a | ListRef { contents = a } -> a | _ -> [] in ' '(match hook f args_list with ' 'Some result -> make_cek_value result local kont ' '| None -> make_cek_state (lambda_body f) local kont) ' '| _ -> make_cek_state ((lambda_body (f))) (local) (kont))))' ) if cwc_lambda_old in output: output = output.replace(cwc_lambda_old, cwc_lambda_new, 1) else: import sys print("WARNING: Could not find continue_with_call lambda pattern for &rest+JIT injection", file=sys.stderr) # Patch call_lambda and continue_with_call to handle &rest in lambda params. # The transpiler can't handle the index-of-based approach, so we inject it. REST_HELPER = """ (* &rest lambda param binding — injected by bootstrap.py *) and bind_lambda_with_rest params args local = let param_list = sx_to_list params in let arg_list = sx_to_list args in let rec find_rest i = function | [] -> None | h :: rp :: _ when value_to_str h = "&rest" -> Some (i, value_to_str rp) | _ :: tl -> find_rest (i + 1) tl in match find_rest 0 param_list with | Some (pos, rest_name) -> let positional = List.filteri (fun i _ -> i < pos) param_list in List.iteri (fun i p -> let v = if i < List.length arg_list then List.nth arg_list i else Nil in ignore (env_bind local (value_to_str p) v) ) positional; let rest_args = if List.length arg_list > pos then List (List.filteri (fun i _ -> i >= pos) arg_list) else List [] in ignore (env_bind local rest_name rest_args); true | None -> false """ # Inject the helper before call_lambda output = output.replace( "(* call-lambda *)\nand call_lambda", REST_HELPER + "\n(* call-lambda *)\nand call_lambda", ) # Patch call_lambda to use &rest-aware binding call_lambda_marker = "(* call-lambda *)\nand call_lambda f args caller_env =\n" call_comp_marker = "\n(* call-component *)" if call_lambda_marker in output and call_comp_marker in output: start = output.index(call_lambda_marker) end = output.index(call_comp_marker) new_call_lambda = """(* call-lambda *) and call_lambda f args caller_env = let params = lambda_params f in let local = env_merge (lambda_closure f) caller_env in if not (bind_lambda_with_rest params args local) then begin let pl = sx_to_list params and al = sx_to_list args in if List.length al > List.length pl then raise (Eval_error (Printf.sprintf "%s expects %d args, got %d" (match lambda_name f with String s -> s | _ -> "lambda") (List.length pl) (List.length al))); List.iter (fun pair -> ignore (env_bind local (sx_to_string (first pair)) (nth pair (Number 1.0))) ) (sx_to_list (prim_call "zip" [params; args])); List.iter (fun p -> ignore (env_bind local (sx_to_string p) Nil) ) (sx_to_list (prim_call "slice" [params; len args])) end; make_thunk (lambda_body f) local """ output = output[:start] + new_call_lambda + output[end:] else: print("WARNING: Could not find call_lambda for &rest injection", file=sys.stderr) # Instrument recursive cek_run to capture kont on error (for comp-trace). # The iterative cek_run_iterative already does this, but cek_call uses # the recursive cek_run. cek_run_old = ( 'and cek_run state =\n' ' (if sx_truthy ((cek_terminal_p (state))) then (cek_value (state)) else (cek_run ((cek_step (state)))))' ) cek_run_new = ( 'and cek_run state =\n' ' (if sx_truthy ((cek_terminal_p (state))) then (cek_value (state)) else\n' ' try cek_run ((cek_step (state)))\n' ' with Eval_error msg ->\n' ' (if !_last_error_kont = Nil then _last_error_kont := cek_kont state);\n' ' raise (Eval_error msg))' ) if cek_run_old in output: output = output.replace(cek_run_old, cek_run_new, 1) return output def main(): import argparse parser = argparse.ArgumentParser(description="Bootstrap SX spec -> OCaml") parser.add_argument( "--output", "-o", default=None, help="Output file (default: stdout)", ) args = parser.parse_args() result = compile_spec_to_ml() if args.output: with open(args.output, "w") as f: f.write(result) size = os.path.getsize(args.output) print(f"Wrote {args.output} ({size} bytes)", file=sys.stderr) else: print(result) if __name__ == "__main__": main()