OCaml bootstrapper: transpiler compiles full CEK evaluator (61/61 tests)
SX-to-OCaml transpiler (transpiler.sx) generates sx_ref.ml (~90KB, ~135 mutually recursive functions) from the spec evaluator. Foundation tests all pass: parser, primitives, env operations, type system. Key design decisions: - Env variant added to value type for CEK state dict storage - Continuation carries optional data dict for captured frames - Dynamic var tracking distinguishes OCaml fn calls from SX value dispatch - Single let rec...and block for forward references between all defines - Unused ref pre-declarations eliminated via let-bound name detection Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
181
hosts/ocaml/lib/sx_parser.ml
Normal file
181
hosts/ocaml/lib/sx_parser.ml
Normal file
@@ -0,0 +1,181 @@
|
||||
(** S-expression parser.
|
||||
|
||||
Recursive descent over a string, producing [Sx_types.value list].
|
||||
Supports: lists, dicts, symbols, keywords, strings (with escapes),
|
||||
numbers, booleans, nil, comments, quote/quasiquote/unquote sugar. *)
|
||||
|
||||
open Sx_types
|
||||
|
||||
type state = {
|
||||
src : string;
|
||||
len : int;
|
||||
mutable pos : int;
|
||||
}
|
||||
|
||||
let make_state src = { src; len = String.length src; pos = 0 }
|
||||
|
||||
let peek s = if s.pos < s.len then Some s.src.[s.pos] else None
|
||||
let advance s = s.pos <- s.pos + 1
|
||||
let at_end s = s.pos >= s.len
|
||||
|
||||
let skip_whitespace_and_comments s =
|
||||
let rec go () =
|
||||
if at_end s then ()
|
||||
else match s.src.[s.pos] with
|
||||
| ' ' | '\t' | '\n' | '\r' | ',' -> advance s; go ()
|
||||
| ';' ->
|
||||
while s.pos < s.len && s.src.[s.pos] <> '\n' do advance s done;
|
||||
if s.pos < s.len then advance s;
|
||||
go ()
|
||||
| _ -> ()
|
||||
in go ()
|
||||
|
||||
let is_symbol_char = function
|
||||
| '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`'
|
||||
| ' ' | '\t' | '\n' | '\r' | ',' | ';' -> false
|
||||
| _ -> true
|
||||
|
||||
let read_string s =
|
||||
(* s.pos is on the opening quote *)
|
||||
advance s;
|
||||
let buf = Buffer.create 64 in
|
||||
let rec go () =
|
||||
if at_end s then raise (Parse_error "Unterminated string");
|
||||
let c = s.src.[s.pos] in
|
||||
advance s;
|
||||
if c = '"' then Buffer.contents buf
|
||||
else if c = '\\' then begin
|
||||
if at_end s then raise (Parse_error "Unterminated string escape");
|
||||
let esc = s.src.[s.pos] in
|
||||
advance s;
|
||||
(match esc with
|
||||
| 'n' -> Buffer.add_char buf '\n'
|
||||
| 't' -> Buffer.add_char buf '\t'
|
||||
| 'r' -> Buffer.add_char buf '\r'
|
||||
| '"' -> Buffer.add_char buf '"'
|
||||
| '\\' -> Buffer.add_char buf '\\'
|
||||
| 'u' ->
|
||||
(* \uXXXX — read 4 hex digits, encode as UTF-8 *)
|
||||
if s.pos + 4 > s.len then raise (Parse_error "Incomplete \\u escape");
|
||||
let hex = String.sub s.src s.pos 4 in
|
||||
s.pos <- s.pos + 4;
|
||||
let code = int_of_string ("0x" ^ hex) in
|
||||
let ubuf = Buffer.create 4 in
|
||||
Buffer.add_utf_8_uchar ubuf (Uchar.of_int code);
|
||||
Buffer.add_string buf (Buffer.contents ubuf)
|
||||
| _ -> Buffer.add_char buf '\\'; Buffer.add_char buf esc);
|
||||
go ()
|
||||
end else begin
|
||||
Buffer.add_char buf c;
|
||||
go ()
|
||||
end
|
||||
in go ()
|
||||
|
||||
let read_symbol s =
|
||||
let start = s.pos in
|
||||
while s.pos < s.len && is_symbol_char s.src.[s.pos] do advance s done;
|
||||
String.sub s.src start (s.pos - start)
|
||||
|
||||
let try_number str =
|
||||
match float_of_string_opt str with
|
||||
| Some n -> Some (Number n)
|
||||
| None -> None
|
||||
|
||||
let rec read_value s : value =
|
||||
skip_whitespace_and_comments s;
|
||||
if at_end s then raise (Parse_error "Unexpected end of input");
|
||||
match s.src.[s.pos] with
|
||||
| '(' -> read_list s ')'
|
||||
| '[' -> read_list s ']'
|
||||
| '{' -> read_dict s
|
||||
| '"' -> String (read_string s)
|
||||
| '\'' -> advance s; List [Symbol "quote"; read_value s]
|
||||
| '`' -> advance s; List [Symbol "quasiquote"; read_value s]
|
||||
| '~' when s.pos + 1 < s.len && s.src.[s.pos + 1] = '@' ->
|
||||
advance s; advance s; (* skip ~@ *)
|
||||
List [Symbol "splice-unquote"; read_value s]
|
||||
| _ ->
|
||||
(* Check for unquote: , followed by non-whitespace *)
|
||||
if s.src.[s.pos] = ',' && s.pos + 1 < s.len &&
|
||||
s.src.[s.pos + 1] <> ' ' && s.src.[s.pos + 1] <> '\n' then begin
|
||||
advance s;
|
||||
if s.pos < s.len && s.src.[s.pos] = '@' then begin
|
||||
advance s;
|
||||
List [Symbol "splice-unquote"; read_value s]
|
||||
end else
|
||||
List [Symbol "unquote"; read_value s]
|
||||
end else begin
|
||||
(* Symbol, keyword, number, or boolean *)
|
||||
let token = read_symbol s in
|
||||
if token = "" then raise (Parse_error ("Unexpected char: " ^ String.make 1 s.src.[s.pos]));
|
||||
match token with
|
||||
| "true" -> Bool true
|
||||
| "false" -> Bool false
|
||||
| "nil" -> Nil
|
||||
| _ when token.[0] = ':' ->
|
||||
Keyword (String.sub token 1 (String.length token - 1))
|
||||
| _ ->
|
||||
match try_number token with
|
||||
| Some n -> n
|
||||
| None -> Symbol token
|
||||
end
|
||||
|
||||
and read_list s close_char =
|
||||
advance s; (* skip opening paren/bracket *)
|
||||
let items = ref [] in
|
||||
let rec go () =
|
||||
skip_whitespace_and_comments s;
|
||||
if at_end s then raise (Parse_error "Unterminated list");
|
||||
if s.src.[s.pos] = close_char then begin
|
||||
advance s;
|
||||
List (List.rev !items)
|
||||
end else begin
|
||||
items := read_value s :: !items;
|
||||
go ()
|
||||
end
|
||||
in go ()
|
||||
|
||||
and read_dict s =
|
||||
advance s; (* skip { *)
|
||||
let d = make_dict () in
|
||||
let rec go () =
|
||||
skip_whitespace_and_comments s;
|
||||
if at_end s then raise (Parse_error "Unterminated dict");
|
||||
if s.src.[s.pos] = '}' then begin
|
||||
advance s;
|
||||
Dict d
|
||||
end else begin
|
||||
let key = read_value s in
|
||||
let key_str = match key with
|
||||
| Keyword k -> k
|
||||
| String k -> k
|
||||
| Symbol k -> k
|
||||
| _ -> raise (Parse_error "Dict key must be keyword, string, or symbol")
|
||||
in
|
||||
let v = read_value s in
|
||||
dict_set d key_str v;
|
||||
go ()
|
||||
end
|
||||
in go ()
|
||||
|
||||
|
||||
(** Parse a string into a list of SX values. *)
|
||||
let parse_all src =
|
||||
let s = make_state src in
|
||||
let results = ref [] in
|
||||
let rec go () =
|
||||
skip_whitespace_and_comments s;
|
||||
if at_end s then List.rev !results
|
||||
else begin
|
||||
results := read_value s :: !results;
|
||||
go ()
|
||||
end
|
||||
in go ()
|
||||
|
||||
(** Parse a file into a list of SX values. *)
|
||||
let parse_file path =
|
||||
let ic = open_in path in
|
||||
let n = in_channel_length ic in
|
||||
let src = really_input_string ic n in
|
||||
close_in ic;
|
||||
parse_all src
|
||||
Reference in New Issue
Block a user