Preserve ;; comments through MCP tree edit round-trips

Parser gains Comment(string) AST variant and ~comments:true mode that
captures top-level ;; lines instead of discarding them. All MCP edit
tools (replace_node, insert_child, delete_node, wrap_node, rename_symbol,
replace_by_pattern, insert_near, rename_across, pretty_print, write_file)
now preserve comments: separate before tree-tools operate (so index paths
stay correct), re-interleave after editing, emit in pretty_print_file.

Default parse path (evaluator, runtime, compiler) is unchanged — comments
are still stripped unless explicitly requested.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-03 16:35:44 +00:00
parent 5f5e9379d4
commit 2e329f273a
3 changed files with 167 additions and 52 deletions

View File

@@ -308,11 +308,49 @@ let call_sx fn_name args =
let fn = env_get e fn_name in
Sx_ref.cek_call fn (List args)
let parse_file path =
let parse_file ?(comments=false) path =
let src = In_channel.with_open_text path In_channel.input_all in
let exprs = Sx_parser.parse_all src in
let exprs = Sx_parser.parse_all ~comments src in
List exprs
(* Comment preservation for edit round-trips.
Comments are separated before tree-tools operate (so paths stay correct),
then re-interleaved before writing. *)
type comment_map = {
before : (int, value list) Hashtbl.t; (* expr_index → comments before it *)
trailing : value list; (* comments after last expression *)
}
let separate_comments items =
let before = Hashtbl.create 16 in
let exprs = ref [] in
let pending = ref [] in
let idx = ref 0 in
List.iter (fun item ->
match item with
| Comment _ -> pending := item :: !pending
| _ ->
if !pending <> [] then
Hashtbl.replace before !idx (List.rev !pending);
pending := [];
exprs := item :: !exprs;
incr idx
) items;
let trailing = List.rev !pending in
(List.rev !exprs, { before; trailing })
let reinterleave exprs cmap =
let result = ref [] in
List.iteri (fun i expr ->
(match Hashtbl.find_opt cmap.before i with
| Some cs -> List.iter (fun c -> result := c :: !result) cs
| None -> ());
result := expr :: !result
) exprs;
List.iter (fun c -> result := c :: !result) cmap.trailing;
List.rev !result
let parse_path_str s =
(* Parse SX path string: "(0 3 2)" or "(0,3,2)" or "0 3 2" → SX list of numbers.
Commas are unquote in SX, so strip them before parsing. *)
@@ -437,13 +475,63 @@ let pretty_print_value ?(max_width=80) v =
Buffer.contents buf
let pretty_print_file exprs =
String.concat "\n\n" (List.map pretty_print_value exprs) ^ "\n"
(* Comments attach to the following expression — no blank line between
comment and expression. Non-comment expressions get one blank line
between them (matching the old String.concat "\n\n" behaviour). *)
let buf = Buffer.create 4096 in
let rec emit first prev_was_comment = function
| [] -> ()
| Comment text :: rest ->
(* Blank line before comment block, unless it's the first item
or follows another comment *)
if not first && not prev_was_comment then Buffer.add_char buf '\n';
Buffer.add_string buf text;
Buffer.add_char buf '\n';
emit false true rest
| v :: rest ->
(* Blank line between non-comment expressions; no blank line
after a comment (comment sticks to its expression) *)
if not first && not prev_was_comment then Buffer.add_char buf '\n';
Buffer.add_string buf (pretty_print_value v);
Buffer.add_char buf '\n';
emit false false rest
in
emit true false exprs;
Buffer.contents buf
(* Parse a file preserving comments, return clean tree + comment map *)
let parse_file_with_comments path =
let src = In_channel.with_open_text path In_channel.input_all in
let all_items = Sx_parser.parse_all ~comments:true src in
let exprs, cmap = separate_comments all_items in
(List exprs, cmap)
(* Write an edited tree back with comments re-interleaved *)
let write_edit_with_comments file cmap result =
match result with
| Dict d ->
(match Hashtbl.find_opt d "ok" with
| Some new_tree ->
let items = match new_tree with
| List items | ListRef { contents = items } -> items
| _ -> [new_tree]
in
let merged = reinterleave items cmap in
let source = pretty_print_file merged in
Out_channel.with_open_text file (fun oc -> output_string oc source);
text_result (Printf.sprintf "OK — wrote %d bytes to %s" (String.length source) file)
| None ->
let err = match Hashtbl.find_opt d "error" with
| Some (String s) -> s | Some v -> value_to_string v | None -> "Unknown error"
in
error_result ("Error: " ^ err))
| _ -> error_result "Unexpected result type"
(* ------------------------------------------------------------------ *)
(* Tool handlers *)
(* ------------------------------------------------------------------ *)
let rec handle_tool name args =
let handle_tool name args =
let open Yojson.Safe.Util in
match name with
| "sx_read_tree" ->
@@ -519,31 +607,31 @@ let rec handle_tool name args =
| "sx_replace_node" ->
let file = args |> member "file" |> to_string in
let tree = parse_file file in
let tree, cmap = parse_file_with_comments file in
let path = resolve_path tree (args |> member "path" |> to_string) in
let src = args |> member "new_source" |> to_string in
write_edit file (call_sx "replace-node" [tree; path; String src])
write_edit_with_comments file cmap (call_sx "replace-node" [tree; path; String src])
| "sx_insert_child" ->
let file = args |> member "file" |> to_string in
let tree = parse_file file in
let tree, cmap = parse_file_with_comments file in
let path = resolve_path tree (args |> member "path" |> to_string) in
let index = args |> member "index" |> to_int in
let src = args |> member "new_source" |> to_string in
write_edit file (call_sx "insert-child" [tree; path; Number (float_of_int index); String src])
write_edit_with_comments file cmap (call_sx "insert-child" [tree; path; Number (float_of_int index); String src])
| "sx_delete_node" ->
let file = args |> member "file" |> to_string in
let tree = parse_file file in
let tree, cmap = parse_file_with_comments file in
let path = resolve_path tree (args |> member "path" |> to_string) in
write_edit file (call_sx "delete-node" [tree; path])
write_edit_with_comments file cmap (call_sx "delete-node" [tree; path])
| "sx_wrap_node" ->
let file = args |> member "file" |> to_string in
let tree = parse_file file in
let tree, cmap = parse_file_with_comments file in
let path = resolve_path tree (args |> member "path" |> to_string) in
let wrapper = args |> member "wrapper" |> to_string in
write_edit file (call_sx "wrap-node" [tree; path; String wrapper])
write_edit_with_comments file cmap (call_sx "wrap-node" [tree; path; String wrapper])
| "sx_format_check" ->
let file = args |> member "file" |> to_string in
@@ -734,7 +822,7 @@ let rec handle_tool name args =
| "sx_pretty_print" ->
let file = args |> member "file" |> to_string in
let exprs = Sx_parser.parse_all (In_channel.with_open_text file In_channel.input_all) in
let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text file In_channel.input_all) in
let source = pretty_print_file exprs in
Out_channel.with_open_text file (fun oc -> output_string oc source);
text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length exprs))
@@ -1364,9 +1452,9 @@ let rec handle_tool name args =
| "sx_write_file" ->
let file = args |> member "file" |> to_string in
let source = args |> member "source" |> to_string in
(* Validate by parsing first *)
(* Validate by parsing first — preserve comments *)
(try
let exprs = Sx_parser.parse_all source in
let exprs = Sx_parser.parse_all ~comments:true source in
if exprs = [] then error_result "Source parsed to empty — nothing to write"
else begin
let output = pretty_print_file exprs in
@@ -1377,13 +1465,13 @@ let rec handle_tool name args =
| "sx_rename_symbol" ->
let file = args |> member "file" |> to_string in
let tree = parse_file file in
let tree, cmap = parse_file_with_comments file in
let old_name = args |> member "old_name" |> to_string in
let new_name = args |> member "new_name" |> to_string in
let new_tree = call_sx "rename-symbol" [tree; String old_name; String new_name] in
let count = call_sx "count-renames" [tree; String old_name] in
let count_str = value_to_string count in
write_edit file (Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d))
write_edit_with_comments file cmap (Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d))
|> (fun result ->
match result with
| `Assoc [("content", `List [`Assoc [("type", _); ("text", `String s)]])] when not (String.starts_with ~prefix:"Error" s) ->
@@ -1392,22 +1480,22 @@ let rec handle_tool name args =
| "sx_replace_by_pattern" ->
let file = args |> member "file" |> to_string in
let tree = parse_file file in
let tree, cmap = parse_file_with_comments file in
let pattern = args |> member "pattern" |> to_string in
let src = args |> member "new_source" |> to_string in
let all = args |> member "all" |> to_bool_option |> Option.value ~default:false in
if all then
write_edit file (call_sx "replace-all-by-pattern" [tree; String pattern; String src])
write_edit_with_comments file cmap (call_sx "replace-all-by-pattern" [tree; String pattern; String src])
else
write_edit file (call_sx "replace-by-pattern" [tree; String pattern; String src])
write_edit_with_comments file cmap (call_sx "replace-by-pattern" [tree; String pattern; String src])
| "sx_insert_near" ->
let file = args |> member "file" |> to_string in
let tree = parse_file file in
let tree, cmap = parse_file_with_comments file in
let pattern = args |> member "pattern" |> to_string in
let position = args |> member "position" |> to_string_option |> Option.value ~default:"after" in
let src = args |> member "new_source" |> to_string in
write_edit file (call_sx "insert-near-pattern" [tree; String pattern; String position; String src])
write_edit_with_comments file cmap (call_sx "insert-near-pattern" [tree; String pattern; String position; String src])
| "sx_rename_across" ->
let dir = args |> member "dir" |> to_string in
@@ -1418,7 +1506,7 @@ let rec handle_tool name args =
let results = List.filter_map (fun path ->
let rel = relative_path ~base:dir path in
try
let tree = parse_file path in
let tree, cmap = parse_file_with_comments path in
let count = call_sx "count-renames" [tree; String old_name] in
match count with
| Number n when n > 0.0 ->
@@ -1430,7 +1518,8 @@ let rec handle_tool name args =
| List items | ListRef { contents = items } -> items
| _ -> [new_tree]
in
let source = pretty_print_file items in
let merged = reinterleave items cmap in
let source = pretty_print_file merged in
Out_channel.with_open_text path (fun oc -> output_string oc source);
Some (Printf.sprintf "%s: %d occurrences renamed" rel (int_of_float n))
end
@@ -2095,25 +2184,6 @@ let rec handle_tool name args =
| _ -> error_result ("Unknown tool: " ^ name)
and write_edit file result =
match result with
| Dict d ->
(match Hashtbl.find_opt d "ok" with
| Some new_tree ->
let items = match new_tree with
| List items | ListRef { contents = items } -> items
| _ -> [new_tree]
in
let source = pretty_print_file items in
Out_channel.with_open_text file (fun oc -> output_string oc source);
text_result (Printf.sprintf "OK — wrote %d bytes to %s" (String.length source) file)
| None ->
let err = match Hashtbl.find_opt d "error" with
| Some (String s) -> s | Some v -> value_to_string v | None -> "Unknown error"
in
error_result ("Error: " ^ err))
| _ -> error_result "Unexpected result type"
(* ------------------------------------------------------------------ *)
(* MCP tool definitions *)
(* ------------------------------------------------------------------ *)

View File

@@ -30,6 +30,23 @@ let skip_whitespace_and_comments s =
| _ -> ()
in go ()
(* Skip whitespace only — leaves comments for capture *)
let skip_whitespace s =
let rec go () =
if at_end s then ()
else match s.src.[s.pos] with
| ' ' | '\t' | '\n' | '\r' -> advance s; go ()
| _ -> ()
in go ()
(* Read a comment line starting at ';', returns text after ";;" (or ";") *)
let read_comment s =
let start = s.pos in
while s.pos < s.len && s.src.[s.pos] <> '\n' do advance s done;
let text = String.sub s.src start (s.pos - start) in
if s.pos < s.len then advance s;
text
(* Character classification — matches spec/parser.sx ident-start/ident-char.
ident-start: a-z A-Z _ ~ * + - > < = / ! ? &
ident-char: ident-start plus 0-9 . : / # , *)
@@ -202,23 +219,48 @@ and read_dict s =
in go ()
(** Parse a string into a list of SX values. *)
let parse_all src =
(** Parse a string into a list of SX values.
When [~comments:true], top-level ;; comments are preserved as [Comment]
nodes in the result list. Default is [false] (strip comments). *)
let parse_all ?(comments=false) src =
let s = make_state src in
let results = ref [] in
let rec go () =
skip_whitespace_and_comments s;
if at_end s then List.rev !results
else begin
results := read_value s :: !results;
go ()
if comments then begin
skip_whitespace s;
if at_end s then List.rev !results
else if s.src.[s.pos] = ';' then begin
(* Collect consecutive comment lines into one Comment node *)
let lines = ref [] in
let rec collect () =
skip_whitespace s;
if not (at_end s) && s.src.[s.pos] = ';' then begin
lines := read_comment s :: !lines;
collect ()
end
in
collect ();
let text = String.concat "\n" (List.rev !lines) in
results := Comment text :: !results;
go ()
end else begin
results := read_value s :: !results;
go ()
end
end else begin
skip_whitespace_and_comments s;
if at_end s then List.rev !results
else begin
results := read_value s :: !results;
go ()
end
end
in go ()
(** Parse a file into a list of SX values. *)
let parse_file path =
let parse_file ?(comments=false) path =
let ic = open_in path in
let n = in_channel_length ic in
let src = really_input_string ic n in
close_in ic;
parse_all src
parse_all ~comments src

View File

@@ -67,6 +67,7 @@ and value =
| CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *)
| CekFrame of cek_frame (** Optimized CEK continuation frame. *)
| VmClosure of vm_closure (** VM-compiled closure — callable within the VM without allocating a new VM. *)
| Comment of string (** Source comment — preserved through parse/edit/pretty-print round-trips. *)
(** CEK machine state — record instead of Dict for performance.
5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *)
@@ -347,6 +348,7 @@ let type_of = function
| CekState _ -> "dict" (* CEK state behaves as a dict for type checks *)
| CekFrame _ -> "dict"
| VmClosure _ -> "function"
| Comment _ -> "comment"
let is_nil = function Nil -> true | _ -> false
let is_lambda = function Lambda _ -> true | _ -> false
@@ -541,3 +543,4 @@ let rec inspect = function
| CekState _ -> "<cek-state>"
| CekFrame f -> Printf.sprintf "<frame:%s>" f.cf_type
| VmClosure cl -> Printf.sprintf "<vm:%s>" (match cl.vm_name with Some n -> n | None -> "anon")
| Comment text -> ";;" ^ text