Migrate MCP tools from comment_map to CST-based round-tripping

Replace the comment preservation workaround (comment_map type,
separate_comments, reinterleave, strip_interior_comments,
extract_fragment_comments, inject_comments — ~170 lines) with
CST-based editing (~80 lines).

write_edit_cst: compares old AST vs new AST per node. Unchanged
nodes keep original source verbatim. Changed nodes are pretty-printed
with original leading trivia preserved. New nodes (insertions) get
minimal formatting.

parse_file_cst: returns (AST tree, CST file). AST goes to tree-tools,
CST is used for write-back.

extract_cst_comments / inject_cst_comments: read comment trivia from
CST nodes for summarise/read_tree display.

Net: -39 lines. 24/24 CST round-trip tests, 2583/2583 evaluator tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-03 18:13:34 +00:00
parent 5390df7b0b
commit af63d49451

View File

@@ -313,79 +313,70 @@ let parse_file ?(comments=false) path =
let exprs = Sx_parser.parse_all ~comments src in
List exprs
(* Comment preservation for edit round-trips.
Comments are separated before tree-tools operate (so paths stay correct),
then re-interleaved before writing. *)
(* CST-based round-tripping — replaces comment_map machinery.
Parse file as CST for lossless writes, project to AST for tree-tools. *)
type comment_map = {
before : (int, value list) Hashtbl.t; (* expr_index → comments before it *)
trailing : value list; (* comments after last expression *)
}
(* Parse file → (AST for tree-tools, CST for writing back) *)
let parse_file_cst path =
let cst = Sx_parser.parse_file_cst path in
let ast = List (List.map Sx_cst.cst_to_ast cst.nodes) in
(ast, cst)
(* Recursively strip Comment nodes from inside lists — tree-tools
must not see them or index-based paths break. *)
let rec strip_interior_comments v =
match v with
| List items ->
let clean = List.filter (function Comment _ -> false | _ -> true) items in
List (List.map strip_interior_comments clean)
| ListRef { contents = items } ->
let clean = List.filter (function Comment _ -> false | _ -> true) items in
List (List.map strip_interior_comments clean)
| _ -> v
(* Extract comment text from CST trivia for display in summarise/read_tree *)
let extract_cst_comments (cst : Sx_parser.cst_file) =
let tbl = Hashtbl.create 16 in
List.iteri (fun i node ->
let trivia = match node with
| Sx_cst.CstAtom r -> r.leading_trivia
| Sx_cst.CstList r -> r.leading_trivia
| Sx_cst.CstDict r -> r.leading_trivia
in
let comments = List.filter_map (function
| Sx_cst.LineComment text ->
(* Strip trailing newline for display *)
let t = if String.length text > 0 && text.[String.length text - 1] = '\n'
then String.sub text 0 (String.length text - 1) else text in
Some t
| _ -> None
) trivia in
if comments <> [] then
Hashtbl.replace tbl i comments
) cst.nodes;
tbl
let separate_comments items =
let before = Hashtbl.create 16 in
let exprs = ref [] in
let pending = ref [] in
let idx = ref 0 in
List.iter (fun item ->
match item with
| Comment _ -> pending := item :: !pending
| _ ->
if !pending <> [] then
Hashtbl.replace before !idx (List.rev !pending);
pending := [];
(* Strip interior comments from the expression before passing to tree-tools *)
exprs := strip_interior_comments item :: !exprs;
incr idx
) items;
let trailing = List.rev !pending in
(List.rev !exprs, { before; trailing })
let reinterleave exprs cmap =
let result = ref [] in
List.iteri (fun i expr ->
(match Hashtbl.find_opt cmap.before i with
| Some cs -> List.iter (fun c -> result := c :: !result) cs
| None -> ());
result := expr :: !result
) exprs;
List.iter (fun c -> result := c :: !result) cmap.trailing;
List.rev !result
(* Extract leading comments from fragment source and attach to a position
in the comment map. Returns the clean source (comments stripped). *)
let extract_fragment_comments src cmap target_idx =
let items = Sx_parser.parse_all ~comments:true src in
let cmt_nodes = ref [] in
let expr_nodes = ref [] in
List.iter (fun item ->
match item with
| Comment _ -> cmt_nodes := item :: !cmt_nodes
| _ -> expr_nodes := item :: !expr_nodes
) items;
let cmts = List.rev !cmt_nodes in
if cmts <> [] then begin
let existing = match Hashtbl.find_opt cmap.before target_idx with
| Some cs -> cs | None -> [] in
Hashtbl.replace cmap.before target_idx (existing @ cmts)
end;
(* Reconstruct source without comments *)
if cmts = [] then src
(* Inject comment text into summarise/annotate output.
Matches [N] markers and inserts comment lines that precede expr N. *)
let inject_cst_comments output comment_tbl =
if Hashtbl.length comment_tbl = 0 then output
else
let clean = List.rev !expr_nodes in
String.concat " " (List.map inspect clean)
let lines = String.split_on_char '\n' output in
let buf = Buffer.create (String.length output + 512) in
let first = ref true in
List.iter (fun line ->
let idx = if String.length line > 1 && line.[0] = '[' then
(try Scanf.sscanf line "[%d]" (fun n -> Some n) with _ -> None)
else
let trimmed = String.trim line in
if String.length trimmed > 1 && trimmed.[0] = '[' then
(try Scanf.sscanf trimmed "[%d]" (fun n -> Some n) with _ -> None)
else None
in
(match idx with
| Some n ->
(match Hashtbl.find_opt comment_tbl n with
| Some comments ->
List.iter (fun text ->
if not !first then Buffer.add_char buf '\n';
first := false;
Buffer.add_string buf text
) comments
| None -> ())
| None -> ());
if not !first then Buffer.add_char buf '\n';
first := false;
Buffer.add_string buf line
) lines;
Buffer.contents buf
let parse_path_str s =
(* Parse SX path string: "(0 3 2)" or "(0,3,2)" or "0 3 2" → SX list of numbers.
@@ -551,25 +542,49 @@ let pretty_print_file exprs =
emit true false exprs;
Buffer.contents buf
(* Parse a file preserving comments, return clean tree + comment map *)
let parse_file_with_comments path =
let src = In_channel.with_open_text path In_channel.input_all in
let all_items = Sx_parser.parse_all ~comments:true src in
let exprs, cmap = separate_comments all_items in
(List exprs, cmap)
(* Write an edited tree back with comments re-interleaved *)
let write_edit_with_comments file cmap result =
(* Apply an AST-level edit result back to the CST and write the file.
Unchanged nodes keep their original source; changed nodes are pretty-printed
with the original leading trivia preserved. *)
let write_edit_cst file (cst : Sx_parser.cst_file) result =
match result with
| Dict d ->
(match Hashtbl.find_opt d "ok" with
| Some new_tree ->
let items = match new_tree with
let new_items = match new_tree with
| List items | ListRef { contents = items } -> items
| _ -> [new_tree]
in
let merged = reinterleave items cmap in
let source = pretty_print_file merged in
let old_nodes = cst.nodes in
let old_asts = List.map Sx_cst.cst_to_ast old_nodes in
let new_cst_nodes = List.mapi (fun i new_ast ->
if i < List.length old_nodes then
let old_ast = List.nth old_asts i in
if inspect old_ast = inspect new_ast then
List.nth old_nodes i
else
let pp = pretty_print_value new_ast in
let new_cst = Sx_parser.parse_all_cst pp in
let orig_trivia = match List.nth old_nodes i with
| Sx_cst.CstAtom r -> r.leading_trivia
| Sx_cst.CstList r -> r.leading_trivia
| Sx_cst.CstDict r -> r.leading_trivia
in
(match new_cst.nodes with
| [node] ->
(match node with
| Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = orig_trivia }
| Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = orig_trivia }
| Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = orig_trivia })
| _ -> List.nth old_nodes i)
else
let pp = pretty_print_value new_ast in
let new_cst = Sx_parser.parse_all_cst ("\n\n" ^ pp) in
(match new_cst.nodes with node :: _ -> node | [] ->
Sx_cst.CstAtom { leading_trivia = []; token = pp;
value = new_ast; span = { start_offset = 0; end_offset = 0 } })
) new_items in
let source = Sx_cst.cst_file_to_source new_cst_nodes cst.trailing_trivia in
Out_channel.with_open_text file (fun oc -> output_string oc source);
text_result (Printf.sprintf "OK — wrote %d bytes to %s" (String.length source) file)
| None ->
@@ -579,38 +594,6 @@ let write_edit_with_comments file cmap result =
error_result ("Error: " ^ err))
| _ -> error_result "Unexpected result type"
(* Inject comment text into summarise/annotate output.
Matches [N] markers and inserts the comment block that precedes expr N. *)
let inject_comments output cmap =
if Hashtbl.length cmap.before = 0 && cmap.trailing = [] then output
else
let lines = String.split_on_char '\n' output in
let buf = Buffer.create (String.length output + 512) in
let first = ref true in
List.iter (fun line ->
(* Check if line starts with [N] *)
let idx = if String.length line > 1 && line.[0] = '[' then
(try Scanf.sscanf line "[%d]" (fun n -> Some n) with _ -> None)
else None in
(match idx with
| Some n ->
(match Hashtbl.find_opt cmap.before n with
| Some comments ->
List.iter (fun c ->
if not !first then Buffer.add_char buf '\n';
first := false;
match c with
| Comment text -> Buffer.add_string buf text
| _ -> ()
) comments
| None -> ())
| None -> ());
if not !first then Buffer.add_char buf '\n';
first := false;
Buffer.add_string buf line
) lines;
Buffer.contents buf
(* ------------------------------------------------------------------ *)
(* Tool handlers *)
(* ------------------------------------------------------------------ *)
@@ -620,36 +603,36 @@ let handle_tool name args =
match name with
| "sx_read_tree" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let focus = args |> member "focus" |> to_string_option in
let max_depth = args |> member "max_depth" |> to_int_option in
let max_lines = args |> member "max_lines" |> to_int_option in
let offset = args |> member "offset" |> to_int_option |> Option.value ~default:0 in
(match focus with
| Some pattern ->
text_result (inject_comments (value_to_string (call_sx "annotate-focused" [tree; String pattern])) cmap)
text_result (inject_cst_comments (value_to_string (call_sx "annotate-focused" [tree; String pattern])) (extract_cst_comments cst))
| None ->
match max_lines with
| Some limit ->
text_result (inject_comments (value_to_string (call_sx "annotate-paginated"
[tree; Number (float_of_int offset); Number (float_of_int limit)])) cmap)
text_result (inject_cst_comments (value_to_string (call_sx "annotate-paginated"
[tree; Number (float_of_int offset); Number (float_of_int limit)])) (extract_cst_comments cst))
| None ->
match max_depth with
| Some depth ->
text_result (inject_comments (value_to_string (call_sx "summarise" [tree; Number (float_of_int depth)])) cmap)
text_result (inject_cst_comments (value_to_string (call_sx "summarise" [tree; Number (float_of_int depth)])) (extract_cst_comments cst))
| None ->
let full = value_to_string (call_sx "annotate-tree" [tree]) in
let line_count = 1 + String.fold_left (fun n c -> if c = '\n' then n + 1 else n) 0 full in
if line_count <= 200 then text_result (inject_comments full cmap)
if line_count <= 200 then text_result (inject_cst_comments full (extract_cst_comments cst))
else
let summary = value_to_string (call_sx "summarise" [tree; Number 2.0]) in
text_result (inject_comments (Printf.sprintf ";; File has %d lines — showing depth-2 summary. Use max_depth, max_lines, or focus to control output.\n%s" line_count summary) cmap))
text_result (inject_cst_comments (Printf.sprintf ";; File has %d lines — showing depth-2 summary. Use max_depth, max_lines, or focus to control output.\n%s" line_count summary) (extract_cst_comments cst)))
| "sx_summarise" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let depth = args |> member "depth" |> to_int in
text_result (inject_comments (value_to_string (call_sx "summarise" [tree; Number (float_of_int depth)])) cmap)
text_result (inject_cst_comments (value_to_string (call_sx "summarise" [tree; Number (float_of_int depth)])) (extract_cst_comments cst))
| "sx_read_subtree" ->
let tree = parse_file (args |> member "file" |> to_string) in
@@ -679,14 +662,14 @@ let handle_tool name args =
| "sx_get_siblings" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let path = resolve_path tree (args |> member "path" |> to_string) in
let output = value_to_string (call_sx "get-siblings" [tree; path]) in
(* Inject comments for top-level siblings *)
let is_top_level = match path with
| List [Number _] | List [Number _; Number _] -> true
| _ -> false in
text_result (if is_top_level then inject_comments output cmap else output)
text_result (if is_top_level then inject_cst_comments output (extract_cst_comments cst) else output)
| "sx_validate" ->
let tree = parse_file (args |> member "file" |> to_string) in
@@ -694,36 +677,31 @@ let handle_tool name args =
| "sx_replace_node" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let path = resolve_path tree (args |> member "path" |> to_string) in
let raw_src = args |> member "new_source" |> to_string in
(* Extract leading comments from fragment, attach to target position *)
let top_idx = match path with List (Number n :: _) -> Some (int_of_float n) | _ -> None in
let src = match top_idx with
| Some idx -> extract_fragment_comments raw_src cmap idx
| None -> raw_src in
write_edit_with_comments file cmap (call_sx "replace-node" [tree; path; String src])
let src = args |> member "new_source" |> to_string in
write_edit_cst file cst (call_sx "replace-node" [tree; path; String src])
| "sx_insert_child" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let path = resolve_path tree (args |> member "path" |> to_string) in
let index = args |> member "index" |> to_int in
let src = args |> member "new_source" |> to_string in
write_edit_with_comments file cmap (call_sx "insert-child" [tree; path; Number (float_of_int index); String src])
write_edit_cst file cst (call_sx "insert-child" [tree; path; Number (float_of_int index); String src])
| "sx_delete_node" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let path = resolve_path tree (args |> member "path" |> to_string) in
write_edit_with_comments file cmap (call_sx "delete-node" [tree; path])
write_edit_cst file cst (call_sx "delete-node" [tree; path])
| "sx_wrap_node" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let path = resolve_path tree (args |> member "path" |> to_string) in
let wrapper = args |> member "wrapper" |> to_string in
write_edit_with_comments file cmap (call_sx "wrap-node" [tree; path; String wrapper])
write_edit_cst file cst (call_sx "wrap-node" [tree; path; String wrapper])
| "sx_format_check" ->
let file = args |> member "file" |> to_string in
@@ -1592,13 +1570,13 @@ let handle_tool name args =
| "sx_rename_symbol" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let old_name = args |> member "old_name" |> to_string in
let new_name = args |> member "new_name" |> to_string in
let new_tree = call_sx "rename-symbol" [tree; String old_name; String new_name] in
let count = call_sx "count-renames" [tree; String old_name] in
let count_str = value_to_string count in
write_edit_with_comments file cmap (Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d))
write_edit_cst file cst (Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d))
|> (fun result ->
match result with
| `Assoc [("content", `List [`Assoc [("type", _); ("text", `String s)]])] when not (String.starts_with ~prefix:"Error" s) ->
@@ -1607,34 +1585,22 @@ let handle_tool name args =
| "sx_replace_by_pattern" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let pattern = args |> member "pattern" |> to_string in
let src = args |> member "new_source" |> to_string in
let all = args |> member "all" |> to_bool_option |> Option.value ~default:false in
if all then
write_edit_with_comments file cmap (call_sx "replace-all-by-pattern" [tree; String pattern; String src])
write_edit_cst file cst (call_sx "replace-all-by-pattern" [tree; String pattern; String src])
else
write_edit_with_comments file cmap (call_sx "replace-by-pattern" [tree; String pattern; String src])
write_edit_cst file cst (call_sx "replace-by-pattern" [tree; String pattern; String src])
| "sx_insert_near" ->
let file = args |> member "file" |> to_string in
let tree, cmap = parse_file_with_comments file in
let tree, cst = parse_file_cst file in
let pattern = args |> member "pattern" |> to_string in
let position = args |> member "position" |> to_string_option |> Option.value ~default:"after" in
let raw_src = args |> member "new_source" |> to_string in
(* Find the match index so we can attach fragment comments *)
let match_result = call_sx "find-all" [tree; String pattern] in
let target_idx = match match_result with
| List (List (List (Number n :: _) :: _) :: _) -> Some (int_of_float n)
| List (List (Number n :: _) :: _) -> Some (int_of_float n)
| _ -> None in
let insert_idx = match target_idx with
| Some n -> Some (if position = "before" then n else n + 1)
| None -> None in
let src = match insert_idx with
| Some idx -> extract_fragment_comments raw_src cmap idx
| None -> raw_src in
write_edit_with_comments file cmap (call_sx "insert-near-pattern" [tree; String pattern; String position; String src])
let src = args |> member "new_source" |> to_string in
write_edit_cst file cst (call_sx "insert-near-pattern" [tree; String pattern; String position; String src])
| "sx_rename_across" ->
let dir = args |> member "dir" |> to_string in
@@ -1645,7 +1611,7 @@ let handle_tool name args =
let results = List.filter_map (fun path ->
let rel = relative_path ~base:dir path in
try
let tree, cmap = parse_file_with_comments path in
let tree, cst = parse_file_cst path in
let count = call_sx "count-renames" [tree; String old_name] in
match count with
| Number n when n > 0.0 ->
@@ -1653,13 +1619,8 @@ let handle_tool name args =
Some (Printf.sprintf "%s: %d occurrences (dry run)" rel (int_of_float n))
else begin
let new_tree = call_sx "rename-symbol" [tree; String old_name; String new_name] in
let items = match new_tree with
| List items | ListRef { contents = items } -> items
| _ -> [new_tree]
in
let merged = reinterleave items cmap in
let source = pretty_print_file merged in
Out_channel.with_open_text path (fun oc -> output_string oc source);
let result = Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d) in
ignore (write_edit_cst path cst result);
Some (Printf.sprintf "%s: %d occurrences renamed" rel (int_of_float n))
end
| _ -> None