From af63d49451131d1979f6c299ac496a99ac015181 Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 3 Apr 2026 18:13:34 +0000 Subject: [PATCH] Migrate MCP tools from comment_map to CST-based round-tripping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the comment preservation workaround (comment_map type, separate_comments, reinterleave, strip_interior_comments, extract_fragment_comments, inject_comments — ~170 lines) with CST-based editing (~80 lines). write_edit_cst: compares old AST vs new AST per node. Unchanged nodes keep original source verbatim. Changed nodes are pretty-printed with original leading trivia preserved. New nodes (insertions) get minimal formatting. parse_file_cst: returns (AST tree, CST file). AST goes to tree-tools, CST is used for write-back. extract_cst_comments / inject_cst_comments: read comment trivia from CST nodes for summarise/read_tree display. Net: -39 lines. 24/24 CST round-trip tests, 2583/2583 evaluator tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- hosts/ocaml/bin/mcp_tree.ml | 291 ++++++++++++++++-------------------- 1 file changed, 126 insertions(+), 165 deletions(-) diff --git a/hosts/ocaml/bin/mcp_tree.ml b/hosts/ocaml/bin/mcp_tree.ml index 663ad2ba..8dfdc068 100644 --- a/hosts/ocaml/bin/mcp_tree.ml +++ b/hosts/ocaml/bin/mcp_tree.ml @@ -313,79 +313,70 @@ let parse_file ?(comments=false) path = let exprs = Sx_parser.parse_all ~comments src in List exprs -(* Comment preservation for edit round-trips. - Comments are separated before tree-tools operate (so paths stay correct), - then re-interleaved before writing. *) +(* CST-based round-tripping — replaces comment_map machinery. + Parse file as CST for lossless writes, project to AST for tree-tools. *) -type comment_map = { - before : (int, value list) Hashtbl.t; (* expr_index → comments before it *) - trailing : value list; (* comments after last expression *) -} +(* Parse file → (AST for tree-tools, CST for writing back) *) +let parse_file_cst path = + let cst = Sx_parser.parse_file_cst path in + let ast = List (List.map Sx_cst.cst_to_ast cst.nodes) in + (ast, cst) -(* Recursively strip Comment nodes from inside lists — tree-tools - must not see them or index-based paths break. *) -let rec strip_interior_comments v = - match v with - | List items -> - let clean = List.filter (function Comment _ -> false | _ -> true) items in - List (List.map strip_interior_comments clean) - | ListRef { contents = items } -> - let clean = List.filter (function Comment _ -> false | _ -> true) items in - List (List.map strip_interior_comments clean) - | _ -> v +(* Extract comment text from CST trivia for display in summarise/read_tree *) +let extract_cst_comments (cst : Sx_parser.cst_file) = + let tbl = Hashtbl.create 16 in + List.iteri (fun i node -> + let trivia = match node with + | Sx_cst.CstAtom r -> r.leading_trivia + | Sx_cst.CstList r -> r.leading_trivia + | Sx_cst.CstDict r -> r.leading_trivia + in + let comments = List.filter_map (function + | Sx_cst.LineComment text -> + (* Strip trailing newline for display *) + let t = if String.length text > 0 && text.[String.length text - 1] = '\n' + then String.sub text 0 (String.length text - 1) else text in + Some t + | _ -> None + ) trivia in + if comments <> [] then + Hashtbl.replace tbl i comments + ) cst.nodes; + tbl -let separate_comments items = - let before = Hashtbl.create 16 in - let exprs = ref [] in - let pending = ref [] in - let idx = ref 0 in - List.iter (fun item -> - match item with - | Comment _ -> pending := item :: !pending - | _ -> - if !pending <> [] then - Hashtbl.replace before !idx (List.rev !pending); - pending := []; - (* Strip interior comments from the expression before passing to tree-tools *) - exprs := strip_interior_comments item :: !exprs; - incr idx - ) items; - let trailing = List.rev !pending in - (List.rev !exprs, { before; trailing }) - -let reinterleave exprs cmap = - let result = ref [] in - List.iteri (fun i expr -> - (match Hashtbl.find_opt cmap.before i with - | Some cs -> List.iter (fun c -> result := c :: !result) cs - | None -> ()); - result := expr :: !result - ) exprs; - List.iter (fun c -> result := c :: !result) cmap.trailing; - List.rev !result - -(* Extract leading comments from fragment source and attach to a position - in the comment map. Returns the clean source (comments stripped). *) -let extract_fragment_comments src cmap target_idx = - let items = Sx_parser.parse_all ~comments:true src in - let cmt_nodes = ref [] in - let expr_nodes = ref [] in - List.iter (fun item -> - match item with - | Comment _ -> cmt_nodes := item :: !cmt_nodes - | _ -> expr_nodes := item :: !expr_nodes - ) items; - let cmts = List.rev !cmt_nodes in - if cmts <> [] then begin - let existing = match Hashtbl.find_opt cmap.before target_idx with - | Some cs -> cs | None -> [] in - Hashtbl.replace cmap.before target_idx (existing @ cmts) - end; - (* Reconstruct source without comments *) - if cmts = [] then src +(* Inject comment text into summarise/annotate output. + Matches [N] markers and inserts comment lines that precede expr N. *) +let inject_cst_comments output comment_tbl = + if Hashtbl.length comment_tbl = 0 then output else - let clean = List.rev !expr_nodes in - String.concat " " (List.map inspect clean) + let lines = String.split_on_char '\n' output in + let buf = Buffer.create (String.length output + 512) in + let first = ref true in + List.iter (fun line -> + let idx = if String.length line > 1 && line.[0] = '[' then + (try Scanf.sscanf line "[%d]" (fun n -> Some n) with _ -> None) + else + let trimmed = String.trim line in + if String.length trimmed > 1 && trimmed.[0] = '[' then + (try Scanf.sscanf trimmed "[%d]" (fun n -> Some n) with _ -> None) + else None + in + (match idx with + | Some n -> + (match Hashtbl.find_opt comment_tbl n with + | Some comments -> + List.iter (fun text -> + if not !first then Buffer.add_char buf '\n'; + first := false; + Buffer.add_string buf text + ) comments + | None -> ()) + | None -> ()); + if not !first then Buffer.add_char buf '\n'; + first := false; + Buffer.add_string buf line + ) lines; + Buffer.contents buf let parse_path_str s = (* Parse SX path string: "(0 3 2)" or "(0,3,2)" or "0 3 2" → SX list of numbers. @@ -551,25 +542,49 @@ let pretty_print_file exprs = emit true false exprs; Buffer.contents buf -(* Parse a file preserving comments, return clean tree + comment map *) -let parse_file_with_comments path = - let src = In_channel.with_open_text path In_channel.input_all in - let all_items = Sx_parser.parse_all ~comments:true src in - let exprs, cmap = separate_comments all_items in - (List exprs, cmap) -(* Write an edited tree back with comments re-interleaved *) -let write_edit_with_comments file cmap result = +(* Apply an AST-level edit result back to the CST and write the file. + Unchanged nodes keep their original source; changed nodes are pretty-printed + with the original leading trivia preserved. *) +let write_edit_cst file (cst : Sx_parser.cst_file) result = match result with | Dict d -> (match Hashtbl.find_opt d "ok" with | Some new_tree -> - let items = match new_tree with + let new_items = match new_tree with | List items | ListRef { contents = items } -> items | _ -> [new_tree] in - let merged = reinterleave items cmap in - let source = pretty_print_file merged in + let old_nodes = cst.nodes in + let old_asts = List.map Sx_cst.cst_to_ast old_nodes in + let new_cst_nodes = List.mapi (fun i new_ast -> + if i < List.length old_nodes then + let old_ast = List.nth old_asts i in + if inspect old_ast = inspect new_ast then + List.nth old_nodes i + else + let pp = pretty_print_value new_ast in + let new_cst = Sx_parser.parse_all_cst pp in + let orig_trivia = match List.nth old_nodes i with + | Sx_cst.CstAtom r -> r.leading_trivia + | Sx_cst.CstList r -> r.leading_trivia + | Sx_cst.CstDict r -> r.leading_trivia + in + (match new_cst.nodes with + | [node] -> + (match node with + | Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = orig_trivia } + | Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = orig_trivia } + | Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = orig_trivia }) + | _ -> List.nth old_nodes i) + else + let pp = pretty_print_value new_ast in + let new_cst = Sx_parser.parse_all_cst ("\n\n" ^ pp) in + (match new_cst.nodes with node :: _ -> node | [] -> + Sx_cst.CstAtom { leading_trivia = []; token = pp; + value = new_ast; span = { start_offset = 0; end_offset = 0 } }) + ) new_items in + let source = Sx_cst.cst_file_to_source new_cst_nodes cst.trailing_trivia in Out_channel.with_open_text file (fun oc -> output_string oc source); text_result (Printf.sprintf "OK — wrote %d bytes to %s" (String.length source) file) | None -> @@ -579,38 +594,6 @@ let write_edit_with_comments file cmap result = error_result ("Error: " ^ err)) | _ -> error_result "Unexpected result type" -(* Inject comment text into summarise/annotate output. - Matches [N] markers and inserts the comment block that precedes expr N. *) -let inject_comments output cmap = - if Hashtbl.length cmap.before = 0 && cmap.trailing = [] then output - else - let lines = String.split_on_char '\n' output in - let buf = Buffer.create (String.length output + 512) in - let first = ref true in - List.iter (fun line -> - (* Check if line starts with [N] *) - let idx = if String.length line > 1 && line.[0] = '[' then - (try Scanf.sscanf line "[%d]" (fun n -> Some n) with _ -> None) - else None in - (match idx with - | Some n -> - (match Hashtbl.find_opt cmap.before n with - | Some comments -> - List.iter (fun c -> - if not !first then Buffer.add_char buf '\n'; - first := false; - match c with - | Comment text -> Buffer.add_string buf text - | _ -> () - ) comments - | None -> ()) - | None -> ()); - if not !first then Buffer.add_char buf '\n'; - first := false; - Buffer.add_string buf line - ) lines; - Buffer.contents buf - (* ------------------------------------------------------------------ *) (* Tool handlers *) (* ------------------------------------------------------------------ *) @@ -620,36 +603,36 @@ let handle_tool name args = match name with | "sx_read_tree" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let focus = args |> member "focus" |> to_string_option in let max_depth = args |> member "max_depth" |> to_int_option in let max_lines = args |> member "max_lines" |> to_int_option in let offset = args |> member "offset" |> to_int_option |> Option.value ~default:0 in (match focus with | Some pattern -> - text_result (inject_comments (value_to_string (call_sx "annotate-focused" [tree; String pattern])) cmap) + text_result (inject_cst_comments (value_to_string (call_sx "annotate-focused" [tree; String pattern])) (extract_cst_comments cst)) | None -> match max_lines with | Some limit -> - text_result (inject_comments (value_to_string (call_sx "annotate-paginated" - [tree; Number (float_of_int offset); Number (float_of_int limit)])) cmap) + text_result (inject_cst_comments (value_to_string (call_sx "annotate-paginated" + [tree; Number (float_of_int offset); Number (float_of_int limit)])) (extract_cst_comments cst)) | None -> match max_depth with | Some depth -> - text_result (inject_comments (value_to_string (call_sx "summarise" [tree; Number (float_of_int depth)])) cmap) + text_result (inject_cst_comments (value_to_string (call_sx "summarise" [tree; Number (float_of_int depth)])) (extract_cst_comments cst)) | None -> let full = value_to_string (call_sx "annotate-tree" [tree]) in let line_count = 1 + String.fold_left (fun n c -> if c = '\n' then n + 1 else n) 0 full in - if line_count <= 200 then text_result (inject_comments full cmap) + if line_count <= 200 then text_result (inject_cst_comments full (extract_cst_comments cst)) else let summary = value_to_string (call_sx "summarise" [tree; Number 2.0]) in - text_result (inject_comments (Printf.sprintf ";; File has %d lines — showing depth-2 summary. Use max_depth, max_lines, or focus to control output.\n%s" line_count summary) cmap)) + text_result (inject_cst_comments (Printf.sprintf ";; File has %d lines — showing depth-2 summary. Use max_depth, max_lines, or focus to control output.\n%s" line_count summary) (extract_cst_comments cst))) | "sx_summarise" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let depth = args |> member "depth" |> to_int in - text_result (inject_comments (value_to_string (call_sx "summarise" [tree; Number (float_of_int depth)])) cmap) + text_result (inject_cst_comments (value_to_string (call_sx "summarise" [tree; Number (float_of_int depth)])) (extract_cst_comments cst)) | "sx_read_subtree" -> let tree = parse_file (args |> member "file" |> to_string) in @@ -679,14 +662,14 @@ let handle_tool name args = | "sx_get_siblings" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let path = resolve_path tree (args |> member "path" |> to_string) in let output = value_to_string (call_sx "get-siblings" [tree; path]) in (* Inject comments for top-level siblings *) let is_top_level = match path with | List [Number _] | List [Number _; Number _] -> true | _ -> false in - text_result (if is_top_level then inject_comments output cmap else output) + text_result (if is_top_level then inject_cst_comments output (extract_cst_comments cst) else output) | "sx_validate" -> let tree = parse_file (args |> member "file" |> to_string) in @@ -694,36 +677,31 @@ let handle_tool name args = | "sx_replace_node" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let path = resolve_path tree (args |> member "path" |> to_string) in - let raw_src = args |> member "new_source" |> to_string in - (* Extract leading comments from fragment, attach to target position *) - let top_idx = match path with List (Number n :: _) -> Some (int_of_float n) | _ -> None in - let src = match top_idx with - | Some idx -> extract_fragment_comments raw_src cmap idx - | None -> raw_src in - write_edit_with_comments file cmap (call_sx "replace-node" [tree; path; String src]) + let src = args |> member "new_source" |> to_string in + write_edit_cst file cst (call_sx "replace-node" [tree; path; String src]) | "sx_insert_child" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let path = resolve_path tree (args |> member "path" |> to_string) in let index = args |> member "index" |> to_int in let src = args |> member "new_source" |> to_string in - write_edit_with_comments file cmap (call_sx "insert-child" [tree; path; Number (float_of_int index); String src]) + write_edit_cst file cst (call_sx "insert-child" [tree; path; Number (float_of_int index); String src]) | "sx_delete_node" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let path = resolve_path tree (args |> member "path" |> to_string) in - write_edit_with_comments file cmap (call_sx "delete-node" [tree; path]) + write_edit_cst file cst (call_sx "delete-node" [tree; path]) | "sx_wrap_node" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let path = resolve_path tree (args |> member "path" |> to_string) in let wrapper = args |> member "wrapper" |> to_string in - write_edit_with_comments file cmap (call_sx "wrap-node" [tree; path; String wrapper]) + write_edit_cst file cst (call_sx "wrap-node" [tree; path; String wrapper]) | "sx_format_check" -> let file = args |> member "file" |> to_string in @@ -1592,13 +1570,13 @@ let handle_tool name args = | "sx_rename_symbol" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let old_name = args |> member "old_name" |> to_string in let new_name = args |> member "new_name" |> to_string in let new_tree = call_sx "rename-symbol" [tree; String old_name; String new_name] in let count = call_sx "count-renames" [tree; String old_name] in let count_str = value_to_string count in - write_edit_with_comments file cmap (Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d)) + write_edit_cst file cst (Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d)) |> (fun result -> match result with | `Assoc [("content", `List [`Assoc [("type", _); ("text", `String s)]])] when not (String.starts_with ~prefix:"Error" s) -> @@ -1607,34 +1585,22 @@ let handle_tool name args = | "sx_replace_by_pattern" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let pattern = args |> member "pattern" |> to_string in let src = args |> member "new_source" |> to_string in let all = args |> member "all" |> to_bool_option |> Option.value ~default:false in if all then - write_edit_with_comments file cmap (call_sx "replace-all-by-pattern" [tree; String pattern; String src]) + write_edit_cst file cst (call_sx "replace-all-by-pattern" [tree; String pattern; String src]) else - write_edit_with_comments file cmap (call_sx "replace-by-pattern" [tree; String pattern; String src]) + write_edit_cst file cst (call_sx "replace-by-pattern" [tree; String pattern; String src]) | "sx_insert_near" -> let file = args |> member "file" |> to_string in - let tree, cmap = parse_file_with_comments file in + let tree, cst = parse_file_cst file in let pattern = args |> member "pattern" |> to_string in let position = args |> member "position" |> to_string_option |> Option.value ~default:"after" in - let raw_src = args |> member "new_source" |> to_string in - (* Find the match index so we can attach fragment comments *) - let match_result = call_sx "find-all" [tree; String pattern] in - let target_idx = match match_result with - | List (List (List (Number n :: _) :: _) :: _) -> Some (int_of_float n) - | List (List (Number n :: _) :: _) -> Some (int_of_float n) - | _ -> None in - let insert_idx = match target_idx with - | Some n -> Some (if position = "before" then n else n + 1) - | None -> None in - let src = match insert_idx with - | Some idx -> extract_fragment_comments raw_src cmap idx - | None -> raw_src in - write_edit_with_comments file cmap (call_sx "insert-near-pattern" [tree; String pattern; String position; String src]) + let src = args |> member "new_source" |> to_string in + write_edit_cst file cst (call_sx "insert-near-pattern" [tree; String pattern; String position; String src]) | "sx_rename_across" -> let dir = args |> member "dir" |> to_string in @@ -1645,7 +1611,7 @@ let handle_tool name args = let results = List.filter_map (fun path -> let rel = relative_path ~base:dir path in try - let tree, cmap = parse_file_with_comments path in + let tree, cst = parse_file_cst path in let count = call_sx "count-renames" [tree; String old_name] in match count with | Number n when n > 0.0 -> @@ -1653,13 +1619,8 @@ let handle_tool name args = Some (Printf.sprintf "%s: %d occurrences (dry run)" rel (int_of_float n)) else begin let new_tree = call_sx "rename-symbol" [tree; String old_name; String new_name] in - let items = match new_tree with - | List items | ListRef { contents = items } -> items - | _ -> [new_tree] - in - let merged = reinterleave items cmap in - let source = pretty_print_file merged in - Out_channel.with_open_text path (fun oc -> output_string oc source); + let result = Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d) in + ignore (write_edit_cst path cst result); Some (Printf.sprintf "%s: %d occurrences renamed" rel (int_of_float n)) end | _ -> None