diff --git a/hosts/ocaml/bin/mcp_tree.ml b/hosts/ocaml/bin/mcp_tree.ml index fb0be1bb..1a3fe71b 100644 --- a/hosts/ocaml/bin/mcp_tree.ml +++ b/hosts/ocaml/bin/mcp_tree.ml @@ -308,11 +308,49 @@ let call_sx fn_name args = let fn = env_get e fn_name in Sx_ref.cek_call fn (List args) -let parse_file path = +let parse_file ?(comments=false) path = let src = In_channel.with_open_text path In_channel.input_all in - let exprs = Sx_parser.parse_all src in + let exprs = Sx_parser.parse_all ~comments src in List exprs +(* Comment preservation for edit round-trips. + Comments are separated before tree-tools operate (so paths stay correct), + then re-interleaved before writing. *) + +type comment_map = { + before : (int, value list) Hashtbl.t; (* expr_index → comments before it *) + trailing : value list; (* comments after last expression *) +} + +let separate_comments items = + let before = Hashtbl.create 16 in + let exprs = ref [] in + let pending = ref [] in + let idx = ref 0 in + List.iter (fun item -> + match item with + | Comment _ -> pending := item :: !pending + | _ -> + if !pending <> [] then + Hashtbl.replace before !idx (List.rev !pending); + pending := []; + exprs := item :: !exprs; + incr idx + ) items; + let trailing = List.rev !pending in + (List.rev !exprs, { before; trailing }) + +let reinterleave exprs cmap = + let result = ref [] in + List.iteri (fun i expr -> + (match Hashtbl.find_opt cmap.before i with + | Some cs -> List.iter (fun c -> result := c :: !result) cs + | None -> ()); + result := expr :: !result + ) exprs; + List.iter (fun c -> result := c :: !result) cmap.trailing; + List.rev !result + let parse_path_str s = (* Parse SX path string: "(0 3 2)" or "(0,3,2)" or "0 3 2" → SX list of numbers. Commas are unquote in SX, so strip them before parsing. *) @@ -437,13 +475,63 @@ let pretty_print_value ?(max_width=80) v = Buffer.contents buf let pretty_print_file exprs = - String.concat "\n\n" (List.map pretty_print_value exprs) ^ "\n" + (* Comments attach to the following expression — no blank line between + comment and expression. Non-comment expressions get one blank line + between them (matching the old String.concat "\n\n" behaviour). *) + let buf = Buffer.create 4096 in + let rec emit first prev_was_comment = function + | [] -> () + | Comment text :: rest -> + (* Blank line before comment block, unless it's the first item + or follows another comment *) + if not first && not prev_was_comment then Buffer.add_char buf '\n'; + Buffer.add_string buf text; + Buffer.add_char buf '\n'; + emit false true rest + | v :: rest -> + (* Blank line between non-comment expressions; no blank line + after a comment (comment sticks to its expression) *) + if not first && not prev_was_comment then Buffer.add_char buf '\n'; + Buffer.add_string buf (pretty_print_value v); + Buffer.add_char buf '\n'; + emit false false rest + in + emit true false exprs; + Buffer.contents buf + +(* Parse a file preserving comments, return clean tree + comment map *) +let parse_file_with_comments path = + let src = In_channel.with_open_text path In_channel.input_all in + let all_items = Sx_parser.parse_all ~comments:true src in + let exprs, cmap = separate_comments all_items in + (List exprs, cmap) + +(* Write an edited tree back with comments re-interleaved *) +let write_edit_with_comments file cmap result = + match result with + | Dict d -> + (match Hashtbl.find_opt d "ok" with + | Some new_tree -> + let items = match new_tree with + | List items | ListRef { contents = items } -> items + | _ -> [new_tree] + in + let merged = reinterleave items cmap in + let source = pretty_print_file merged in + Out_channel.with_open_text file (fun oc -> output_string oc source); + text_result (Printf.sprintf "OK — wrote %d bytes to %s" (String.length source) file) + | None -> + let err = match Hashtbl.find_opt d "error" with + | Some (String s) -> s | Some v -> value_to_string v | None -> "Unknown error" + in + error_result ("Error: " ^ err)) + | _ -> error_result "Unexpected result type" (* ------------------------------------------------------------------ *) (* Tool handlers *) (* ------------------------------------------------------------------ *) -let rec handle_tool name args = +let handle_tool name args = let open Yojson.Safe.Util in match name with | "sx_read_tree" -> @@ -519,31 +607,31 @@ let rec handle_tool name args = | "sx_replace_node" -> let file = args |> member "file" |> to_string in - let tree = parse_file file in + let tree, cmap = parse_file_with_comments file in let path = resolve_path tree (args |> member "path" |> to_string) in let src = args |> member "new_source" |> to_string in - write_edit file (call_sx "replace-node" [tree; path; String src]) + write_edit_with_comments file cmap (call_sx "replace-node" [tree; path; String src]) | "sx_insert_child" -> let file = args |> member "file" |> to_string in - let tree = parse_file file in + let tree, cmap = parse_file_with_comments file in let path = resolve_path tree (args |> member "path" |> to_string) in let index = args |> member "index" |> to_int in let src = args |> member "new_source" |> to_string in - write_edit file (call_sx "insert-child" [tree; path; Number (float_of_int index); String src]) + write_edit_with_comments file cmap (call_sx "insert-child" [tree; path; Number (float_of_int index); String src]) | "sx_delete_node" -> let file = args |> member "file" |> to_string in - let tree = parse_file file in + let tree, cmap = parse_file_with_comments file in let path = resolve_path tree (args |> member "path" |> to_string) in - write_edit file (call_sx "delete-node" [tree; path]) + write_edit_with_comments file cmap (call_sx "delete-node" [tree; path]) | "sx_wrap_node" -> let file = args |> member "file" |> to_string in - let tree = parse_file file in + let tree, cmap = parse_file_with_comments file in let path = resolve_path tree (args |> member "path" |> to_string) in let wrapper = args |> member "wrapper" |> to_string in - write_edit file (call_sx "wrap-node" [tree; path; String wrapper]) + write_edit_with_comments file cmap (call_sx "wrap-node" [tree; path; String wrapper]) | "sx_format_check" -> let file = args |> member "file" |> to_string in @@ -734,7 +822,7 @@ let rec handle_tool name args = | "sx_pretty_print" -> let file = args |> member "file" |> to_string in - let exprs = Sx_parser.parse_all (In_channel.with_open_text file In_channel.input_all) in + let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text file In_channel.input_all) in let source = pretty_print_file exprs in Out_channel.with_open_text file (fun oc -> output_string oc source); text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length exprs)) @@ -1364,9 +1452,9 @@ let rec handle_tool name args = | "sx_write_file" -> let file = args |> member "file" |> to_string in let source = args |> member "source" |> to_string in - (* Validate by parsing first *) + (* Validate by parsing first — preserve comments *) (try - let exprs = Sx_parser.parse_all source in + let exprs = Sx_parser.parse_all ~comments:true source in if exprs = [] then error_result "Source parsed to empty — nothing to write" else begin let output = pretty_print_file exprs in @@ -1377,13 +1465,13 @@ let rec handle_tool name args = | "sx_rename_symbol" -> let file = args |> member "file" |> to_string in - let tree = parse_file file in + let tree, cmap = parse_file_with_comments file in let old_name = args |> member "old_name" |> to_string in let new_name = args |> member "new_name" |> to_string in let new_tree = call_sx "rename-symbol" [tree; String old_name; String new_name] in let count = call_sx "count-renames" [tree; String old_name] in let count_str = value_to_string count in - write_edit file (Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d)) + write_edit_with_comments file cmap (Dict (let d = Hashtbl.create 2 in Hashtbl.replace d "ok" new_tree; d)) |> (fun result -> match result with | `Assoc [("content", `List [`Assoc [("type", _); ("text", `String s)]])] when not (String.starts_with ~prefix:"Error" s) -> @@ -1392,22 +1480,22 @@ let rec handle_tool name args = | "sx_replace_by_pattern" -> let file = args |> member "file" |> to_string in - let tree = parse_file file in + let tree, cmap = parse_file_with_comments file in let pattern = args |> member "pattern" |> to_string in let src = args |> member "new_source" |> to_string in let all = args |> member "all" |> to_bool_option |> Option.value ~default:false in if all then - write_edit file (call_sx "replace-all-by-pattern" [tree; String pattern; String src]) + write_edit_with_comments file cmap (call_sx "replace-all-by-pattern" [tree; String pattern; String src]) else - write_edit file (call_sx "replace-by-pattern" [tree; String pattern; String src]) + write_edit_with_comments file cmap (call_sx "replace-by-pattern" [tree; String pattern; String src]) | "sx_insert_near" -> let file = args |> member "file" |> to_string in - let tree = parse_file file in + let tree, cmap = parse_file_with_comments file in let pattern = args |> member "pattern" |> to_string in let position = args |> member "position" |> to_string_option |> Option.value ~default:"after" in let src = args |> member "new_source" |> to_string in - write_edit file (call_sx "insert-near-pattern" [tree; String pattern; String position; String src]) + write_edit_with_comments file cmap (call_sx "insert-near-pattern" [tree; String pattern; String position; String src]) | "sx_rename_across" -> let dir = args |> member "dir" |> to_string in @@ -1418,7 +1506,7 @@ let rec handle_tool name args = let results = List.filter_map (fun path -> let rel = relative_path ~base:dir path in try - let tree = parse_file path in + let tree, cmap = parse_file_with_comments path in let count = call_sx "count-renames" [tree; String old_name] in match count with | Number n when n > 0.0 -> @@ -1430,7 +1518,8 @@ let rec handle_tool name args = | List items | ListRef { contents = items } -> items | _ -> [new_tree] in - let source = pretty_print_file items in + let merged = reinterleave items cmap in + let source = pretty_print_file merged in Out_channel.with_open_text path (fun oc -> output_string oc source); Some (Printf.sprintf "%s: %d occurrences renamed" rel (int_of_float n)) end @@ -2095,25 +2184,6 @@ let rec handle_tool name args = | _ -> error_result ("Unknown tool: " ^ name) -and write_edit file result = - match result with - | Dict d -> - (match Hashtbl.find_opt d "ok" with - | Some new_tree -> - let items = match new_tree with - | List items | ListRef { contents = items } -> items - | _ -> [new_tree] - in - let source = pretty_print_file items in - Out_channel.with_open_text file (fun oc -> output_string oc source); - text_result (Printf.sprintf "OK — wrote %d bytes to %s" (String.length source) file) - | None -> - let err = match Hashtbl.find_opt d "error" with - | Some (String s) -> s | Some v -> value_to_string v | None -> "Unknown error" - in - error_result ("Error: " ^ err)) - | _ -> error_result "Unexpected result type" - (* ------------------------------------------------------------------ *) (* MCP tool definitions *) (* ------------------------------------------------------------------ *) diff --git a/hosts/ocaml/lib/sx_parser.ml b/hosts/ocaml/lib/sx_parser.ml index 307d6270..f71d1364 100644 --- a/hosts/ocaml/lib/sx_parser.ml +++ b/hosts/ocaml/lib/sx_parser.ml @@ -30,6 +30,23 @@ let skip_whitespace_and_comments s = | _ -> () in go () +(* Skip whitespace only — leaves comments for capture *) +let skip_whitespace s = + let rec go () = + if at_end s then () + else match s.src.[s.pos] with + | ' ' | '\t' | '\n' | '\r' -> advance s; go () + | _ -> () + in go () + +(* Read a comment line starting at ';', returns text after ";;" (or ";") *) +let read_comment s = + let start = s.pos in + while s.pos < s.len && s.src.[s.pos] <> '\n' do advance s done; + let text = String.sub s.src start (s.pos - start) in + if s.pos < s.len then advance s; + text + (* Character classification — matches spec/parser.sx ident-start/ident-char. ident-start: a-z A-Z _ ~ * + - > < = / ! ? & ident-char: ident-start plus 0-9 . : / # , *) @@ -202,23 +219,48 @@ and read_dict s = in go () -(** Parse a string into a list of SX values. *) -let parse_all src = +(** Parse a string into a list of SX values. + When [~comments:true], top-level ;; comments are preserved as [Comment] + nodes in the result list. Default is [false] (strip comments). *) +let parse_all ?(comments=false) src = let s = make_state src in let results = ref [] in let rec go () = - skip_whitespace_and_comments s; - if at_end s then List.rev !results - else begin - results := read_value s :: !results; - go () + if comments then begin + skip_whitespace s; + if at_end s then List.rev !results + else if s.src.[s.pos] = ';' then begin + (* Collect consecutive comment lines into one Comment node *) + let lines = ref [] in + let rec collect () = + skip_whitespace s; + if not (at_end s) && s.src.[s.pos] = ';' then begin + lines := read_comment s :: !lines; + collect () + end + in + collect (); + let text = String.concat "\n" (List.rev !lines) in + results := Comment text :: !results; + go () + end else begin + results := read_value s :: !results; + go () + end + end else begin + skip_whitespace_and_comments s; + if at_end s then List.rev !results + else begin + results := read_value s :: !results; + go () + end end in go () (** Parse a file into a list of SX values. *) -let parse_file path = +let parse_file ?(comments=false) path = let ic = open_in path in let n = in_channel_length ic in let src = really_input_string ic n in close_in ic; - parse_all src + parse_all ~comments src diff --git a/hosts/ocaml/lib/sx_types.ml b/hosts/ocaml/lib/sx_types.ml index b875a51f..a02b0030 100644 --- a/hosts/ocaml/lib/sx_types.ml +++ b/hosts/ocaml/lib/sx_types.ml @@ -67,6 +67,7 @@ and value = | CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *) | CekFrame of cek_frame (** Optimized CEK continuation frame. *) | VmClosure of vm_closure (** VM-compiled closure — callable within the VM without allocating a new VM. *) + | Comment of string (** Source comment — preserved through parse/edit/pretty-print round-trips. *) (** CEK machine state — record instead of Dict for performance. 5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *) @@ -347,6 +348,7 @@ let type_of = function | CekState _ -> "dict" (* CEK state behaves as a dict for type checks *) | CekFrame _ -> "dict" | VmClosure _ -> "function" + | Comment _ -> "comment" let is_nil = function Nil -> true | _ -> false let is_lambda = function Lambda _ -> true | _ -> false @@ -541,3 +543,4 @@ let rec inspect = function | CekState _ -> "" | CekFrame f -> Printf.sprintf "" f.cf_type | VmClosure cl -> Printf.sprintf "" (match cl.vm_name with Some n -> n | None -> "anon") + | Comment text -> ";;" ^ text