diff --git a/hosts/ocaml/bin/mcp_tree.ml b/hosts/ocaml/bin/mcp_tree.ml index 8dfdc068..e52448ea 100644 --- a/hosts/ocaml/bin/mcp_tree.ml +++ b/hosts/ocaml/bin/mcp_tree.ml @@ -308,9 +308,9 @@ let call_sx fn_name args = let fn = env_get e fn_name in Sx_ref.cek_call fn (List args) -let parse_file ?(comments=false) path = +let parse_file path = let src = In_channel.with_open_text path In_channel.input_all in - let exprs = Sx_parser.parse_all ~comments src in + let exprs = Sx_parser.parse_all src in List exprs (* CST-based round-tripping — replaces comment_map machinery. @@ -461,23 +461,12 @@ let rec est_width = function 2 + List.fold_left (fun acc x -> acc + est_width x + 1) 0 items | _ -> 10 -let rec has_interior_comments = function - | List items | ListRef { contents = items } -> - List.exists (fun item -> match item with - | Comment _ -> true - | List _ | ListRef _ -> has_interior_comments item - | _ -> false - ) items - | _ -> false - let pretty_print_value ?(max_width=80) v = let buf = Buffer.create 4096 in let rec pp indent v = match v with - | Comment text -> - Buffer.add_string buf text | List items | ListRef { contents = items } when items <> [] -> - if est_width v <= max_width - indent && not (has_interior_comments v) then + if est_width v <= max_width - indent then (* Fits on one line and has no comments *) Buffer.add_string buf (pp_atom v) else begin @@ -489,12 +478,6 @@ let pretty_print_value ?(max_width=80) v = let rest = List.tl items in let rec emit = function | [] -> () - | Comment text :: rest -> - (* Interior comment: indented, on its own line *) - Buffer.add_char buf '\n'; - Buffer.add_string buf (String.make child_indent ' '); - Buffer.add_string buf text; - emit rest | Keyword k :: v :: rest -> Buffer.add_char buf '\n'; Buffer.add_string buf (String.make child_indent ' '); @@ -517,30 +500,6 @@ let pretty_print_value ?(max_width=80) v = pp 0 v; Buffer.contents buf -let pretty_print_file exprs = - (* Comments attach to the following expression — no blank line between - comment and expression. Non-comment expressions get one blank line - between them (matching the old String.concat "\n\n" behaviour). *) - let buf = Buffer.create 4096 in - let rec emit first prev_was_comment = function - | [] -> () - | Comment text :: rest -> - (* Blank line before comment block, unless it's the first item - or follows another comment *) - if not first && not prev_was_comment then Buffer.add_char buf '\n'; - Buffer.add_string buf text; - Buffer.add_char buf '\n'; - emit false true rest - | v :: rest -> - (* Blank line between non-comment expressions; no blank line - after a comment (comment sticks to its expression) *) - if not first && not prev_was_comment then Buffer.add_char buf '\n'; - Buffer.add_string buf (pretty_print_value v); - Buffer.add_char buf '\n'; - emit false false rest - in - emit true false exprs; - Buffer.contents buf (* Apply an AST-level edit result back to the CST and write the file. @@ -892,10 +851,28 @@ let handle_tool name args = | "sx_pretty_print" -> let file = args |> member "file" |> to_string in - let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text file In_channel.input_all) in - let source = pretty_print_file exprs in + let cst = Sx_parser.parse_file_cst file in + (* Reformat each node's code while preserving trivia (comments, spacing) *) + let reformatted = List.map (fun node -> + let trivia = match node with + | Sx_cst.CstAtom r -> r.leading_trivia + | Sx_cst.CstList r -> r.leading_trivia + | Sx_cst.CstDict r -> r.leading_trivia + in + let ast = Sx_cst.cst_to_ast node in + let pp = pretty_print_value ast in + let new_cst = Sx_parser.parse_all_cst pp in + match new_cst.nodes with + | [n] -> + (match n with + | Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = trivia } + | Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = trivia } + | Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = trivia }) + | _ -> node + ) cst.nodes in + let source = Sx_cst.cst_file_to_source reformatted cst.trailing_trivia in Out_channel.with_open_text file (fun oc -> output_string oc source); - text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length exprs)) + text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length cst.nodes)) | "sx_changed" -> let base_ref = args |> member "ref" |> to_string_option |> Option.value ~default:"main" in @@ -1021,14 +998,24 @@ let handle_tool name args = let all_docs = List.concat_map (fun path -> let rel = relative_path ~base:dir path in try - let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text path In_channel.input_all) in - (* Walk list tracking preceding comment *) - let rec collect prev_comment = function - | [] -> [] - | Comment text :: rest -> collect (Some text) rest - | (List (Symbol head :: Symbol name :: params_rest) as _expr) :: rest - | (ListRef { contents = Symbol head :: Symbol name :: params_rest } as _expr) :: rest -> - let doc = match head with + let cst = Sx_parser.parse_file_cst path in + List.filter_map (fun node -> + (* Extract leading comment trivia from CST node *) + let trivia = match node with + | Sx_cst.CstAtom r -> r.leading_trivia + | Sx_cst.CstList r -> r.leading_trivia + | Sx_cst.CstDict r -> r.leading_trivia + in + let comment_text = List.filter_map (function + | Sx_cst.LineComment text -> Some text | _ -> None + ) trivia in + let prev_comment = if comment_text = [] then None + else Some (String.concat "" comment_text) in + let expr = Sx_cst.cst_to_ast node in + match expr with + | List (Symbol head :: Symbol name :: params_rest) + | ListRef { contents = Symbol head :: Symbol name :: params_rest } -> + (match head with | "defcomp" | "defisland" -> let params_str = match params_rest with | List ps :: _ | ListRef { contents = ps } :: _ -> @@ -1058,12 +1045,9 @@ let handle_tool name args = | None -> "" in Some (Printf.sprintf "## %s `%s`\nDefined in: %s\nType: macro\n%s" head name rel comment_str) - | _ -> None - in - (match doc with Some d -> d :: collect None rest | None -> collect None rest) - | _ :: rest -> collect None rest - in - collect None exprs + | _ -> None) + | _ -> None + ) cst.nodes with _ -> [] ) files in if all_docs = [] then text_result "(no components found)" @@ -1557,14 +1541,32 @@ let handle_tool name args = | "sx_write_file" -> let file = args |> member "file" |> to_string in let source = args |> member "source" |> to_string in - (* Validate by parsing first — preserve comments *) + (* Validate by parsing as CST — preserves comments and formatting *) (try - let exprs = Sx_parser.parse_all ~comments:true source in - if exprs = [] then error_result "Source parsed to empty — nothing to write" + let cst = Sx_parser.parse_all_cst source in + if cst.nodes = [] then error_result "Source parsed to empty — nothing to write" else begin - let output = pretty_print_file exprs in + (* Pretty-print each node but keep trivia *) + let reformatted = List.map (fun node -> + let trivia = match node with + | Sx_cst.CstAtom r -> r.leading_trivia + | Sx_cst.CstList r -> r.leading_trivia + | Sx_cst.CstDict r -> r.leading_trivia + in + let ast = Sx_cst.cst_to_ast node in + let pp = pretty_print_value ast in + let new_cst = Sx_parser.parse_all_cst pp in + match new_cst.nodes with + | [n] -> + (match n with + | Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = trivia } + | Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = trivia } + | Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = trivia }) + | _ -> node + ) cst.nodes in + let output = Sx_cst.cst_file_to_source reformatted cst.trailing_trivia in Out_channel.with_open_text file (fun oc -> output_string oc output); - text_result (Printf.sprintf "OK — wrote %d bytes (%d top-level forms) to %s" (String.length output) (List.length exprs) file) + text_result (Printf.sprintf "OK — wrote %d bytes (%d top-level forms) to %s" (String.length output) (List.length cst.nodes) file) end with e -> error_result (Printf.sprintf "Parse error — file not written: %s" (Printexc.to_string e))) diff --git a/hosts/ocaml/lib/sx_parser.ml b/hosts/ocaml/lib/sx_parser.ml index f508dda9..e235c967 100644 --- a/hosts/ocaml/lib/sx_parser.ml +++ b/hosts/ocaml/lib/sx_parser.ml @@ -30,23 +30,6 @@ let skip_whitespace_and_comments s = | _ -> () in go () -(* Skip whitespace only — leaves comments for capture *) -let skip_whitespace s = - let rec go () = - if at_end s then () - else match s.src.[s.pos] with - | ' ' | '\t' | '\n' | '\r' -> advance s; go () - | _ -> () - in go () - -(* Read a comment line starting at ';', returns text after ";;" (or ";") *) -let read_comment s = - let start = s.pos in - while s.pos < s.len && s.src.[s.pos] <> '\n' do advance s done; - let text = String.sub s.src start (s.pos - start) in - if s.pos < s.len then advance s; - text - (* Character classification — matches spec/parser.sx ident-start/ident-char. ident-start: a-z A-Z _ ~ * + - > < = / ! ? & ident-char: ident-start plus 0-9 . : / # , *) @@ -110,35 +93,7 @@ let try_number str = | Some n -> Some (Number n) | None -> None -(* Module-level flag — when true, comments are captured as Comment nodes. - Set by parse_all ~comments:true, reset after. *) -let _preserve_comments = ref false - -(* Collect consecutive comment lines into a Comment node *) -let collect_comment_node s = - let lines = ref [] in - let rec go () = - skip_whitespace s; - if not (at_end s) && s.src.[s.pos] = ';' then begin - lines := read_comment s :: !lines; - go () - end - in - go (); - Comment (String.concat "\n" (List.rev !lines)) - let rec read_value s : value = - (* In comment-preserving mode, check for comments first *) - if !_preserve_comments then begin - skip_whitespace s; - if not (at_end s) && s.src.[s.pos] = ';' then - collect_comment_node s - else - read_value_core s - end else - read_value_core s - -and read_value_core s : value = skip_whitespace_and_comments s; if at_end s then begin let line = ref 1 in @@ -212,29 +167,14 @@ and read_list s close_char = advance s; (* skip opening paren/bracket *) let items = ref [] in let rec go () = - if !_preserve_comments then begin - skip_whitespace s; - if at_end s then raise (Parse_error "Unterminated list"); - if s.src.[s.pos] = close_char then begin - advance s; - List (List.rev !items) - end else if s.src.[s.pos] = ';' then begin - items := collect_comment_node s :: !items; - go () - end else begin - items := read_value_core s :: !items; - go () - end + skip_whitespace_and_comments s; + if at_end s then raise (Parse_error "Unterminated list"); + if s.src.[s.pos] = close_char then begin + advance s; + List (List.rev !items) end else begin - skip_whitespace_and_comments s; - if at_end s then raise (Parse_error "Unterminated list"); - if s.src.[s.pos] = close_char then begin - advance s; - List (List.rev !items) - end else begin - items := read_value s :: !items; - go () - end + items := read_value s :: !items; + go () end in go () @@ -262,34 +202,26 @@ and read_dict s = in go () -(** Parse a string into a list of SX values. - When [~comments:true], comments are preserved as [Comment] nodes — - both at top level and inside lists. Default is [false] (strip). *) -let parse_all ?(comments=false) src = - _preserve_comments := comments; +(** Parse a string into a list of SX values (AST — comments stripped). *) +let parse_all src = let s = make_state src in let results = ref [] in let rec go () = - if !_preserve_comments then skip_whitespace s - else skip_whitespace_and_comments s; - if at_end s then ( - _preserve_comments := false; - List.rev !results - ) else begin + skip_whitespace_and_comments s; + if at_end s then List.rev !results + else begin results := read_value s :: !results; go () end - in - try go () - with e -> _preserve_comments := false; raise e + in go () -(** Parse a file into a list of SX values. *) -let parse_file ?(comments=false) path = +(** Parse a file into a list of SX values (AST — comments stripped). *) +let parse_file path = let ic = open_in path in let n = in_channel_length ic in let src = really_input_string ic n in close_in ic; - parse_all ~comments src + parse_all src (* ================================================================== *) diff --git a/hosts/ocaml/lib/sx_types.ml b/hosts/ocaml/lib/sx_types.ml index a02b0030..b875a51f 100644 --- a/hosts/ocaml/lib/sx_types.ml +++ b/hosts/ocaml/lib/sx_types.ml @@ -67,7 +67,6 @@ and value = | CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *) | CekFrame of cek_frame (** Optimized CEK continuation frame. *) | VmClosure of vm_closure (** VM-compiled closure — callable within the VM without allocating a new VM. *) - | Comment of string (** Source comment — preserved through parse/edit/pretty-print round-trips. *) (** CEK machine state — record instead of Dict for performance. 5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *) @@ -348,7 +347,6 @@ let type_of = function | CekState _ -> "dict" (* CEK state behaves as a dict for type checks *) | CekFrame _ -> "dict" | VmClosure _ -> "function" - | Comment _ -> "comment" let is_nil = function Nil -> true | _ -> false let is_lambda = function Lambda _ -> true | _ -> false @@ -543,4 +541,3 @@ let rec inspect = function | CekState _ -> "" | CekFrame f -> Printf.sprintf "" f.cf_type | VmClosure cl -> Printf.sprintf "" (match cl.vm_name with Some n -> n | None -> "anon") - | Comment text -> ";;" ^ text