Remove Comment variant and old comment-mode parser — CST handles all

Delete from sx_types.ml:
- Comment of string variant (no longer needed)

Delete from sx_parser.ml:
- _preserve_comments mutable ref
- collect_comment_node function
- comment-mode branches in read_value, read_list
- ~comments parameter from parse_all and parse_file
- skip_whitespace and read_comment (only used by old comment mode)

Delete from mcp_tree.ml:
- has_interior_comments function
- Comment handling in pretty_print_value
- pretty_print_file function (replaced by CST write-back)
- ~comments parameter from local parse_file

Migrate sx_pretty_print, sx_write_file, sx_doc_gen to CST path.
Net: -69 lines. 24/24 CST round-trips, 2583/2583 evaluator tests.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-03 18:19:19 +00:00
parent af63d49451
commit 9b8a8dd272
3 changed files with 84 additions and 153 deletions

View File

@@ -308,9 +308,9 @@ let call_sx fn_name args =
let fn = env_get e fn_name in
Sx_ref.cek_call fn (List args)
let parse_file ?(comments=false) path =
let parse_file path =
let src = In_channel.with_open_text path In_channel.input_all in
let exprs = Sx_parser.parse_all ~comments src in
let exprs = Sx_parser.parse_all src in
List exprs
(* CST-based round-tripping — replaces comment_map machinery.
@@ -461,23 +461,12 @@ let rec est_width = function
2 + List.fold_left (fun acc x -> acc + est_width x + 1) 0 items
| _ -> 10
let rec has_interior_comments = function
| List items | ListRef { contents = items } ->
List.exists (fun item -> match item with
| Comment _ -> true
| List _ | ListRef _ -> has_interior_comments item
| _ -> false
) items
| _ -> false
let pretty_print_value ?(max_width=80) v =
let buf = Buffer.create 4096 in
let rec pp indent v =
match v with
| Comment text ->
Buffer.add_string buf text
| List items | ListRef { contents = items } when items <> [] ->
if est_width v <= max_width - indent && not (has_interior_comments v) then
if est_width v <= max_width - indent then
(* Fits on one line and has no comments *)
Buffer.add_string buf (pp_atom v)
else begin
@@ -489,12 +478,6 @@ let pretty_print_value ?(max_width=80) v =
let rest = List.tl items in
let rec emit = function
| [] -> ()
| Comment text :: rest ->
(* Interior comment: indented, on its own line *)
Buffer.add_char buf '\n';
Buffer.add_string buf (String.make child_indent ' ');
Buffer.add_string buf text;
emit rest
| Keyword k :: v :: rest ->
Buffer.add_char buf '\n';
Buffer.add_string buf (String.make child_indent ' ');
@@ -517,30 +500,6 @@ let pretty_print_value ?(max_width=80) v =
pp 0 v;
Buffer.contents buf
let pretty_print_file exprs =
(* Comments attach to the following expression — no blank line between
comment and expression. Non-comment expressions get one blank line
between them (matching the old String.concat "\n\n" behaviour). *)
let buf = Buffer.create 4096 in
let rec emit first prev_was_comment = function
| [] -> ()
| Comment text :: rest ->
(* Blank line before comment block, unless it's the first item
or follows another comment *)
if not first && not prev_was_comment then Buffer.add_char buf '\n';
Buffer.add_string buf text;
Buffer.add_char buf '\n';
emit false true rest
| v :: rest ->
(* Blank line between non-comment expressions; no blank line
after a comment (comment sticks to its expression) *)
if not first && not prev_was_comment then Buffer.add_char buf '\n';
Buffer.add_string buf (pretty_print_value v);
Buffer.add_char buf '\n';
emit false false rest
in
emit true false exprs;
Buffer.contents buf
(* Apply an AST-level edit result back to the CST and write the file.
@@ -892,10 +851,28 @@ let handle_tool name args =
| "sx_pretty_print" ->
let file = args |> member "file" |> to_string in
let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text file In_channel.input_all) in
let source = pretty_print_file exprs in
let cst = Sx_parser.parse_file_cst file in
(* Reformat each node's code while preserving trivia (comments, spacing) *)
let reformatted = List.map (fun node ->
let trivia = match node with
| Sx_cst.CstAtom r -> r.leading_trivia
| Sx_cst.CstList r -> r.leading_trivia
| Sx_cst.CstDict r -> r.leading_trivia
in
let ast = Sx_cst.cst_to_ast node in
let pp = pretty_print_value ast in
let new_cst = Sx_parser.parse_all_cst pp in
match new_cst.nodes with
| [n] ->
(match n with
| Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = trivia }
| Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = trivia }
| Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = trivia })
| _ -> node
) cst.nodes in
let source = Sx_cst.cst_file_to_source reformatted cst.trailing_trivia in
Out_channel.with_open_text file (fun oc -> output_string oc source);
text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length exprs))
text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length cst.nodes))
| "sx_changed" ->
let base_ref = args |> member "ref" |> to_string_option |> Option.value ~default:"main" in
@@ -1021,14 +998,24 @@ let handle_tool name args =
let all_docs = List.concat_map (fun path ->
let rel = relative_path ~base:dir path in
try
let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text path In_channel.input_all) in
(* Walk list tracking preceding comment *)
let rec collect prev_comment = function
| [] -> []
| Comment text :: rest -> collect (Some text) rest
| (List (Symbol head :: Symbol name :: params_rest) as _expr) :: rest
| (ListRef { contents = Symbol head :: Symbol name :: params_rest } as _expr) :: rest ->
let doc = match head with
let cst = Sx_parser.parse_file_cst path in
List.filter_map (fun node ->
(* Extract leading comment trivia from CST node *)
let trivia = match node with
| Sx_cst.CstAtom r -> r.leading_trivia
| Sx_cst.CstList r -> r.leading_trivia
| Sx_cst.CstDict r -> r.leading_trivia
in
let comment_text = List.filter_map (function
| Sx_cst.LineComment text -> Some text | _ -> None
) trivia in
let prev_comment = if comment_text = [] then None
else Some (String.concat "" comment_text) in
let expr = Sx_cst.cst_to_ast node in
match expr with
| List (Symbol head :: Symbol name :: params_rest)
| ListRef { contents = Symbol head :: Symbol name :: params_rest } ->
(match head with
| "defcomp" | "defisland" ->
let params_str = match params_rest with
| List ps :: _ | ListRef { contents = ps } :: _ ->
@@ -1058,12 +1045,9 @@ let handle_tool name args =
| None -> ""
in
Some (Printf.sprintf "## %s `%s`\nDefined in: %s\nType: macro\n%s" head name rel comment_str)
| _ -> None
in
(match doc with Some d -> d :: collect None rest | None -> collect None rest)
| _ :: rest -> collect None rest
in
collect None exprs
| _ -> None)
| _ -> None
) cst.nodes
with _ -> []
) files in
if all_docs = [] then text_result "(no components found)"
@@ -1557,14 +1541,32 @@ let handle_tool name args =
| "sx_write_file" ->
let file = args |> member "file" |> to_string in
let source = args |> member "source" |> to_string in
(* Validate by parsing first — preserve comments *)
(* Validate by parsing as CST — preserves comments and formatting *)
(try
let exprs = Sx_parser.parse_all ~comments:true source in
if exprs = [] then error_result "Source parsed to empty — nothing to write"
let cst = Sx_parser.parse_all_cst source in
if cst.nodes = [] then error_result "Source parsed to empty — nothing to write"
else begin
let output = pretty_print_file exprs in
(* Pretty-print each node but keep trivia *)
let reformatted = List.map (fun node ->
let trivia = match node with
| Sx_cst.CstAtom r -> r.leading_trivia
| Sx_cst.CstList r -> r.leading_trivia
| Sx_cst.CstDict r -> r.leading_trivia
in
let ast = Sx_cst.cst_to_ast node in
let pp = pretty_print_value ast in
let new_cst = Sx_parser.parse_all_cst pp in
match new_cst.nodes with
| [n] ->
(match n with
| Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = trivia }
| Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = trivia }
| Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = trivia })
| _ -> node
) cst.nodes in
let output = Sx_cst.cst_file_to_source reformatted cst.trailing_trivia in
Out_channel.with_open_text file (fun oc -> output_string oc output);
text_result (Printf.sprintf "OK — wrote %d bytes (%d top-level forms) to %s" (String.length output) (List.length exprs) file)
text_result (Printf.sprintf "OK — wrote %d bytes (%d top-level forms) to %s" (String.length output) (List.length cst.nodes) file)
end
with e -> error_result (Printf.sprintf "Parse error — file not written: %s" (Printexc.to_string e)))

View File

@@ -30,23 +30,6 @@ let skip_whitespace_and_comments s =
| _ -> ()
in go ()
(* Skip whitespace only — leaves comments for capture *)
let skip_whitespace s =
let rec go () =
if at_end s then ()
else match s.src.[s.pos] with
| ' ' | '\t' | '\n' | '\r' -> advance s; go ()
| _ -> ()
in go ()
(* Read a comment line starting at ';', returns text after ";;" (or ";") *)
let read_comment s =
let start = s.pos in
while s.pos < s.len && s.src.[s.pos] <> '\n' do advance s done;
let text = String.sub s.src start (s.pos - start) in
if s.pos < s.len then advance s;
text
(* Character classification — matches spec/parser.sx ident-start/ident-char.
ident-start: a-z A-Z _ ~ * + - > < = / ! ? &
ident-char: ident-start plus 0-9 . : / # , *)
@@ -110,35 +93,7 @@ let try_number str =
| Some n -> Some (Number n)
| None -> None
(* Module-level flag — when true, comments are captured as Comment nodes.
Set by parse_all ~comments:true, reset after. *)
let _preserve_comments = ref false
(* Collect consecutive comment lines into a Comment node *)
let collect_comment_node s =
let lines = ref [] in
let rec go () =
skip_whitespace s;
if not (at_end s) && s.src.[s.pos] = ';' then begin
lines := read_comment s :: !lines;
go ()
end
in
go ();
Comment (String.concat "\n" (List.rev !lines))
let rec read_value s : value =
(* In comment-preserving mode, check for comments first *)
if !_preserve_comments then begin
skip_whitespace s;
if not (at_end s) && s.src.[s.pos] = ';' then
collect_comment_node s
else
read_value_core s
end else
read_value_core s
and read_value_core s : value =
skip_whitespace_and_comments s;
if at_end s then begin
let line = ref 1 in
@@ -212,29 +167,14 @@ and read_list s close_char =
advance s; (* skip opening paren/bracket *)
let items = ref [] in
let rec go () =
if !_preserve_comments then begin
skip_whitespace s;
if at_end s then raise (Parse_error "Unterminated list");
if s.src.[s.pos] = close_char then begin
advance s;
List (List.rev !items)
end else if s.src.[s.pos] = ';' then begin
items := collect_comment_node s :: !items;
go ()
end else begin
items := read_value_core s :: !items;
go ()
end
skip_whitespace_and_comments s;
if at_end s then raise (Parse_error "Unterminated list");
if s.src.[s.pos] = close_char then begin
advance s;
List (List.rev !items)
end else begin
skip_whitespace_and_comments s;
if at_end s then raise (Parse_error "Unterminated list");
if s.src.[s.pos] = close_char then begin
advance s;
List (List.rev !items)
end else begin
items := read_value s :: !items;
go ()
end
items := read_value s :: !items;
go ()
end
in go ()
@@ -262,34 +202,26 @@ and read_dict s =
in go ()
(** Parse a string into a list of SX values.
When [~comments:true], comments are preserved as [Comment] nodes —
both at top level and inside lists. Default is [false] (strip). *)
let parse_all ?(comments=false) src =
_preserve_comments := comments;
(** Parse a string into a list of SX values (AST — comments stripped). *)
let parse_all src =
let s = make_state src in
let results = ref [] in
let rec go () =
if !_preserve_comments then skip_whitespace s
else skip_whitespace_and_comments s;
if at_end s then (
_preserve_comments := false;
List.rev !results
) else begin
skip_whitespace_and_comments s;
if at_end s then List.rev !results
else begin
results := read_value s :: !results;
go ()
end
in
try go ()
with e -> _preserve_comments := false; raise e
in go ()
(** Parse a file into a list of SX values. *)
let parse_file ?(comments=false) path =
(** Parse a file into a list of SX values (AST — comments stripped). *)
let parse_file path =
let ic = open_in path in
let n = in_channel_length ic in
let src = really_input_string ic n in
close_in ic;
parse_all ~comments src
parse_all src
(* ================================================================== *)

View File

@@ -67,7 +67,6 @@ and value =
| CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *)
| CekFrame of cek_frame (** Optimized CEK continuation frame. *)
| VmClosure of vm_closure (** VM-compiled closure — callable within the VM without allocating a new VM. *)
| Comment of string (** Source comment — preserved through parse/edit/pretty-print round-trips. *)
(** CEK machine state — record instead of Dict for performance.
5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *)
@@ -348,7 +347,6 @@ let type_of = function
| CekState _ -> "dict" (* CEK state behaves as a dict for type checks *)
| CekFrame _ -> "dict"
| VmClosure _ -> "function"
| Comment _ -> "comment"
let is_nil = function Nil -> true | _ -> false
let is_lambda = function Lambda _ -> true | _ -> false
@@ -543,4 +541,3 @@ let rec inspect = function
| CekState _ -> "<cek-state>"
| CekFrame f -> Printf.sprintf "<frame:%s>" f.cf_type
| VmClosure cl -> Printf.sprintf "<vm:%s>" (match cl.vm_name with Some n -> n | None -> "anon")
| Comment text -> ";;" ^ text