Remove Comment variant and old comment-mode parser — CST handles all
Delete from sx_types.ml: - Comment of string variant (no longer needed) Delete from sx_parser.ml: - _preserve_comments mutable ref - collect_comment_node function - comment-mode branches in read_value, read_list - ~comments parameter from parse_all and parse_file - skip_whitespace and read_comment (only used by old comment mode) Delete from mcp_tree.ml: - has_interior_comments function - Comment handling in pretty_print_value - pretty_print_file function (replaced by CST write-back) - ~comments parameter from local parse_file Migrate sx_pretty_print, sx_write_file, sx_doc_gen to CST path. Net: -69 lines. 24/24 CST round-trips, 2583/2583 evaluator tests. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -308,9 +308,9 @@ let call_sx fn_name args =
|
||||
let fn = env_get e fn_name in
|
||||
Sx_ref.cek_call fn (List args)
|
||||
|
||||
let parse_file ?(comments=false) path =
|
||||
let parse_file path =
|
||||
let src = In_channel.with_open_text path In_channel.input_all in
|
||||
let exprs = Sx_parser.parse_all ~comments src in
|
||||
let exprs = Sx_parser.parse_all src in
|
||||
List exprs
|
||||
|
||||
(* CST-based round-tripping — replaces comment_map machinery.
|
||||
@@ -461,23 +461,12 @@ let rec est_width = function
|
||||
2 + List.fold_left (fun acc x -> acc + est_width x + 1) 0 items
|
||||
| _ -> 10
|
||||
|
||||
let rec has_interior_comments = function
|
||||
| List items | ListRef { contents = items } ->
|
||||
List.exists (fun item -> match item with
|
||||
| Comment _ -> true
|
||||
| List _ | ListRef _ -> has_interior_comments item
|
||||
| _ -> false
|
||||
) items
|
||||
| _ -> false
|
||||
|
||||
let pretty_print_value ?(max_width=80) v =
|
||||
let buf = Buffer.create 4096 in
|
||||
let rec pp indent v =
|
||||
match v with
|
||||
| Comment text ->
|
||||
Buffer.add_string buf text
|
||||
| List items | ListRef { contents = items } when items <> [] ->
|
||||
if est_width v <= max_width - indent && not (has_interior_comments v) then
|
||||
if est_width v <= max_width - indent then
|
||||
(* Fits on one line and has no comments *)
|
||||
Buffer.add_string buf (pp_atom v)
|
||||
else begin
|
||||
@@ -489,12 +478,6 @@ let pretty_print_value ?(max_width=80) v =
|
||||
let rest = List.tl items in
|
||||
let rec emit = function
|
||||
| [] -> ()
|
||||
| Comment text :: rest ->
|
||||
(* Interior comment: indented, on its own line *)
|
||||
Buffer.add_char buf '\n';
|
||||
Buffer.add_string buf (String.make child_indent ' ');
|
||||
Buffer.add_string buf text;
|
||||
emit rest
|
||||
| Keyword k :: v :: rest ->
|
||||
Buffer.add_char buf '\n';
|
||||
Buffer.add_string buf (String.make child_indent ' ');
|
||||
@@ -517,30 +500,6 @@ let pretty_print_value ?(max_width=80) v =
|
||||
pp 0 v;
|
||||
Buffer.contents buf
|
||||
|
||||
let pretty_print_file exprs =
|
||||
(* Comments attach to the following expression — no blank line between
|
||||
comment and expression. Non-comment expressions get one blank line
|
||||
between them (matching the old String.concat "\n\n" behaviour). *)
|
||||
let buf = Buffer.create 4096 in
|
||||
let rec emit first prev_was_comment = function
|
||||
| [] -> ()
|
||||
| Comment text :: rest ->
|
||||
(* Blank line before comment block, unless it's the first item
|
||||
or follows another comment *)
|
||||
if not first && not prev_was_comment then Buffer.add_char buf '\n';
|
||||
Buffer.add_string buf text;
|
||||
Buffer.add_char buf '\n';
|
||||
emit false true rest
|
||||
| v :: rest ->
|
||||
(* Blank line between non-comment expressions; no blank line
|
||||
after a comment (comment sticks to its expression) *)
|
||||
if not first && not prev_was_comment then Buffer.add_char buf '\n';
|
||||
Buffer.add_string buf (pretty_print_value v);
|
||||
Buffer.add_char buf '\n';
|
||||
emit false false rest
|
||||
in
|
||||
emit true false exprs;
|
||||
Buffer.contents buf
|
||||
|
||||
|
||||
(* Apply an AST-level edit result back to the CST and write the file.
|
||||
@@ -892,10 +851,28 @@ let handle_tool name args =
|
||||
|
||||
| "sx_pretty_print" ->
|
||||
let file = args |> member "file" |> to_string in
|
||||
let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text file In_channel.input_all) in
|
||||
let source = pretty_print_file exprs in
|
||||
let cst = Sx_parser.parse_file_cst file in
|
||||
(* Reformat each node's code while preserving trivia (comments, spacing) *)
|
||||
let reformatted = List.map (fun node ->
|
||||
let trivia = match node with
|
||||
| Sx_cst.CstAtom r -> r.leading_trivia
|
||||
| Sx_cst.CstList r -> r.leading_trivia
|
||||
| Sx_cst.CstDict r -> r.leading_trivia
|
||||
in
|
||||
let ast = Sx_cst.cst_to_ast node in
|
||||
let pp = pretty_print_value ast in
|
||||
let new_cst = Sx_parser.parse_all_cst pp in
|
||||
match new_cst.nodes with
|
||||
| [n] ->
|
||||
(match n with
|
||||
| Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = trivia }
|
||||
| Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = trivia }
|
||||
| Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = trivia })
|
||||
| _ -> node
|
||||
) cst.nodes in
|
||||
let source = Sx_cst.cst_file_to_source reformatted cst.trailing_trivia in
|
||||
Out_channel.with_open_text file (fun oc -> output_string oc source);
|
||||
text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length exprs))
|
||||
text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length cst.nodes))
|
||||
|
||||
| "sx_changed" ->
|
||||
let base_ref = args |> member "ref" |> to_string_option |> Option.value ~default:"main" in
|
||||
@@ -1021,14 +998,24 @@ let handle_tool name args =
|
||||
let all_docs = List.concat_map (fun path ->
|
||||
let rel = relative_path ~base:dir path in
|
||||
try
|
||||
let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text path In_channel.input_all) in
|
||||
(* Walk list tracking preceding comment *)
|
||||
let rec collect prev_comment = function
|
||||
| [] -> []
|
||||
| Comment text :: rest -> collect (Some text) rest
|
||||
| (List (Symbol head :: Symbol name :: params_rest) as _expr) :: rest
|
||||
| (ListRef { contents = Symbol head :: Symbol name :: params_rest } as _expr) :: rest ->
|
||||
let doc = match head with
|
||||
let cst = Sx_parser.parse_file_cst path in
|
||||
List.filter_map (fun node ->
|
||||
(* Extract leading comment trivia from CST node *)
|
||||
let trivia = match node with
|
||||
| Sx_cst.CstAtom r -> r.leading_trivia
|
||||
| Sx_cst.CstList r -> r.leading_trivia
|
||||
| Sx_cst.CstDict r -> r.leading_trivia
|
||||
in
|
||||
let comment_text = List.filter_map (function
|
||||
| Sx_cst.LineComment text -> Some text | _ -> None
|
||||
) trivia in
|
||||
let prev_comment = if comment_text = [] then None
|
||||
else Some (String.concat "" comment_text) in
|
||||
let expr = Sx_cst.cst_to_ast node in
|
||||
match expr with
|
||||
| List (Symbol head :: Symbol name :: params_rest)
|
||||
| ListRef { contents = Symbol head :: Symbol name :: params_rest } ->
|
||||
(match head with
|
||||
| "defcomp" | "defisland" ->
|
||||
let params_str = match params_rest with
|
||||
| List ps :: _ | ListRef { contents = ps } :: _ ->
|
||||
@@ -1058,12 +1045,9 @@ let handle_tool name args =
|
||||
| None -> ""
|
||||
in
|
||||
Some (Printf.sprintf "## %s `%s`\nDefined in: %s\nType: macro\n%s" head name rel comment_str)
|
||||
| _ -> None
|
||||
in
|
||||
(match doc with Some d -> d :: collect None rest | None -> collect None rest)
|
||||
| _ :: rest -> collect None rest
|
||||
in
|
||||
collect None exprs
|
||||
| _ -> None)
|
||||
| _ -> None
|
||||
) cst.nodes
|
||||
with _ -> []
|
||||
) files in
|
||||
if all_docs = [] then text_result "(no components found)"
|
||||
@@ -1557,14 +1541,32 @@ let handle_tool name args =
|
||||
| "sx_write_file" ->
|
||||
let file = args |> member "file" |> to_string in
|
||||
let source = args |> member "source" |> to_string in
|
||||
(* Validate by parsing first — preserve comments *)
|
||||
(* Validate by parsing as CST — preserves comments and formatting *)
|
||||
(try
|
||||
let exprs = Sx_parser.parse_all ~comments:true source in
|
||||
if exprs = [] then error_result "Source parsed to empty — nothing to write"
|
||||
let cst = Sx_parser.parse_all_cst source in
|
||||
if cst.nodes = [] then error_result "Source parsed to empty — nothing to write"
|
||||
else begin
|
||||
let output = pretty_print_file exprs in
|
||||
(* Pretty-print each node but keep trivia *)
|
||||
let reformatted = List.map (fun node ->
|
||||
let trivia = match node with
|
||||
| Sx_cst.CstAtom r -> r.leading_trivia
|
||||
| Sx_cst.CstList r -> r.leading_trivia
|
||||
| Sx_cst.CstDict r -> r.leading_trivia
|
||||
in
|
||||
let ast = Sx_cst.cst_to_ast node in
|
||||
let pp = pretty_print_value ast in
|
||||
let new_cst = Sx_parser.parse_all_cst pp in
|
||||
match new_cst.nodes with
|
||||
| [n] ->
|
||||
(match n with
|
||||
| Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = trivia }
|
||||
| Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = trivia }
|
||||
| Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = trivia })
|
||||
| _ -> node
|
||||
) cst.nodes in
|
||||
let output = Sx_cst.cst_file_to_source reformatted cst.trailing_trivia in
|
||||
Out_channel.with_open_text file (fun oc -> output_string oc output);
|
||||
text_result (Printf.sprintf "OK — wrote %d bytes (%d top-level forms) to %s" (String.length output) (List.length exprs) file)
|
||||
text_result (Printf.sprintf "OK — wrote %d bytes (%d top-level forms) to %s" (String.length output) (List.length cst.nodes) file)
|
||||
end
|
||||
with e -> error_result (Printf.sprintf "Parse error — file not written: %s" (Printexc.to_string e)))
|
||||
|
||||
|
||||
@@ -30,23 +30,6 @@ let skip_whitespace_and_comments s =
|
||||
| _ -> ()
|
||||
in go ()
|
||||
|
||||
(* Skip whitespace only — leaves comments for capture *)
|
||||
let skip_whitespace s =
|
||||
let rec go () =
|
||||
if at_end s then ()
|
||||
else match s.src.[s.pos] with
|
||||
| ' ' | '\t' | '\n' | '\r' -> advance s; go ()
|
||||
| _ -> ()
|
||||
in go ()
|
||||
|
||||
(* Read a comment line starting at ';', returns text after ";;" (or ";") *)
|
||||
let read_comment s =
|
||||
let start = s.pos in
|
||||
while s.pos < s.len && s.src.[s.pos] <> '\n' do advance s done;
|
||||
let text = String.sub s.src start (s.pos - start) in
|
||||
if s.pos < s.len then advance s;
|
||||
text
|
||||
|
||||
(* Character classification — matches spec/parser.sx ident-start/ident-char.
|
||||
ident-start: a-z A-Z _ ~ * + - > < = / ! ? &
|
||||
ident-char: ident-start plus 0-9 . : / # , *)
|
||||
@@ -110,35 +93,7 @@ let try_number str =
|
||||
| Some n -> Some (Number n)
|
||||
| None -> None
|
||||
|
||||
(* Module-level flag — when true, comments are captured as Comment nodes.
|
||||
Set by parse_all ~comments:true, reset after. *)
|
||||
let _preserve_comments = ref false
|
||||
|
||||
(* Collect consecutive comment lines into a Comment node *)
|
||||
let collect_comment_node s =
|
||||
let lines = ref [] in
|
||||
let rec go () =
|
||||
skip_whitespace s;
|
||||
if not (at_end s) && s.src.[s.pos] = ';' then begin
|
||||
lines := read_comment s :: !lines;
|
||||
go ()
|
||||
end
|
||||
in
|
||||
go ();
|
||||
Comment (String.concat "\n" (List.rev !lines))
|
||||
|
||||
let rec read_value s : value =
|
||||
(* In comment-preserving mode, check for comments first *)
|
||||
if !_preserve_comments then begin
|
||||
skip_whitespace s;
|
||||
if not (at_end s) && s.src.[s.pos] = ';' then
|
||||
collect_comment_node s
|
||||
else
|
||||
read_value_core s
|
||||
end else
|
||||
read_value_core s
|
||||
|
||||
and read_value_core s : value =
|
||||
skip_whitespace_and_comments s;
|
||||
if at_end s then begin
|
||||
let line = ref 1 in
|
||||
@@ -212,29 +167,14 @@ and read_list s close_char =
|
||||
advance s; (* skip opening paren/bracket *)
|
||||
let items = ref [] in
|
||||
let rec go () =
|
||||
if !_preserve_comments then begin
|
||||
skip_whitespace s;
|
||||
if at_end s then raise (Parse_error "Unterminated list");
|
||||
if s.src.[s.pos] = close_char then begin
|
||||
advance s;
|
||||
List (List.rev !items)
|
||||
end else if s.src.[s.pos] = ';' then begin
|
||||
items := collect_comment_node s :: !items;
|
||||
go ()
|
||||
end else begin
|
||||
items := read_value_core s :: !items;
|
||||
go ()
|
||||
end
|
||||
skip_whitespace_and_comments s;
|
||||
if at_end s then raise (Parse_error "Unterminated list");
|
||||
if s.src.[s.pos] = close_char then begin
|
||||
advance s;
|
||||
List (List.rev !items)
|
||||
end else begin
|
||||
skip_whitespace_and_comments s;
|
||||
if at_end s then raise (Parse_error "Unterminated list");
|
||||
if s.src.[s.pos] = close_char then begin
|
||||
advance s;
|
||||
List (List.rev !items)
|
||||
end else begin
|
||||
items := read_value s :: !items;
|
||||
go ()
|
||||
end
|
||||
items := read_value s :: !items;
|
||||
go ()
|
||||
end
|
||||
in go ()
|
||||
|
||||
@@ -262,34 +202,26 @@ and read_dict s =
|
||||
in go ()
|
||||
|
||||
|
||||
(** Parse a string into a list of SX values.
|
||||
When [~comments:true], comments are preserved as [Comment] nodes —
|
||||
both at top level and inside lists. Default is [false] (strip). *)
|
||||
let parse_all ?(comments=false) src =
|
||||
_preserve_comments := comments;
|
||||
(** Parse a string into a list of SX values (AST — comments stripped). *)
|
||||
let parse_all src =
|
||||
let s = make_state src in
|
||||
let results = ref [] in
|
||||
let rec go () =
|
||||
if !_preserve_comments then skip_whitespace s
|
||||
else skip_whitespace_and_comments s;
|
||||
if at_end s then (
|
||||
_preserve_comments := false;
|
||||
List.rev !results
|
||||
) else begin
|
||||
skip_whitespace_and_comments s;
|
||||
if at_end s then List.rev !results
|
||||
else begin
|
||||
results := read_value s :: !results;
|
||||
go ()
|
||||
end
|
||||
in
|
||||
try go ()
|
||||
with e -> _preserve_comments := false; raise e
|
||||
in go ()
|
||||
|
||||
(** Parse a file into a list of SX values. *)
|
||||
let parse_file ?(comments=false) path =
|
||||
(** Parse a file into a list of SX values (AST — comments stripped). *)
|
||||
let parse_file path =
|
||||
let ic = open_in path in
|
||||
let n = in_channel_length ic in
|
||||
let src = really_input_string ic n in
|
||||
close_in ic;
|
||||
parse_all ~comments src
|
||||
parse_all src
|
||||
|
||||
|
||||
(* ================================================================== *)
|
||||
|
||||
@@ -67,7 +67,6 @@ and value =
|
||||
| CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *)
|
||||
| CekFrame of cek_frame (** Optimized CEK continuation frame. *)
|
||||
| VmClosure of vm_closure (** VM-compiled closure — callable within the VM without allocating a new VM. *)
|
||||
| Comment of string (** Source comment — preserved through parse/edit/pretty-print round-trips. *)
|
||||
|
||||
(** CEK machine state — record instead of Dict for performance.
|
||||
5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *)
|
||||
@@ -348,7 +347,6 @@ let type_of = function
|
||||
| CekState _ -> "dict" (* CEK state behaves as a dict for type checks *)
|
||||
| CekFrame _ -> "dict"
|
||||
| VmClosure _ -> "function"
|
||||
| Comment _ -> "comment"
|
||||
|
||||
let is_nil = function Nil -> true | _ -> false
|
||||
let is_lambda = function Lambda _ -> true | _ -> false
|
||||
@@ -543,4 +541,3 @@ let rec inspect = function
|
||||
| CekState _ -> "<cek-state>"
|
||||
| CekFrame f -> Printf.sprintf "<frame:%s>" f.cf_type
|
||||
| VmClosure cl -> Printf.sprintf "<vm:%s>" (match cl.vm_name with Some n -> n | None -> "anon")
|
||||
| Comment text -> ";;" ^ text
|
||||
|
||||
Reference in New Issue
Block a user