Remove Comment variant and old comment-mode parser — CST handles all
Delete from sx_types.ml: - Comment of string variant (no longer needed) Delete from sx_parser.ml: - _preserve_comments mutable ref - collect_comment_node function - comment-mode branches in read_value, read_list - ~comments parameter from parse_all and parse_file - skip_whitespace and read_comment (only used by old comment mode) Delete from mcp_tree.ml: - has_interior_comments function - Comment handling in pretty_print_value - pretty_print_file function (replaced by CST write-back) - ~comments parameter from local parse_file Migrate sx_pretty_print, sx_write_file, sx_doc_gen to CST path. Net: -69 lines. 24/24 CST round-trips, 2583/2583 evaluator tests. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -308,9 +308,9 @@ let call_sx fn_name args =
|
|||||||
let fn = env_get e fn_name in
|
let fn = env_get e fn_name in
|
||||||
Sx_ref.cek_call fn (List args)
|
Sx_ref.cek_call fn (List args)
|
||||||
|
|
||||||
let parse_file ?(comments=false) path =
|
let parse_file path =
|
||||||
let src = In_channel.with_open_text path In_channel.input_all in
|
let src = In_channel.with_open_text path In_channel.input_all in
|
||||||
let exprs = Sx_parser.parse_all ~comments src in
|
let exprs = Sx_parser.parse_all src in
|
||||||
List exprs
|
List exprs
|
||||||
|
|
||||||
(* CST-based round-tripping — replaces comment_map machinery.
|
(* CST-based round-tripping — replaces comment_map machinery.
|
||||||
@@ -461,23 +461,12 @@ let rec est_width = function
|
|||||||
2 + List.fold_left (fun acc x -> acc + est_width x + 1) 0 items
|
2 + List.fold_left (fun acc x -> acc + est_width x + 1) 0 items
|
||||||
| _ -> 10
|
| _ -> 10
|
||||||
|
|
||||||
let rec has_interior_comments = function
|
|
||||||
| List items | ListRef { contents = items } ->
|
|
||||||
List.exists (fun item -> match item with
|
|
||||||
| Comment _ -> true
|
|
||||||
| List _ | ListRef _ -> has_interior_comments item
|
|
||||||
| _ -> false
|
|
||||||
) items
|
|
||||||
| _ -> false
|
|
||||||
|
|
||||||
let pretty_print_value ?(max_width=80) v =
|
let pretty_print_value ?(max_width=80) v =
|
||||||
let buf = Buffer.create 4096 in
|
let buf = Buffer.create 4096 in
|
||||||
let rec pp indent v =
|
let rec pp indent v =
|
||||||
match v with
|
match v with
|
||||||
| Comment text ->
|
|
||||||
Buffer.add_string buf text
|
|
||||||
| List items | ListRef { contents = items } when items <> [] ->
|
| List items | ListRef { contents = items } when items <> [] ->
|
||||||
if est_width v <= max_width - indent && not (has_interior_comments v) then
|
if est_width v <= max_width - indent then
|
||||||
(* Fits on one line and has no comments *)
|
(* Fits on one line and has no comments *)
|
||||||
Buffer.add_string buf (pp_atom v)
|
Buffer.add_string buf (pp_atom v)
|
||||||
else begin
|
else begin
|
||||||
@@ -489,12 +478,6 @@ let pretty_print_value ?(max_width=80) v =
|
|||||||
let rest = List.tl items in
|
let rest = List.tl items in
|
||||||
let rec emit = function
|
let rec emit = function
|
||||||
| [] -> ()
|
| [] -> ()
|
||||||
| Comment text :: rest ->
|
|
||||||
(* Interior comment: indented, on its own line *)
|
|
||||||
Buffer.add_char buf '\n';
|
|
||||||
Buffer.add_string buf (String.make child_indent ' ');
|
|
||||||
Buffer.add_string buf text;
|
|
||||||
emit rest
|
|
||||||
| Keyword k :: v :: rest ->
|
| Keyword k :: v :: rest ->
|
||||||
Buffer.add_char buf '\n';
|
Buffer.add_char buf '\n';
|
||||||
Buffer.add_string buf (String.make child_indent ' ');
|
Buffer.add_string buf (String.make child_indent ' ');
|
||||||
@@ -517,30 +500,6 @@ let pretty_print_value ?(max_width=80) v =
|
|||||||
pp 0 v;
|
pp 0 v;
|
||||||
Buffer.contents buf
|
Buffer.contents buf
|
||||||
|
|
||||||
let pretty_print_file exprs =
|
|
||||||
(* Comments attach to the following expression — no blank line between
|
|
||||||
comment and expression. Non-comment expressions get one blank line
|
|
||||||
between them (matching the old String.concat "\n\n" behaviour). *)
|
|
||||||
let buf = Buffer.create 4096 in
|
|
||||||
let rec emit first prev_was_comment = function
|
|
||||||
| [] -> ()
|
|
||||||
| Comment text :: rest ->
|
|
||||||
(* Blank line before comment block, unless it's the first item
|
|
||||||
or follows another comment *)
|
|
||||||
if not first && not prev_was_comment then Buffer.add_char buf '\n';
|
|
||||||
Buffer.add_string buf text;
|
|
||||||
Buffer.add_char buf '\n';
|
|
||||||
emit false true rest
|
|
||||||
| v :: rest ->
|
|
||||||
(* Blank line between non-comment expressions; no blank line
|
|
||||||
after a comment (comment sticks to its expression) *)
|
|
||||||
if not first && not prev_was_comment then Buffer.add_char buf '\n';
|
|
||||||
Buffer.add_string buf (pretty_print_value v);
|
|
||||||
Buffer.add_char buf '\n';
|
|
||||||
emit false false rest
|
|
||||||
in
|
|
||||||
emit true false exprs;
|
|
||||||
Buffer.contents buf
|
|
||||||
|
|
||||||
|
|
||||||
(* Apply an AST-level edit result back to the CST and write the file.
|
(* Apply an AST-level edit result back to the CST and write the file.
|
||||||
@@ -892,10 +851,28 @@ let handle_tool name args =
|
|||||||
|
|
||||||
| "sx_pretty_print" ->
|
| "sx_pretty_print" ->
|
||||||
let file = args |> member "file" |> to_string in
|
let file = args |> member "file" |> to_string in
|
||||||
let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text file In_channel.input_all) in
|
let cst = Sx_parser.parse_file_cst file in
|
||||||
let source = pretty_print_file exprs in
|
(* Reformat each node's code while preserving trivia (comments, spacing) *)
|
||||||
|
let reformatted = List.map (fun node ->
|
||||||
|
let trivia = match node with
|
||||||
|
| Sx_cst.CstAtom r -> r.leading_trivia
|
||||||
|
| Sx_cst.CstList r -> r.leading_trivia
|
||||||
|
| Sx_cst.CstDict r -> r.leading_trivia
|
||||||
|
in
|
||||||
|
let ast = Sx_cst.cst_to_ast node in
|
||||||
|
let pp = pretty_print_value ast in
|
||||||
|
let new_cst = Sx_parser.parse_all_cst pp in
|
||||||
|
match new_cst.nodes with
|
||||||
|
| [n] ->
|
||||||
|
(match n with
|
||||||
|
| Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = trivia }
|
||||||
|
| Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = trivia }
|
||||||
|
| Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = trivia })
|
||||||
|
| _ -> node
|
||||||
|
) cst.nodes in
|
||||||
|
let source = Sx_cst.cst_file_to_source reformatted cst.trailing_trivia in
|
||||||
Out_channel.with_open_text file (fun oc -> output_string oc source);
|
Out_channel.with_open_text file (fun oc -> output_string oc source);
|
||||||
text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length exprs))
|
text_result (Printf.sprintf "OK — reformatted %s (%d bytes, %d forms)" file (String.length source) (List.length cst.nodes))
|
||||||
|
|
||||||
| "sx_changed" ->
|
| "sx_changed" ->
|
||||||
let base_ref = args |> member "ref" |> to_string_option |> Option.value ~default:"main" in
|
let base_ref = args |> member "ref" |> to_string_option |> Option.value ~default:"main" in
|
||||||
@@ -1021,14 +998,24 @@ let handle_tool name args =
|
|||||||
let all_docs = List.concat_map (fun path ->
|
let all_docs = List.concat_map (fun path ->
|
||||||
let rel = relative_path ~base:dir path in
|
let rel = relative_path ~base:dir path in
|
||||||
try
|
try
|
||||||
let exprs = Sx_parser.parse_all ~comments:true (In_channel.with_open_text path In_channel.input_all) in
|
let cst = Sx_parser.parse_file_cst path in
|
||||||
(* Walk list tracking preceding comment *)
|
List.filter_map (fun node ->
|
||||||
let rec collect prev_comment = function
|
(* Extract leading comment trivia from CST node *)
|
||||||
| [] -> []
|
let trivia = match node with
|
||||||
| Comment text :: rest -> collect (Some text) rest
|
| Sx_cst.CstAtom r -> r.leading_trivia
|
||||||
| (List (Symbol head :: Symbol name :: params_rest) as _expr) :: rest
|
| Sx_cst.CstList r -> r.leading_trivia
|
||||||
| (ListRef { contents = Symbol head :: Symbol name :: params_rest } as _expr) :: rest ->
|
| Sx_cst.CstDict r -> r.leading_trivia
|
||||||
let doc = match head with
|
in
|
||||||
|
let comment_text = List.filter_map (function
|
||||||
|
| Sx_cst.LineComment text -> Some text | _ -> None
|
||||||
|
) trivia in
|
||||||
|
let prev_comment = if comment_text = [] then None
|
||||||
|
else Some (String.concat "" comment_text) in
|
||||||
|
let expr = Sx_cst.cst_to_ast node in
|
||||||
|
match expr with
|
||||||
|
| List (Symbol head :: Symbol name :: params_rest)
|
||||||
|
| ListRef { contents = Symbol head :: Symbol name :: params_rest } ->
|
||||||
|
(match head with
|
||||||
| "defcomp" | "defisland" ->
|
| "defcomp" | "defisland" ->
|
||||||
let params_str = match params_rest with
|
let params_str = match params_rest with
|
||||||
| List ps :: _ | ListRef { contents = ps } :: _ ->
|
| List ps :: _ | ListRef { contents = ps } :: _ ->
|
||||||
@@ -1058,12 +1045,9 @@ let handle_tool name args =
|
|||||||
| None -> ""
|
| None -> ""
|
||||||
in
|
in
|
||||||
Some (Printf.sprintf "## %s `%s`\nDefined in: %s\nType: macro\n%s" head name rel comment_str)
|
Some (Printf.sprintf "## %s `%s`\nDefined in: %s\nType: macro\n%s" head name rel comment_str)
|
||||||
| _ -> None
|
| _ -> None)
|
||||||
in
|
| _ -> None
|
||||||
(match doc with Some d -> d :: collect None rest | None -> collect None rest)
|
) cst.nodes
|
||||||
| _ :: rest -> collect None rest
|
|
||||||
in
|
|
||||||
collect None exprs
|
|
||||||
with _ -> []
|
with _ -> []
|
||||||
) files in
|
) files in
|
||||||
if all_docs = [] then text_result "(no components found)"
|
if all_docs = [] then text_result "(no components found)"
|
||||||
@@ -1557,14 +1541,32 @@ let handle_tool name args =
|
|||||||
| "sx_write_file" ->
|
| "sx_write_file" ->
|
||||||
let file = args |> member "file" |> to_string in
|
let file = args |> member "file" |> to_string in
|
||||||
let source = args |> member "source" |> to_string in
|
let source = args |> member "source" |> to_string in
|
||||||
(* Validate by parsing first — preserve comments *)
|
(* Validate by parsing as CST — preserves comments and formatting *)
|
||||||
(try
|
(try
|
||||||
let exprs = Sx_parser.parse_all ~comments:true source in
|
let cst = Sx_parser.parse_all_cst source in
|
||||||
if exprs = [] then error_result "Source parsed to empty — nothing to write"
|
if cst.nodes = [] then error_result "Source parsed to empty — nothing to write"
|
||||||
else begin
|
else begin
|
||||||
let output = pretty_print_file exprs in
|
(* Pretty-print each node but keep trivia *)
|
||||||
|
let reformatted = List.map (fun node ->
|
||||||
|
let trivia = match node with
|
||||||
|
| Sx_cst.CstAtom r -> r.leading_trivia
|
||||||
|
| Sx_cst.CstList r -> r.leading_trivia
|
||||||
|
| Sx_cst.CstDict r -> r.leading_trivia
|
||||||
|
in
|
||||||
|
let ast = Sx_cst.cst_to_ast node in
|
||||||
|
let pp = pretty_print_value ast in
|
||||||
|
let new_cst = Sx_parser.parse_all_cst pp in
|
||||||
|
match new_cst.nodes with
|
||||||
|
| [n] ->
|
||||||
|
(match n with
|
||||||
|
| Sx_cst.CstAtom r -> Sx_cst.CstAtom { r with leading_trivia = trivia }
|
||||||
|
| Sx_cst.CstList r -> Sx_cst.CstList { r with leading_trivia = trivia }
|
||||||
|
| Sx_cst.CstDict r -> Sx_cst.CstDict { r with leading_trivia = trivia })
|
||||||
|
| _ -> node
|
||||||
|
) cst.nodes in
|
||||||
|
let output = Sx_cst.cst_file_to_source reformatted cst.trailing_trivia in
|
||||||
Out_channel.with_open_text file (fun oc -> output_string oc output);
|
Out_channel.with_open_text file (fun oc -> output_string oc output);
|
||||||
text_result (Printf.sprintf "OK — wrote %d bytes (%d top-level forms) to %s" (String.length output) (List.length exprs) file)
|
text_result (Printf.sprintf "OK — wrote %d bytes (%d top-level forms) to %s" (String.length output) (List.length cst.nodes) file)
|
||||||
end
|
end
|
||||||
with e -> error_result (Printf.sprintf "Parse error — file not written: %s" (Printexc.to_string e)))
|
with e -> error_result (Printf.sprintf "Parse error — file not written: %s" (Printexc.to_string e)))
|
||||||
|
|
||||||
|
|||||||
@@ -30,23 +30,6 @@ let skip_whitespace_and_comments s =
|
|||||||
| _ -> ()
|
| _ -> ()
|
||||||
in go ()
|
in go ()
|
||||||
|
|
||||||
(* Skip whitespace only — leaves comments for capture *)
|
|
||||||
let skip_whitespace s =
|
|
||||||
let rec go () =
|
|
||||||
if at_end s then ()
|
|
||||||
else match s.src.[s.pos] with
|
|
||||||
| ' ' | '\t' | '\n' | '\r' -> advance s; go ()
|
|
||||||
| _ -> ()
|
|
||||||
in go ()
|
|
||||||
|
|
||||||
(* Read a comment line starting at ';', returns text after ";;" (or ";") *)
|
|
||||||
let read_comment s =
|
|
||||||
let start = s.pos in
|
|
||||||
while s.pos < s.len && s.src.[s.pos] <> '\n' do advance s done;
|
|
||||||
let text = String.sub s.src start (s.pos - start) in
|
|
||||||
if s.pos < s.len then advance s;
|
|
||||||
text
|
|
||||||
|
|
||||||
(* Character classification — matches spec/parser.sx ident-start/ident-char.
|
(* Character classification — matches spec/parser.sx ident-start/ident-char.
|
||||||
ident-start: a-z A-Z _ ~ * + - > < = / ! ? &
|
ident-start: a-z A-Z _ ~ * + - > < = / ! ? &
|
||||||
ident-char: ident-start plus 0-9 . : / # , *)
|
ident-char: ident-start plus 0-9 . : / # , *)
|
||||||
@@ -110,35 +93,7 @@ let try_number str =
|
|||||||
| Some n -> Some (Number n)
|
| Some n -> Some (Number n)
|
||||||
| None -> None
|
| None -> None
|
||||||
|
|
||||||
(* Module-level flag — when true, comments are captured as Comment nodes.
|
|
||||||
Set by parse_all ~comments:true, reset after. *)
|
|
||||||
let _preserve_comments = ref false
|
|
||||||
|
|
||||||
(* Collect consecutive comment lines into a Comment node *)
|
|
||||||
let collect_comment_node s =
|
|
||||||
let lines = ref [] in
|
|
||||||
let rec go () =
|
|
||||||
skip_whitespace s;
|
|
||||||
if not (at_end s) && s.src.[s.pos] = ';' then begin
|
|
||||||
lines := read_comment s :: !lines;
|
|
||||||
go ()
|
|
||||||
end
|
|
||||||
in
|
|
||||||
go ();
|
|
||||||
Comment (String.concat "\n" (List.rev !lines))
|
|
||||||
|
|
||||||
let rec read_value s : value =
|
let rec read_value s : value =
|
||||||
(* In comment-preserving mode, check for comments first *)
|
|
||||||
if !_preserve_comments then begin
|
|
||||||
skip_whitespace s;
|
|
||||||
if not (at_end s) && s.src.[s.pos] = ';' then
|
|
||||||
collect_comment_node s
|
|
||||||
else
|
|
||||||
read_value_core s
|
|
||||||
end else
|
|
||||||
read_value_core s
|
|
||||||
|
|
||||||
and read_value_core s : value =
|
|
||||||
skip_whitespace_and_comments s;
|
skip_whitespace_and_comments s;
|
||||||
if at_end s then begin
|
if at_end s then begin
|
||||||
let line = ref 1 in
|
let line = ref 1 in
|
||||||
@@ -212,29 +167,14 @@ and read_list s close_char =
|
|||||||
advance s; (* skip opening paren/bracket *)
|
advance s; (* skip opening paren/bracket *)
|
||||||
let items = ref [] in
|
let items = ref [] in
|
||||||
let rec go () =
|
let rec go () =
|
||||||
if !_preserve_comments then begin
|
skip_whitespace_and_comments s;
|
||||||
skip_whitespace s;
|
if at_end s then raise (Parse_error "Unterminated list");
|
||||||
if at_end s then raise (Parse_error "Unterminated list");
|
if s.src.[s.pos] = close_char then begin
|
||||||
if s.src.[s.pos] = close_char then begin
|
advance s;
|
||||||
advance s;
|
List (List.rev !items)
|
||||||
List (List.rev !items)
|
|
||||||
end else if s.src.[s.pos] = ';' then begin
|
|
||||||
items := collect_comment_node s :: !items;
|
|
||||||
go ()
|
|
||||||
end else begin
|
|
||||||
items := read_value_core s :: !items;
|
|
||||||
go ()
|
|
||||||
end
|
|
||||||
end else begin
|
end else begin
|
||||||
skip_whitespace_and_comments s;
|
items := read_value s :: !items;
|
||||||
if at_end s then raise (Parse_error "Unterminated list");
|
go ()
|
||||||
if s.src.[s.pos] = close_char then begin
|
|
||||||
advance s;
|
|
||||||
List (List.rev !items)
|
|
||||||
end else begin
|
|
||||||
items := read_value s :: !items;
|
|
||||||
go ()
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
in go ()
|
in go ()
|
||||||
|
|
||||||
@@ -262,34 +202,26 @@ and read_dict s =
|
|||||||
in go ()
|
in go ()
|
||||||
|
|
||||||
|
|
||||||
(** Parse a string into a list of SX values.
|
(** Parse a string into a list of SX values (AST — comments stripped). *)
|
||||||
When [~comments:true], comments are preserved as [Comment] nodes —
|
let parse_all src =
|
||||||
both at top level and inside lists. Default is [false] (strip). *)
|
|
||||||
let parse_all ?(comments=false) src =
|
|
||||||
_preserve_comments := comments;
|
|
||||||
let s = make_state src in
|
let s = make_state src in
|
||||||
let results = ref [] in
|
let results = ref [] in
|
||||||
let rec go () =
|
let rec go () =
|
||||||
if !_preserve_comments then skip_whitespace s
|
skip_whitespace_and_comments s;
|
||||||
else skip_whitespace_and_comments s;
|
if at_end s then List.rev !results
|
||||||
if at_end s then (
|
else begin
|
||||||
_preserve_comments := false;
|
|
||||||
List.rev !results
|
|
||||||
) else begin
|
|
||||||
results := read_value s :: !results;
|
results := read_value s :: !results;
|
||||||
go ()
|
go ()
|
||||||
end
|
end
|
||||||
in
|
in go ()
|
||||||
try go ()
|
|
||||||
with e -> _preserve_comments := false; raise e
|
|
||||||
|
|
||||||
(** Parse a file into a list of SX values. *)
|
(** Parse a file into a list of SX values (AST — comments stripped). *)
|
||||||
let parse_file ?(comments=false) path =
|
let parse_file path =
|
||||||
let ic = open_in path in
|
let ic = open_in path in
|
||||||
let n = in_channel_length ic in
|
let n = in_channel_length ic in
|
||||||
let src = really_input_string ic n in
|
let src = really_input_string ic n in
|
||||||
close_in ic;
|
close_in ic;
|
||||||
parse_all ~comments src
|
parse_all src
|
||||||
|
|
||||||
|
|
||||||
(* ================================================================== *)
|
(* ================================================================== *)
|
||||||
|
|||||||
@@ -67,7 +67,6 @@ and value =
|
|||||||
| CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *)
|
| CekState of cek_state (** Optimized CEK machine state — avoids Dict allocation. *)
|
||||||
| CekFrame of cek_frame (** Optimized CEK continuation frame. *)
|
| CekFrame of cek_frame (** Optimized CEK continuation frame. *)
|
||||||
| VmClosure of vm_closure (** VM-compiled closure — callable within the VM without allocating a new VM. *)
|
| VmClosure of vm_closure (** VM-compiled closure — callable within the VM without allocating a new VM. *)
|
||||||
| Comment of string (** Source comment — preserved through parse/edit/pretty-print round-trips. *)
|
|
||||||
|
|
||||||
(** CEK machine state — record instead of Dict for performance.
|
(** CEK machine state — record instead of Dict for performance.
|
||||||
5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *)
|
5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *)
|
||||||
@@ -348,7 +347,6 @@ let type_of = function
|
|||||||
| CekState _ -> "dict" (* CEK state behaves as a dict for type checks *)
|
| CekState _ -> "dict" (* CEK state behaves as a dict for type checks *)
|
||||||
| CekFrame _ -> "dict"
|
| CekFrame _ -> "dict"
|
||||||
| VmClosure _ -> "function"
|
| VmClosure _ -> "function"
|
||||||
| Comment _ -> "comment"
|
|
||||||
|
|
||||||
let is_nil = function Nil -> true | _ -> false
|
let is_nil = function Nil -> true | _ -> false
|
||||||
let is_lambda = function Lambda _ -> true | _ -> false
|
let is_lambda = function Lambda _ -> true | _ -> false
|
||||||
@@ -543,4 +541,3 @@ let rec inspect = function
|
|||||||
| CekState _ -> "<cek-state>"
|
| CekState _ -> "<cek-state>"
|
||||||
| CekFrame f -> Printf.sprintf "<frame:%s>" f.cf_type
|
| CekFrame f -> Printf.sprintf "<frame:%s>" f.cf_type
|
||||||
| VmClosure cl -> Printf.sprintf "<vm:%s>" (match cl.vm_name with Some n -> n | None -> "anon")
|
| VmClosure cl -> Printf.sprintf "<vm:%s>" (match cl.vm_name with Some n -> n | None -> "anon")
|
||||||
| Comment text -> ";;" ^ text
|
|
||||||
|
|||||||
Reference in New Issue
Block a user