(** Concrete Syntax Tree for SX — lossless source representation. Every piece of source text is preserved: whitespace, comments, delimiters, raw token text. The CST supports two projections: - [cst_to_source]: reconstruct the exact original source - [cst_to_ast]: strip trivia, produce [Sx_types.value] for evaluation Trivia attaches to nodes (leading on every node, trailing on containers before the close delimiter). No separate comment map. *) open Sx_types (** {1 Types} *) type trivia = | Whitespace of string (** Runs of spaces, tabs, newlines *) | LineComment of string (** ";;" through end of line, including the ";" chars *) type span = { start_offset : int; end_offset : int; } type cst_node = | CstAtom of { leading_trivia : trivia list; token : string; (** Raw source text of the token *) value : value; (** Parsed semantic value *) span : span; } | CstList of { leading_trivia : trivia list; open_delim : char; (** '(' or '[' *) children : cst_node list; close_delim : char; (** ')' or ']' *) trailing_trivia : trivia list; (** Trivia between last child and close delim *) span : span; } | CstDict of { leading_trivia : trivia list; children : cst_node list; (** Alternating key/value atoms *) trailing_trivia : trivia list; span : span; } (** {1 CST → Source (lossless reconstruction)} *) let trivia_to_string ts = let buf = Buffer.create 64 in List.iter (function | Whitespace s -> Buffer.add_string buf s | LineComment s -> Buffer.add_string buf s ) ts; Buffer.contents buf let rec cst_to_source node = match node with | CstAtom { leading_trivia; token; _ } -> trivia_to_string leading_trivia ^ token | CstList { leading_trivia; open_delim; children; close_delim; trailing_trivia; _ } -> let buf = Buffer.create 256 in Buffer.add_string buf (trivia_to_string leading_trivia); Buffer.add_char buf open_delim; List.iter (fun c -> Buffer.add_string buf (cst_to_source c)) children; Buffer.add_string buf (trivia_to_string trailing_trivia); Buffer.add_char buf close_delim; Buffer.contents buf | CstDict { leading_trivia; children; trailing_trivia; _ } -> let buf = Buffer.create 256 in Buffer.add_string buf (trivia_to_string leading_trivia); Buffer.add_char buf '{'; List.iter (fun c -> Buffer.add_string buf (cst_to_source c)) children; Buffer.add_string buf (trivia_to_string trailing_trivia); Buffer.add_char buf '}'; Buffer.contents buf let cst_to_source_file nodes = String.concat "" (List.map cst_to_source nodes) (** Reconstruct source from a parsed file (nodes + trailing trivia). *) let cst_file_to_source nodes trailing = cst_to_source_file nodes ^ trivia_to_string trailing (** {1 CST → AST (strip trivia for evaluation)} *) let rec cst_to_ast = function | CstAtom { value; _ } -> value | CstList { children; _ } -> List (List.map cst_to_ast children) | CstDict { children; _ } -> let d = make_dict () in let rec pairs = function | k :: v :: rest -> let key_str = match cst_to_ast k with | Keyword k -> k | String k -> k | Symbol k -> k | _ -> "" in dict_set d key_str (cst_to_ast v); pairs rest | _ -> () in pairs children; Dict d (** {1 CST editing — apply AST-level edits back to the CST} *) (** Replace the CST node at [path] with [new_source], preserving the original node's leading trivia. [new_source] is parsed as CST so any comments in it are preserved. *) let apply_edit path new_cst_nodes original_cst_nodes = let rec go nodes idx_path = match idx_path with | [] -> nodes (* shouldn't happen *) | [target] -> List.mapi (fun i node -> if i = target then match new_cst_nodes with | [replacement] -> (* Preserve original leading trivia *) let orig_trivia = match node with | CstAtom { leading_trivia; _ } -> leading_trivia | CstList { leading_trivia; _ } -> leading_trivia | CstDict { leading_trivia; _ } -> leading_trivia in (match replacement with | CstAtom r -> CstAtom { r with leading_trivia = orig_trivia } | CstList r -> CstList { r with leading_trivia = orig_trivia } | CstDict r -> CstDict { r with leading_trivia = orig_trivia }) | _ -> node (* multi-node replacement: use as-is *) else node ) nodes | target :: rest -> List.mapi (fun i node -> if i = target then match node with | CstList r -> CstList { r with children = go r.children rest } | CstDict r -> CstDict { r with children = go r.children rest } | _ -> node else node ) nodes in go original_cst_nodes path