Files
rose-ash/hosts/ocaml/lib/sx_cst.ml
giles 99c5c44cc1 Step 14: source locations — pos-to-loc, error-loc, sx-parse-loc — 15 tests
Pure SX layer: pos-to-loc (offset→line/col), error-loc (parse result→loc),
format-parse-error (human-readable error with source context line).
OCaml platform: cst_to_ast_loc (CST spans→loc dicts), sx-parse-loc
primitive (parse with locations), source-loc accessor.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-11 08:03:45 +00:00

174 lines
5.7 KiB
OCaml

(** Concrete Syntax Tree for SX — lossless source representation.
Every piece of source text is preserved: whitespace, comments,
delimiters, raw token text. The CST supports two projections:
- [cst_to_source]: reconstruct the exact original source
- [cst_to_ast]: strip trivia, produce [Sx_types.value] for evaluation
Trivia attaches to nodes (leading on every node, trailing on
containers before the close delimiter). No separate comment map. *)
open Sx_types
(** {1 Types} *)
type trivia =
| Whitespace of string (** Runs of spaces, tabs, newlines *)
| LineComment of string (** ";;" through end of line, including the ";" chars *)
type span = {
start_offset : int;
end_offset : int;
}
type cst_node =
| CstAtom of {
leading_trivia : trivia list;
token : string; (** Raw source text of the token *)
value : value; (** Parsed semantic value *)
span : span;
}
| CstList of {
leading_trivia : trivia list;
open_delim : char; (** '(' or '[' *)
children : cst_node list;
close_delim : char; (** ')' or ']' *)
trailing_trivia : trivia list; (** Trivia between last child and close delim *)
span : span;
}
| CstDict of {
leading_trivia : trivia list;
children : cst_node list; (** Alternating key/value atoms *)
trailing_trivia : trivia list;
span : span;
}
(** {1 CST → Source (lossless reconstruction)} *)
let trivia_to_string ts =
let buf = Buffer.create 64 in
List.iter (function
| Whitespace s -> Buffer.add_string buf s
| LineComment s -> Buffer.add_string buf s
) ts;
Buffer.contents buf
let rec cst_to_source node =
match node with
| CstAtom { leading_trivia; token; _ } ->
trivia_to_string leading_trivia ^ token
| CstList { leading_trivia; open_delim; children; close_delim; trailing_trivia; _ } ->
let buf = Buffer.create 256 in
Buffer.add_string buf (trivia_to_string leading_trivia);
Buffer.add_char buf open_delim;
List.iter (fun c -> Buffer.add_string buf (cst_to_source c)) children;
Buffer.add_string buf (trivia_to_string trailing_trivia);
Buffer.add_char buf close_delim;
Buffer.contents buf
| CstDict { leading_trivia; children; trailing_trivia; _ } ->
let buf = Buffer.create 256 in
Buffer.add_string buf (trivia_to_string leading_trivia);
Buffer.add_char buf '{';
List.iter (fun c -> Buffer.add_string buf (cst_to_source c)) children;
Buffer.add_string buf (trivia_to_string trailing_trivia);
Buffer.add_char buf '}';
Buffer.contents buf
let cst_to_source_file nodes =
String.concat "" (List.map cst_to_source nodes)
(** Reconstruct source from a parsed file (nodes + trailing trivia). *)
let cst_file_to_source nodes trailing =
cst_to_source_file nodes ^ trivia_to_string trailing
(** {1 CST → AST (strip trivia for evaluation)} *)
let rec cst_to_ast = function
| CstAtom { value; _ } -> value
| CstList { children; _ } ->
List (List.map cst_to_ast children)
| CstDict { children; _ } ->
let d = make_dict () in
let rec pairs = function
| k :: v :: rest ->
let key_str = match cst_to_ast k with
| Keyword k -> k | String k -> k | Symbol k -> k | _ -> ""
in
dict_set d key_str (cst_to_ast v);
pairs rest
| _ -> ()
in
pairs children;
Dict d
(** Convert character offset to line/col (1-based lines, 0-based cols) *)
let offset_to_loc src offset =
let line = ref 1 and col = ref 0 in
for i = 0 to min (offset - 1) (String.length src - 1) do
if src.[i] = '\n' then (incr line; col := 0)
else col := !col + 1
done;
(!line, !col)
(** CST → AST with source location dicts ({:form value :line N :col N}) *)
let cst_to_ast_loc src nodes =
List.map (fun node ->
let span = match node with
| CstAtom { span; _ } -> span
| CstList { span; _ } -> span
| CstDict { span; _ } -> span
in
let value = cst_to_ast node in
let (line, col) = offset_to_loc src span.start_offset in
let d = make_dict () in
dict_set d "form" value;
dict_set d "line" (Number (float_of_int line));
dict_set d "col" (Number (float_of_int col));
Dict d
) nodes
(** {1 CST editing — apply AST-level edits back to the CST} *)
(** Replace the CST node at [path] with [new_source], preserving the
original node's leading trivia. [new_source] is parsed as CST so
any comments in it are preserved. *)
let apply_edit path new_cst_nodes original_cst_nodes =
let rec go nodes idx_path =
match idx_path with
| [] -> nodes (* shouldn't happen *)
| [target] ->
List.mapi (fun i node ->
if i = target then
match new_cst_nodes with
| [replacement] ->
(* Preserve original leading trivia *)
let orig_trivia = match node with
| CstAtom { leading_trivia; _ } -> leading_trivia
| CstList { leading_trivia; _ } -> leading_trivia
| CstDict { leading_trivia; _ } -> leading_trivia
in
(match replacement with
| CstAtom r -> CstAtom { r with leading_trivia = orig_trivia }
| CstList r -> CstList { r with leading_trivia = orig_trivia }
| CstDict r -> CstDict { r with leading_trivia = orig_trivia })
| _ -> node (* multi-node replacement: use as-is *)
else node
) nodes
| target :: rest ->
List.mapi (fun i node ->
if i = target then
match node with
| CstList r ->
CstList { r with children = go r.children rest }
| CstDict r ->
CstDict { r with children = go r.children rest }
| _ -> node
else node
) nodes
in
go original_cst_nodes path