CST parser: lossless concrete syntax tree for .sx files
New sx_cst.ml: CstAtom, CstList, CstDict node types with leading/trailing trivia (whitespace + comments). Two projections: - cst_to_source/cst_file_to_source: exact source reconstruction - cst_to_ast: strip trivia → Sx_types.value for evaluation New parse_all_cst/parse_file_cst in sx_parser.ml: parallel CST parser alongside existing AST parser. Reuses read_string, read_symbol, try_number. Trivia collected via collect_trivia (replaces skip_whitespace_and_comments). Round-trip invariant: cst_file_to_source(parse_all_cst(src)) = src Verified on 13 synthetic tests + 7 real codebase files (101KB evaluator, parser, primitives, render, tree-tools, engine, io). CST→AST equivalence: cst_to_ast matches parse_all output. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -5,3 +5,7 @@
|
||||
(executable
|
||||
(name mcp_tree)
|
||||
(libraries sx unix yojson str))
|
||||
|
||||
(executable
|
||||
(name test_cst)
|
||||
(libraries sx))
|
||||
|
||||
91
hosts/ocaml/bin/test_cst.ml
Normal file
91
hosts/ocaml/bin/test_cst.ml
Normal file
@@ -0,0 +1,91 @@
|
||||
let () =
|
||||
let test_sources = [
|
||||
"(define foo 42)";
|
||||
";; comment\n(define bar 1)\n\n;; another\n(define baz 2)\n";
|
||||
"(define my-fn\n (fn (x)\n ;; check nil\n (if (nil? x) 0 x)))";
|
||||
"(list 1 2 3)";
|
||||
"{:key \"value\" :num 42}";
|
||||
"'(a b c)";
|
||||
"`(a ,b ,@c)";
|
||||
"(define x \"hello\\nworld\")";
|
||||
";; top\n;; multi-line\n(define a 1)\n";
|
||||
"";
|
||||
" \n ";
|
||||
"(a)\n(b)\n(c)";
|
||||
"(a ;; inline\n b)";
|
||||
] in
|
||||
let pass = ref 0 in
|
||||
let fail = ref 0 in
|
||||
List.iter (fun src ->
|
||||
let cst = Sx_parser.parse_all_cst src in
|
||||
let roundtrip = Sx_cst.cst_file_to_source cst.nodes cst.trailing_trivia in
|
||||
if roundtrip = src then begin
|
||||
incr pass;
|
||||
Printf.printf "PASS: %S\n" (if String.length src > 40 then String.sub src 0 40 ^ "..." else src)
|
||||
end else begin
|
||||
incr fail;
|
||||
Printf.printf "FAIL: %S\n expected: %S\n got: %S\n"
|
||||
(if String.length src > 40 then String.sub src 0 40 ^ "..." else src)
|
||||
src roundtrip
|
||||
end
|
||||
) test_sources;
|
||||
(* Also test CST→AST matches AST parser *)
|
||||
let ast_tests = [
|
||||
"(define foo 42)";
|
||||
"(list 1 2 3)";
|
||||
"{:key \"value\"}";
|
||||
";; comment\n(define bar 1)";
|
||||
] in
|
||||
Printf.printf "\nCST→AST equivalence:\n";
|
||||
List.iter (fun src ->
|
||||
let ast_direct = Sx_parser.parse_all src in
|
||||
let cst = Sx_parser.parse_all_cst src in
|
||||
let ast_via_cst = List.map Sx_cst.cst_to_ast cst.nodes in
|
||||
let s1 = String.concat " " (List.map Sx_types.inspect ast_direct) in
|
||||
let s2 = String.concat " " (List.map Sx_types.inspect ast_via_cst) in
|
||||
if s1 = s2 then begin
|
||||
incr pass;
|
||||
Printf.printf "PASS: %S\n" src
|
||||
end else begin
|
||||
incr fail;
|
||||
Printf.printf "FAIL: %S\n AST: %s\n CST→AST: %s\n" src s1 s2
|
||||
end
|
||||
) ast_tests;
|
||||
(* Test real .sx files from the codebase *)
|
||||
Printf.printf "\nReal file round-trips:\n";
|
||||
let test_file path =
|
||||
try
|
||||
let src = In_channel.with_open_text path In_channel.input_all in
|
||||
let cst = Sx_parser.parse_all_cst src in
|
||||
let roundtrip = Sx_cst.cst_file_to_source cst.nodes cst.trailing_trivia in
|
||||
if roundtrip = src then begin
|
||||
incr pass;
|
||||
Printf.printf "PASS: %s (%d bytes)\n" path (String.length src)
|
||||
end else begin
|
||||
incr fail;
|
||||
(* Find first difference *)
|
||||
let len = min (String.length src) (String.length roundtrip) in
|
||||
let diff_pos = ref len in
|
||||
for i = 0 to len - 1 do
|
||||
if src.[i] <> roundtrip.[i] && !diff_pos = len then diff_pos := i
|
||||
done;
|
||||
Printf.printf "FAIL: %s (diff at byte %d, src=%d rt=%d)\n" path !diff_pos (String.length src) (String.length roundtrip)
|
||||
end
|
||||
with e ->
|
||||
incr fail;
|
||||
Printf.printf "ERROR: %s — %s\n" path (Printexc.to_string e)
|
||||
in
|
||||
let spec_dir = try Sys.getenv "SX_SPEC_DIR" with Not_found -> "spec" in
|
||||
let project_dir = try Sys.getenv "SX_PROJECT_DIR" with Not_found -> "." in
|
||||
List.iter test_file [
|
||||
spec_dir ^ "/evaluator.sx";
|
||||
spec_dir ^ "/parser.sx";
|
||||
spec_dir ^ "/primitives.sx";
|
||||
spec_dir ^ "/render.sx";
|
||||
project_dir ^ "/lib/tree-tools.sx";
|
||||
project_dir ^ "/web/engine.sx";
|
||||
project_dir ^ "/web/io.sx";
|
||||
];
|
||||
|
||||
Printf.printf "\n%d/%d passed\n" !pass (!pass + !fail);
|
||||
if !fail > 0 then exit 1
|
||||
Reference in New Issue
Block a user