From 99c5c44cc1df33c814c201bce53367c5802822e4 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 11 Apr 2026 08:03:45 +0000 Subject: [PATCH] =?UTF-8?q?Step=2014:=20source=20locations=20=E2=80=94=20p?= =?UTF-8?q?os-to-loc,=20error-loc,=20sx-parse-loc=20=E2=80=94=2015=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pure SX layer: pos-to-loc (offset→line/col), error-loc (parse result→loc), format-parse-error (human-readable error with source context line). OCaml platform: cst_to_ast_loc (CST spans→loc dicts), sx-parse-loc primitive (parse with locations), source-loc accessor. Co-Authored-By: Claude Opus 4.6 (1M context) --- hosts/ocaml/bin/run_tests.ml | 18 ++++ hosts/ocaml/lib/sx_cst.ml | 27 ++++++ lib/parser-combinators.sx | 41 +++++++++ spec/tests/test-source-locations.sx | 138 ++++++++++++++++++++++++++++ 4 files changed, 224 insertions(+) create mode 100644 spec/tests/test-source-locations.sx diff --git a/hosts/ocaml/bin/run_tests.ml b/hosts/ocaml/bin/run_tests.ml index eb804bb2..48cc0570 100644 --- a/hosts/ocaml/bin/run_tests.ml +++ b/hosts/ocaml/bin/run_tests.ml @@ -164,6 +164,24 @@ let make_test_env () = | [String s] -> List (parse_all s) | _ -> raise (Eval_error "sx-parse: expected string")); + bind "sx-parse-loc" (fun args -> + match args with + | [String s] -> + let cst = Sx_parser.parse_all_cst s in + List (Sx_cst.cst_to_ast_loc s cst.nodes) + | _ -> raise (Eval_error "sx-parse-loc: expected string")); + + bind "source-loc" (fun args -> + match args with + | [Dict d] -> + let line = try Hashtbl.find d "line" with Not_found -> Nil in + let col = try Hashtbl.find d "col" with Not_found -> Nil in + let ld = Sx_types.make_dict () in + Sx_types.dict_set ld "line" line; + Sx_types.dict_set ld "col" col; + Dict ld + | _ -> Nil); + bind "sx-parse-one" (fun args -> match args with | [String s] -> diff --git a/hosts/ocaml/lib/sx_cst.ml b/hosts/ocaml/lib/sx_cst.ml index b73f4594..e19b3e37 100644 --- a/hosts/ocaml/lib/sx_cst.ml +++ b/hosts/ocaml/lib/sx_cst.ml @@ -104,6 +104,33 @@ let rec cst_to_ast = function Dict d +(** Convert character offset to line/col (1-based lines, 0-based cols) *) +let offset_to_loc src offset = + let line = ref 1 and col = ref 0 in + for i = 0 to min (offset - 1) (String.length src - 1) do + if src.[i] = '\n' then (incr line; col := 0) + else col := !col + 1 + done; + (!line, !col) + +(** CST → AST with source location dicts ({:form value :line N :col N}) *) +let cst_to_ast_loc src nodes = + List.map (fun node -> + let span = match node with + | CstAtom { span; _ } -> span + | CstList { span; _ } -> span + | CstDict { span; _ } -> span + in + let value = cst_to_ast node in + let (line, col) = offset_to_loc src span.start_offset in + let d = make_dict () in + dict_set d "form" value; + dict_set d "line" (Number (float_of_int line)); + dict_set d "col" (Number (float_of_int col)); + Dict d + ) nodes + + (** {1 CST editing — apply AST-level edits back to the CST} *) (** Replace the CST node at [path] with [new_source], preserving the diff --git a/lib/parser-combinators.sx b/lib/parser-combinators.sx index b44b4e86..b359dad6 100644 --- a/lib/parser-combinators.sx +++ b/lib/parser-combinators.sx @@ -426,6 +426,47 @@ ;; ── Layer 8: SX tokenizer ───────────────────────────────────────── +(define + pos-to-loc + (fn + (input pos) + (let + loop + ((i 0) (line 1) (col 0)) + (if + (>= i pos) + {:line line :col col} + (if + (= (char-at input i) "\n") + (loop (+ i 1) (+ line 1) 0) + (loop (+ i 1) line (+ col 1))))))) + +(define + error-loc + (fn (result input) (pos-to-loc input (result-pos result)))) + +(define + format-parse-error + (fn + (result input) + (let + ((loc (error-loc result input)) + (line-num (get loc :line)) + (col-num (get loc :col)) + (expected (result-expected result)) + (lines (split input "\n")) + (source-line + (if (<= line-num (len lines)) (nth lines (- line-num 1)) ""))) + (str + "line " + line-num + ", col " + col-num + ": expected " + expected + "\n" + source-line)))) + (define sx-comment (fn diff --git a/spec/tests/test-source-locations.sx b/spec/tests/test-source-locations.sx new file mode 100644 index 00000000..987a72d2 --- /dev/null +++ b/spec/tests/test-source-locations.sx @@ -0,0 +1,138 @@ +;; Step 14: Source maps / error reporting +;; Source locations tracked through parse → compile → eval. +;; pos-to-loc converts character offset to {:line :col}. +;; Errors include source context. + +;; ── pos-to-loc: offset → line/col ──────────────────────────────── + +(defsuite + "pos-to-loc" + (deftest + "first char is line 1 col 0" + (let + ((loc (pos-to-loc "hello" 0))) + (assert= (get loc :line) 1) + (assert= (get loc :col) 0))) + (deftest + "mid-line position" + (let + ((loc (pos-to-loc "hello" 3))) + (assert= (get loc :line) 1) + (assert= (get loc :col) 3))) + (deftest + "second line" + (let + ((loc (pos-to-loc "abc\ndef" 4))) + (assert= (get loc :line) 2) + (assert= (get loc :col) 0))) + (deftest + "second line mid" + (let + ((loc (pos-to-loc "abc\ndef" 6))) + (assert= (get loc :line) 2) + (assert= (get loc :col) 2))) + (deftest + "third line" + (let + ((loc (pos-to-loc "a\nb\nc" 4))) + (assert= (get loc :line) 3) + (assert= (get loc :col) 0))) + (deftest + "end of input" + (let + ((loc (pos-to-loc "abc" 3))) + (assert= (get loc :line) 1) + (assert= (get loc :col) 3)))) + +;; ── Parser combinator error locations ───────────────────────────── + +(defsuite + "parse-error-loc" + (deftest + "failed parse includes line and col" + (let + ((result (run-parser digit "abc"))) + (assert (not (ok? result))) + (let + ((loc (error-loc result "abc"))) + (assert= (get loc :line) 1) + (assert= (get loc :col) 0)))) + (deftest + "failed parse after consuming" + (let + ((result (run-parser (seq2 letter digit) "ab"))) + (assert (not (ok? result))) + (let + ((loc (error-loc result "ab"))) + (assert= (get loc :line) 1) + (assert= (get loc :col) 1)))) + (deftest + "failed parse on second line" + (let + ((result (run-parser (seq2 (parse-char "\n") digit) "\na"))) + (assert (not (ok? result))) + (let + ((loc (error-loc result "\na"))) + (assert= (get loc :line) 2) + (assert= (get loc :col) 0))))) + +;; ── format-parse-error: human-readable error ────────────────────── + +(defsuite + "format-parse-error" + (deftest + "single line error" + (let + ((result (run-parser digit "hello")) + (msg (format-parse-error result "hello"))) + (assert (string-contains? msg "line 1")) + (assert (string-contains? msg "digit")))) + (deftest + "multi-line error shows line number" + (let + ((input "abc\ndef\n!!!") + (result + (run-parser (seq (list (parse-string "abc\ndef\n") letter)) input)) + (msg (format-parse-error result input))) + (assert (string-contains? msg "line 3")))) + (deftest + "error includes source context" + (let + ((result (run-parser digit "let x = foo")) + (msg (format-parse-error result "let x = foo"))) + (assert (string-contains? msg "let x = foo"))))) + +;; ── source-loc on parsed AST ────────────────────────────────────── + +(defsuite + "ast-source-loc" + (deftest + "sx-parse-loc returns forms with locations" + (let + ((forms (sx-parse-loc "(+ 1 2)"))) + (assert (not (empty? forms))) + (let + ((loc (source-loc (first forms)))) + (assert (not (nil? loc))) + (assert= (get loc :line) 1) + (assert= (get loc :col) 0)))) + (deftest + "multi-line sx-parse-loc" + (let + ((forms (sx-parse-loc "(define x 1)\n(+ x 2)"))) + (assert= (len forms) 2) + (let + ((loc1 (source-loc (first forms))) + (loc2 (source-loc (nth forms 1)))) + (assert= (get loc1 :line) 1) + (assert= (get loc1 :col) 0) + (assert= (get loc2 :line) 2) + (assert= (get loc2 :col) 0)))) + (deftest + "form value is accessible" + (let + ((forms (sx-parse-loc "(+ 1 2)"))) + (let + ((form (get (first forms) :form))) + (assert (list? form)) + (assert= (len form) 3)))))