Step 14: source locations — pos-to-loc, error-loc, sx-parse-loc — 15 tests
Pure SX layer: pos-to-loc (offset→line/col), error-loc (parse result→loc), format-parse-error (human-readable error with source context line). OCaml platform: cst_to_ast_loc (CST spans→loc dicts), sx-parse-loc primitive (parse with locations), source-loc accessor. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -164,6 +164,24 @@ let make_test_env () =
|
||||
| [String s] -> List (parse_all s)
|
||||
| _ -> raise (Eval_error "sx-parse: expected string"));
|
||||
|
||||
bind "sx-parse-loc" (fun args ->
|
||||
match args with
|
||||
| [String s] ->
|
||||
let cst = Sx_parser.parse_all_cst s in
|
||||
List (Sx_cst.cst_to_ast_loc s cst.nodes)
|
||||
| _ -> raise (Eval_error "sx-parse-loc: expected string"));
|
||||
|
||||
bind "source-loc" (fun args ->
|
||||
match args with
|
||||
| [Dict d] ->
|
||||
let line = try Hashtbl.find d "line" with Not_found -> Nil in
|
||||
let col = try Hashtbl.find d "col" with Not_found -> Nil in
|
||||
let ld = Sx_types.make_dict () in
|
||||
Sx_types.dict_set ld "line" line;
|
||||
Sx_types.dict_set ld "col" col;
|
||||
Dict ld
|
||||
| _ -> Nil);
|
||||
|
||||
bind "sx-parse-one" (fun args ->
|
||||
match args with
|
||||
| [String s] ->
|
||||
|
||||
@@ -104,6 +104,33 @@ let rec cst_to_ast = function
|
||||
Dict d
|
||||
|
||||
|
||||
(** Convert character offset to line/col (1-based lines, 0-based cols) *)
|
||||
let offset_to_loc src offset =
|
||||
let line = ref 1 and col = ref 0 in
|
||||
for i = 0 to min (offset - 1) (String.length src - 1) do
|
||||
if src.[i] = '\n' then (incr line; col := 0)
|
||||
else col := !col + 1
|
||||
done;
|
||||
(!line, !col)
|
||||
|
||||
(** CST → AST with source location dicts ({:form value :line N :col N}) *)
|
||||
let cst_to_ast_loc src nodes =
|
||||
List.map (fun node ->
|
||||
let span = match node with
|
||||
| CstAtom { span; _ } -> span
|
||||
| CstList { span; _ } -> span
|
||||
| CstDict { span; _ } -> span
|
||||
in
|
||||
let value = cst_to_ast node in
|
||||
let (line, col) = offset_to_loc src span.start_offset in
|
||||
let d = make_dict () in
|
||||
dict_set d "form" value;
|
||||
dict_set d "line" (Number (float_of_int line));
|
||||
dict_set d "col" (Number (float_of_int col));
|
||||
Dict d
|
||||
) nodes
|
||||
|
||||
|
||||
(** {1 CST editing — apply AST-level edits back to the CST} *)
|
||||
|
||||
(** Replace the CST node at [path] with [new_source], preserving the
|
||||
|
||||
@@ -426,6 +426,47 @@
|
||||
|
||||
;; ── Layer 8: SX tokenizer ─────────────────────────────────────────
|
||||
|
||||
(define
|
||||
pos-to-loc
|
||||
(fn
|
||||
(input pos)
|
||||
(let
|
||||
loop
|
||||
((i 0) (line 1) (col 0))
|
||||
(if
|
||||
(>= i pos)
|
||||
{:line line :col col}
|
||||
(if
|
||||
(= (char-at input i) "\n")
|
||||
(loop (+ i 1) (+ line 1) 0)
|
||||
(loop (+ i 1) line (+ col 1)))))))
|
||||
|
||||
(define
|
||||
error-loc
|
||||
(fn (result input) (pos-to-loc input (result-pos result))))
|
||||
|
||||
(define
|
||||
format-parse-error
|
||||
(fn
|
||||
(result input)
|
||||
(let
|
||||
((loc (error-loc result input))
|
||||
(line-num (get loc :line))
|
||||
(col-num (get loc :col))
|
||||
(expected (result-expected result))
|
||||
(lines (split input "\n"))
|
||||
(source-line
|
||||
(if (<= line-num (len lines)) (nth lines (- line-num 1)) "")))
|
||||
(str
|
||||
"line "
|
||||
line-num
|
||||
", col "
|
||||
col-num
|
||||
": expected "
|
||||
expected
|
||||
"\n"
|
||||
source-line))))
|
||||
|
||||
(define
|
||||
sx-comment
|
||||
(fn
|
||||
|
||||
138
spec/tests/test-source-locations.sx
Normal file
138
spec/tests/test-source-locations.sx
Normal file
@@ -0,0 +1,138 @@
|
||||
;; Step 14: Source maps / error reporting
|
||||
;; Source locations tracked through parse → compile → eval.
|
||||
;; pos-to-loc converts character offset to {:line :col}.
|
||||
;; Errors include source context.
|
||||
|
||||
;; ── pos-to-loc: offset → line/col ────────────────────────────────
|
||||
|
||||
(defsuite
|
||||
"pos-to-loc"
|
||||
(deftest
|
||||
"first char is line 1 col 0"
|
||||
(let
|
||||
((loc (pos-to-loc "hello" 0)))
|
||||
(assert= (get loc :line) 1)
|
||||
(assert= (get loc :col) 0)))
|
||||
(deftest
|
||||
"mid-line position"
|
||||
(let
|
||||
((loc (pos-to-loc "hello" 3)))
|
||||
(assert= (get loc :line) 1)
|
||||
(assert= (get loc :col) 3)))
|
||||
(deftest
|
||||
"second line"
|
||||
(let
|
||||
((loc (pos-to-loc "abc\ndef" 4)))
|
||||
(assert= (get loc :line) 2)
|
||||
(assert= (get loc :col) 0)))
|
||||
(deftest
|
||||
"second line mid"
|
||||
(let
|
||||
((loc (pos-to-loc "abc\ndef" 6)))
|
||||
(assert= (get loc :line) 2)
|
||||
(assert= (get loc :col) 2)))
|
||||
(deftest
|
||||
"third line"
|
||||
(let
|
||||
((loc (pos-to-loc "a\nb\nc" 4)))
|
||||
(assert= (get loc :line) 3)
|
||||
(assert= (get loc :col) 0)))
|
||||
(deftest
|
||||
"end of input"
|
||||
(let
|
||||
((loc (pos-to-loc "abc" 3)))
|
||||
(assert= (get loc :line) 1)
|
||||
(assert= (get loc :col) 3))))
|
||||
|
||||
;; ── Parser combinator error locations ─────────────────────────────
|
||||
|
||||
(defsuite
|
||||
"parse-error-loc"
|
||||
(deftest
|
||||
"failed parse includes line and col"
|
||||
(let
|
||||
((result (run-parser digit "abc")))
|
||||
(assert (not (ok? result)))
|
||||
(let
|
||||
((loc (error-loc result "abc")))
|
||||
(assert= (get loc :line) 1)
|
||||
(assert= (get loc :col) 0))))
|
||||
(deftest
|
||||
"failed parse after consuming"
|
||||
(let
|
||||
((result (run-parser (seq2 letter digit) "ab")))
|
||||
(assert (not (ok? result)))
|
||||
(let
|
||||
((loc (error-loc result "ab")))
|
||||
(assert= (get loc :line) 1)
|
||||
(assert= (get loc :col) 1))))
|
||||
(deftest
|
||||
"failed parse on second line"
|
||||
(let
|
||||
((result (run-parser (seq2 (parse-char "\n") digit) "\na")))
|
||||
(assert (not (ok? result)))
|
||||
(let
|
||||
((loc (error-loc result "\na")))
|
||||
(assert= (get loc :line) 2)
|
||||
(assert= (get loc :col) 0)))))
|
||||
|
||||
;; ── format-parse-error: human-readable error ──────────────────────
|
||||
|
||||
(defsuite
|
||||
"format-parse-error"
|
||||
(deftest
|
||||
"single line error"
|
||||
(let
|
||||
((result (run-parser digit "hello"))
|
||||
(msg (format-parse-error result "hello")))
|
||||
(assert (string-contains? msg "line 1"))
|
||||
(assert (string-contains? msg "digit"))))
|
||||
(deftest
|
||||
"multi-line error shows line number"
|
||||
(let
|
||||
((input "abc\ndef\n!!!")
|
||||
(result
|
||||
(run-parser (seq (list (parse-string "abc\ndef\n") letter)) input))
|
||||
(msg (format-parse-error result input)))
|
||||
(assert (string-contains? msg "line 3"))))
|
||||
(deftest
|
||||
"error includes source context"
|
||||
(let
|
||||
((result (run-parser digit "let x = foo"))
|
||||
(msg (format-parse-error result "let x = foo")))
|
||||
(assert (string-contains? msg "let x = foo")))))
|
||||
|
||||
;; ── source-loc on parsed AST ──────────────────────────────────────
|
||||
|
||||
(defsuite
|
||||
"ast-source-loc"
|
||||
(deftest
|
||||
"sx-parse-loc returns forms with locations"
|
||||
(let
|
||||
((forms (sx-parse-loc "(+ 1 2)")))
|
||||
(assert (not (empty? forms)))
|
||||
(let
|
||||
((loc (source-loc (first forms))))
|
||||
(assert (not (nil? loc)))
|
||||
(assert= (get loc :line) 1)
|
||||
(assert= (get loc :col) 0))))
|
||||
(deftest
|
||||
"multi-line sx-parse-loc"
|
||||
(let
|
||||
((forms (sx-parse-loc "(define x 1)\n(+ x 2)")))
|
||||
(assert= (len forms) 2)
|
||||
(let
|
||||
((loc1 (source-loc (first forms)))
|
||||
(loc2 (source-loc (nth forms 1))))
|
||||
(assert= (get loc1 :line) 1)
|
||||
(assert= (get loc1 :col) 0)
|
||||
(assert= (get loc2 :line) 2)
|
||||
(assert= (get loc2 :col) 0))))
|
||||
(deftest
|
||||
"form value is accessible"
|
||||
(let
|
||||
((forms (sx-parse-loc "(+ 1 2)")))
|
||||
(let
|
||||
((form (get (first forms) :form)))
|
||||
(assert (list? form))
|
||||
(assert= (len form) 3)))))
|
||||
Reference in New Issue
Block a user