Align OCaml parser with spec/parser.sx character classification

Replace permissive is_symbol_char (negative check — everything not a
delimiter) with spec-compliant is_ident_start/is_ident_char (positive
check matching the exact character sets documented in parser.sx).

Changes:
- ident-start: remove extra chars (|, %, ^, $) not in spec
- ident-char: add comma (,) per spec
- Comma (,) now handled as dedicated unquote case in match, not in
  the catch-all fallback — matches spec dispatch order
- Remove ~@ splice-unquote alias (spec only defines ,@)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-03-24 10:13:03 +00:00
parent 2a9a4b41bd
commit 0c7567925e

View File

@@ -30,10 +30,21 @@ let skip_whitespace_and_comments s =
| _ -> ()
in go ()
(* Permissive symbol-character test: a character may appear in a symbol
   unless it is one of the delimiters listed in the string below
   (parentheses, square brackets, braces, double quote, single quote,
   backquote, space, tab, newline, carriage return, comma, semicolon). *)
let is_symbol_char c =
  let delimiters = "()[]{}\"'` \t\n\r,;" in
  not (String.contains delimiters c)
(* Character classification, written as a positive check against the
   ident-start / ident-char sets of spec/parser.sx:
     ident-start: a-z A-Z _ ~ * + - > < = / ! ? &
     ident-char:  ident-start plus 0-9 . : / # ,
   [is_ident_start c] is true exactly when [c] is an ASCII letter or one
   of the ident-start punctuation characters listed above. *)
let is_ident_start c =
  let is_letter = (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') in
  is_letter || String.contains "_~*+-><=/!?&" c
(* [is_ident_char c] accepts every character accepted by [is_ident_start]
   and, in addition, the digits 0-9 and the characters . : # , *)
let is_ident_char c =
  is_ident_start c
  || (match c with
      | '0'..'9' | '.' | ':' | '#' | ',' -> true
      | _ -> false)
(* Symbol reading classifies characters with [is_ident_char]. As noted in
   the original comment, the first character of a token is expected to be
   an ident-start character, a digit, or a colon. *)
let is_symbol_char c = is_ident_char c
let read_string s =
(* s.pos is on the opening quote *)
@@ -116,20 +127,16 @@ let rec read_value s : value =
go ()
end
in go ()
| '~' when s.pos + 1 < s.len && s.src.[s.pos + 1] = '@' ->
advance s; advance s; (* skip ~@ *)
List [Symbol "splice-unquote"; read_value s]
| _ ->
(* Check for unquote: , followed by non-whitespace *)
if s.src.[s.pos] = ',' && s.pos + 1 < s.len &&
s.src.[s.pos + 1] <> ' ' && s.src.[s.pos + 1] <> '\n' then begin
| ',' ->
(* Unquote / splice-unquote — matches spec: , always triggers unquote *)
advance s;
if s.pos < s.len && s.src.[s.pos] = '@' then begin
advance s;
List [Symbol "splice-unquote"; read_value s]
end else
List [Symbol "unquote"; read_value s]
end else begin
| _ ->
begin
(* Symbol, keyword, number, or boolean *)
let token = read_symbol s in
if token = "" then raise (Parse_error ("Unexpected char: " ^ String.make 1 s.src.[s.pos]));