Align OCaml parser with spec/parser.sx character classification
Replace permissive is_symbol_char (negative check — everything not a delimiter) with spec-compliant is_ident_start/is_ident_char (positive check matching the exact character sets documented in parser.sx). Changes: - ident-start: remove extra chars (|, %, ^, $) not in spec - ident-char: add comma (,) per spec - Comma (,) now handled as dedicated unquote case in match, not in the catch-all fallback — matches spec dispatch order - Remove ~@ splice-unquote alias (spec only defines ,@) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -30,10 +30,21 @@ let skip_whitespace_and_comments s =
|
||||
| _ -> ()
|
||||
in go ()
|
||||
|
||||
let is_symbol_char = function
|
||||
| '(' | ')' | '[' | ']' | '{' | '}' | '"' | '\'' | '`'
|
||||
| ' ' | '\t' | '\n' | '\r' | ',' | ';' -> false
|
||||
| _ -> true
|
||||
(* Character classification — matches spec/parser.sx ident-start/ident-char.
|
||||
ident-start: a-z A-Z _ ~ * + - > < = / ! ? &
|
||||
ident-char: ident-start plus 0-9 . : / # , *)
|
||||
let is_ident_start = function
|
||||
| 'a'..'z' | 'A'..'Z' | '_' | '~' | '*' | '+' | '-'
|
||||
| '>' | '<' | '=' | '/' | '!' | '?' | '&' -> true
|
||||
| _ -> false
|
||||
|
||||
let is_ident_char = function
|
||||
| c when is_ident_start c -> true
|
||||
| '0'..'9' | '.' | ':' | '#' | ',' -> true
|
||||
| _ -> false
|
||||
|
||||
(* Symbol reading uses ident_char; first char must be ident_start or digit/colon *)
|
||||
let is_symbol_char = is_ident_char
|
||||
|
||||
let read_string s =
|
||||
(* s.pos is on the opening quote *)
|
||||
@@ -116,20 +127,16 @@ let rec read_value s : value =
|
||||
go ()
|
||||
end
|
||||
in go ()
|
||||
| '~' when s.pos + 1 < s.len && s.src.[s.pos + 1] = '@' ->
|
||||
advance s; advance s; (* skip ~@ *)
|
||||
List [Symbol "splice-unquote"; read_value s]
|
||||
| _ ->
|
||||
(* Check for unquote: , followed by non-whitespace *)
|
||||
if s.src.[s.pos] = ',' && s.pos + 1 < s.len &&
|
||||
s.src.[s.pos + 1] <> ' ' && s.src.[s.pos + 1] <> '\n' then begin
|
||||
| ',' ->
|
||||
(* Unquote / splice-unquote — matches spec: , always triggers unquote *)
|
||||
advance s;
|
||||
if s.pos < s.len && s.src.[s.pos] = '@' then begin
|
||||
advance s;
|
||||
List [Symbol "splice-unquote"; read_value s]
|
||||
end else
|
||||
List [Symbol "unquote"; read_value s]
|
||||
end else begin
|
||||
| _ ->
|
||||
begin
|
||||
(* Symbol, keyword, number, or boolean *)
|
||||
let token = read_symbol s in
|
||||
if token = "" then raise (Parse_error ("Unexpected char: " ^ String.make 1 s.src.[s.pos]));
|
||||
|
||||
Reference in New Issue
Block a user