ocaml: character type — Char of int, #\a parser, all char primitives
- Char of int variant in sx_types.ml (Unicode codepoint) - type_of → "char", inspect → #\a / #\space / #\newline notation - #\ char literal reader in sx_parser.ml (named + single-char) - make-char char? char->integer integer->char char-upcase char-downcase - char=? char<? char>? char<=? char>=? comparators - char-ci=? char-ci<? char-ci>? char-ci<=? char-ci>=? case-insensitive - char-alphabetic? char-numeric? char-whitespace? char-upper-case? char-lower-case? - string->list (returns Char values) and list->string (accepts Char values) - fix get_val in sx_runtime.ml: add Integer n case for list indexing - fix raw_serialize in sx_server.ml: Integer and Char variants - 4493/4493 tests — +43 passing, zero regressions Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1377,6 +1377,7 @@ let rec dispatch env cmd =
|
|||||||
| Bool true -> "true"
|
| Bool true -> "true"
|
||||||
| Bool false -> "false"
|
| Bool false -> "false"
|
||||||
| Number n -> Sx_types.format_number n
|
| Number n -> Sx_types.format_number n
|
||||||
|
| Integer n -> string_of_int n
|
||||||
| String s -> "\"" ^ escape_sx_string s ^ "\""
|
| String s -> "\"" ^ escape_sx_string s ^ "\""
|
||||||
| Symbol s -> s
|
| Symbol s -> s
|
||||||
| Keyword k -> ":" ^ k
|
| Keyword k -> ":" ^ k
|
||||||
@@ -1390,6 +1391,7 @@ let rec dispatch env cmd =
|
|||||||
| Island i -> "~" ^ i.i_name
|
| Island i -> "~" ^ i.i_name
|
||||||
| SxExpr s -> s
|
| SxExpr s -> s
|
||||||
| RawHTML s -> "\"" ^ escape_sx_string s ^ "\""
|
| RawHTML s -> "\"" ^ escape_sx_string s ^ "\""
|
||||||
|
| Char n -> Sx_types.inspect (Char n)
|
||||||
| _ -> "nil"
|
| _ -> "nil"
|
||||||
in
|
in
|
||||||
send_ok_raw (raw_serialize result)
|
send_ok_raw (raw_serialize result)
|
||||||
|
|||||||
@@ -120,6 +120,27 @@ let rec read_value s : value =
|
|||||||
| '"' -> String (read_string s)
|
| '"' -> String (read_string s)
|
||||||
| '\'' -> advance s; List [Symbol "quote"; read_value s]
|
| '\'' -> advance s; List [Symbol "quote"; read_value s]
|
||||||
| '`' -> advance s; List [Symbol "quasiquote"; read_value s]
|
| '`' -> advance s; List [Symbol "quasiquote"; read_value s]
|
||||||
|
| '#' when s.pos + 1 < s.len && s.src.[s.pos + 1] = '\\' ->
|
||||||
|
(* Character literal: #\a, #\space, #\newline, etc. *)
|
||||||
|
advance s; advance s;
|
||||||
|
if at_end s then raise (Parse_error "Unexpected end of input after #\\");
|
||||||
|
let char_start = s.pos in
|
||||||
|
(* Read a name if starts with ident char, else single char *)
|
||||||
|
if is_ident_start s.src.[s.pos] then begin
|
||||||
|
while s.pos < s.len && is_ident_char s.src.[s.pos] do advance s done;
|
||||||
|
let name = String.sub s.src char_start (s.pos - char_start) in
|
||||||
|
let cp = match name with
|
||||||
|
| "space" -> 32 | "newline" -> 10 | "tab" -> 9
|
||||||
|
| "return" -> 13 | "nul" -> 0 | "null" -> 0
|
||||||
|
| "escape" -> 27 | "delete" -> 127 | "backspace" -> 8
|
||||||
|
| "altmode" -> 27 | "rubout" -> 127
|
||||||
|
| _ -> Char.code name.[0] (* single letter like #\a *)
|
||||||
|
in Char cp
|
||||||
|
end else begin
|
||||||
|
let c = s.src.[s.pos] in
|
||||||
|
advance s;
|
||||||
|
Char (Char.code c)
|
||||||
|
end
|
||||||
| '#' when s.pos + 1 < s.len && s.src.[s.pos + 1] = ';' ->
|
| '#' when s.pos + 1 < s.len && s.src.[s.pos + 1] = ';' ->
|
||||||
(* Datum comment: #; discards next expression *)
|
(* Datum comment: #; discards next expression *)
|
||||||
advance s; advance s;
|
advance s; advance s;
|
||||||
|
|||||||
@@ -2225,4 +2225,80 @@ let () =
|
|||||||
register "symbol-interned?" (fun args ->
|
register "symbol-interned?" (fun args ->
|
||||||
match args with
|
match args with
|
||||||
| [Symbol _] -> Bool true
|
| [Symbol _] -> Bool true
|
||||||
| _ -> raise (Eval_error "symbol-interned?: expected 1 symbol"))
|
| _ -> raise (Eval_error "symbol-interned?: expected 1 symbol"));
|
||||||
|
(* Phase 13: character type *)
|
||||||
|
let char_downcase_cp n =
|
||||||
|
if n >= 65 && n <= 90 then n + 32 else n in
|
||||||
|
let char_upcase_cp n =
|
||||||
|
if n >= 97 && n <= 122 then n - 32 else n in
|
||||||
|
register "make-char" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Integer n] -> Char n
|
||||||
|
| _ -> raise (Eval_error "make-char: expected integer codepoint"));
|
||||||
|
register "char?" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Char _] -> Bool true | [_] -> Bool false
|
||||||
|
| _ -> raise (Eval_error "char?: expected 1 argument"));
|
||||||
|
register "char->integer" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Char n] -> Integer n
|
||||||
|
| _ -> raise (Eval_error "char->integer: expected char"));
|
||||||
|
register "integer->char" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Integer n] -> Char n
|
||||||
|
| _ -> raise (Eval_error "integer->char: expected integer"));
|
||||||
|
register "char-upcase" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Char n] -> Char (char_upcase_cp n)
|
||||||
|
| _ -> raise (Eval_error "char-upcase: expected char"));
|
||||||
|
register "char-downcase" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Char n] -> Char (char_downcase_cp n)
|
||||||
|
| _ -> raise (Eval_error "char-downcase: expected char"));
|
||||||
|
register "char=?" (fun args -> match args with [Char a; Char b] -> Bool (a = b) | _ -> raise (Eval_error "char=?: expected 2 chars"));
|
||||||
|
register "char<?" (fun args -> match args with [Char a; Char b] -> Bool (a < b) | _ -> raise (Eval_error "char<?: expected 2 chars"));
|
||||||
|
register "char>?" (fun args -> match args with [Char a; Char b] -> Bool (a > b) | _ -> raise (Eval_error "char>?: expected 2 chars"));
|
||||||
|
register "char<=?" (fun args -> match args with [Char a; Char b] -> Bool (a <= b) | _ -> raise (Eval_error "char<=?: expected 2 chars"));
|
||||||
|
register "char>=?" (fun args -> match args with [Char a; Char b] -> Bool (a >= b) | _ -> raise (Eval_error "char>=?: expected 2 chars"));
|
||||||
|
register "char-ci=?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a = char_downcase_cp b) | _ -> raise (Eval_error "char-ci=?: expected 2 chars"));
|
||||||
|
register "char-ci<?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a < char_downcase_cp b) | _ -> raise (Eval_error "char-ci<?: expected 2 chars"));
|
||||||
|
register "char-ci>?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a > char_downcase_cp b) | _ -> raise (Eval_error "char-ci>?: expected 2 chars"));
|
||||||
|
register "char-ci<=?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a <= char_downcase_cp b) | _ -> raise (Eval_error "char-ci<=?: expected 2 chars"));
|
||||||
|
register "char-ci>=?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a >= char_downcase_cp b) | _ -> raise (Eval_error "char-ci>=?: expected 2 chars"));
|
||||||
|
register "char-alphabetic?" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Char n] -> Bool ((n >= 65 && n <= 90) || (n >= 97 && n <= 122))
|
||||||
|
| _ -> raise (Eval_error "char-alphabetic?: expected char"));
|
||||||
|
register "char-numeric?" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Char n] -> Bool (n >= 48 && n <= 57)
|
||||||
|
| _ -> raise (Eval_error "char-numeric?: expected char"));
|
||||||
|
register "char-whitespace?" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Char n] -> Bool (n = 32 || n = 9 || n = 10 || n = 13)
|
||||||
|
| _ -> raise (Eval_error "char-whitespace?: expected char"));
|
||||||
|
register "char-upper-case?" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Char n] -> Bool (n >= 65 && n <= 90)
|
||||||
|
| _ -> raise (Eval_error "char-upper-case?: expected char"));
|
||||||
|
register "char-lower-case?" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [Char n] -> Bool (n >= 97 && n <= 122)
|
||||||
|
| _ -> raise (Eval_error "char-lower-case?: expected char"));
|
||||||
|
register "string->list" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [String s] ->
|
||||||
|
let chars = ref [] in
|
||||||
|
String.iter (fun c -> chars := Char (Char.code c) :: !chars) s;
|
||||||
|
List (List.rev !chars)
|
||||||
|
| _ -> raise (Eval_error "string->list: expected string"));
|
||||||
|
register "list->string" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [List chars] | [ListRef { contents = chars }] ->
|
||||||
|
let buf = Buffer.create (List.length chars) in
|
||||||
|
List.iter (function
|
||||||
|
| Char n -> Buffer.add_char buf (Char.chr (n land 0xFF))
|
||||||
|
| v -> raise (Eval_error ("list->string: expected char, got " ^ type_of v))
|
||||||
|
) chars;
|
||||||
|
String (Buffer.contents buf)
|
||||||
|
| _ -> raise (Eval_error "list->string: expected list of chars"))
|
||||||
|
|||||||
@@ -211,6 +211,8 @@ let get_val container key =
|
|||||||
| Dict d, Keyword k -> dict_get d k
|
| Dict d, Keyword k -> dict_get d k
|
||||||
| (List l | ListRef { contents = l }), Number n ->
|
| (List l | ListRef { contents = l }), Number n ->
|
||||||
(try List.nth l (int_of_float n) with _ -> Nil)
|
(try List.nth l (int_of_float n) with _ -> Nil)
|
||||||
|
| (List l | ListRef { contents = l }), Integer n ->
|
||||||
|
(try List.nth l n with _ -> Nil)
|
||||||
| Nil, _ -> Nil (* nil.anything → nil *)
|
| Nil, _ -> Nil (* nil.anything → nil *)
|
||||||
| _, _ -> Nil (* type mismatch → nil (matches JS/Python behavior) *)
|
| _, _ -> Nil (* type mismatch → nil (matches JS/Python behavior) *)
|
||||||
|
|
||||||
|
|||||||
@@ -75,6 +75,7 @@ and value =
|
|||||||
| Vector of value array (** R7RS vector — mutable fixed-size array. *)
|
| Vector of value array (** R7RS vector — mutable fixed-size array. *)
|
||||||
| StringBuffer of Buffer.t (** Mutable string buffer — O(1) amortized append. *)
|
| StringBuffer of Buffer.t (** Mutable string buffer — O(1) amortized append. *)
|
||||||
| HashTable of (value, value) Hashtbl.t (** Mutable hash table with arbitrary keys. *)
|
| HashTable of (value, value) Hashtbl.t (** Mutable hash table with arbitrary keys. *)
|
||||||
|
| Char of int (** Unicode codepoint — R7RS char type. *)
|
||||||
|
|
||||||
(** CEK machine state — record instead of Dict for performance.
|
(** CEK machine state — record instead of Dict for performance.
|
||||||
5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *)
|
5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *)
|
||||||
@@ -495,6 +496,7 @@ let type_of = function
|
|||||||
| Vector _ -> "vector"
|
| Vector _ -> "vector"
|
||||||
| StringBuffer _ -> "string-buffer"
|
| StringBuffer _ -> "string-buffer"
|
||||||
| HashTable _ -> "hash-table"
|
| HashTable _ -> "hash-table"
|
||||||
|
| Char _ -> "char"
|
||||||
|
|
||||||
let is_nil = function Nil -> true | _ -> false
|
let is_nil = function Nil -> true | _ -> false
|
||||||
let is_lambda = function Lambda _ -> true | _ -> false
|
let is_lambda = function Lambda _ -> true | _ -> false
|
||||||
@@ -842,3 +844,12 @@ let rec inspect = function
|
|||||||
| VmMachine m -> Printf.sprintf "<vm-machine:sp=%d frames=%d>" m.vm_sp (List.length m.vm_frames)
|
| VmMachine m -> Printf.sprintf "<vm-machine:sp=%d frames=%d>" m.vm_sp (List.length m.vm_frames)
|
||||||
| StringBuffer buf -> Printf.sprintf "<string-buffer:%d>" (Buffer.length buf)
|
| StringBuffer buf -> Printf.sprintf "<string-buffer:%d>" (Buffer.length buf)
|
||||||
| HashTable ht -> Printf.sprintf "<hash-table:%d>" (Hashtbl.length ht)
|
| HashTable ht -> Printf.sprintf "<hash-table:%d>" (Hashtbl.length ht)
|
||||||
|
| Char n ->
|
||||||
|
let name = match n with
|
||||||
|
| 32 -> "space" | 10 -> "newline" | 9 -> "tab"
|
||||||
|
| 13 -> "return" | 0 -> "nul" | 27 -> "escape"
|
||||||
|
| 127 -> "delete" | 8 -> "backspace"
|
||||||
|
| _ -> let buf = Buffer.create 1 in
|
||||||
|
Buffer.add_utf_8_uchar buf (Uchar.of_int n);
|
||||||
|
Buffer.contents buf
|
||||||
|
in "#\\" ^ name
|
||||||
|
|||||||
Reference in New Issue
Block a user