ocaml: character type — Char of int, #\a parser, all char primitives

- Char of int variant in sx_types.ml (Unicode codepoint)
- type_of → "char", inspect → #\a / #\space / #\newline notation
- #\ char literal reader in sx_parser.ml (named + single-char)
- make-char char? char->integer integer->char char-upcase char-downcase
- char=? char<? char>? char<=? char>=? comparators
- char-ci=? char-ci<? char-ci>? char-ci<=? char-ci>=? case-insensitive
- char-alphabetic? char-numeric? char-whitespace? char-upper-case? char-lower-case?
- string->list (returns Char values) and list->string (accepts Char values)
- fix get_val in sx_runtime.ml: add Integer n case for list indexing
- fix raw_serialize in sx_server.ml: Integer and Char variants
- 4493/4493 tests — +43 passing, zero regressions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-01 12:19:42 +00:00
parent 60f88ab4fe
commit b939becd86
5 changed files with 113 additions and 1 deletions

View File

@@ -1377,6 +1377,7 @@ let rec dispatch env cmd =
| Bool true -> "true"
| Bool false -> "false"
| Number n -> Sx_types.format_number n
| Integer n -> string_of_int n
| String s -> "\"" ^ escape_sx_string s ^ "\""
| Symbol s -> s
| Keyword k -> ":" ^ k
@@ -1390,6 +1391,7 @@ let rec dispatch env cmd =
| Island i -> "~" ^ i.i_name
| SxExpr s -> s
| RawHTML s -> "\"" ^ escape_sx_string s ^ "\""
| Char n -> Sx_types.inspect (Char n)
| _ -> "nil"
in
send_ok_raw (raw_serialize result)

View File

@@ -120,6 +120,27 @@ let rec read_value s : value =
| '"' -> String (read_string s)
| '\'' -> advance s; List [Symbol "quote"; read_value s]
| '`' -> advance s; List [Symbol "quasiquote"; read_value s]
| '#' when s.pos + 1 < s.len && s.src.[s.pos + 1] = '\\' ->
(* Character literal: #\a, #\space, #\newline, etc. *)
advance s; advance s;
if at_end s then raise (Parse_error "Unexpected end of input after #\\");
let char_start = s.pos in
(* Read a name if starts with ident char, else single char *)
if is_ident_start s.src.[s.pos] then begin
while s.pos < s.len && is_ident_char s.src.[s.pos] do advance s done;
let name = String.sub s.src char_start (s.pos - char_start) in
let cp = match name with
| "space" -> 32 | "newline" -> 10 | "tab" -> 9
| "return" -> 13 | "nul" -> 0 | "null" -> 0
| "escape" -> 27 | "delete" -> 127 | "backspace" -> 8
| "altmode" -> 27 | "rubout" -> 127
| _ -> Char.code name.[0] (* single letter like #\a *)
in Char cp
end else begin
let c = s.src.[s.pos] in
advance s;
Char (Char.code c)
end
| '#' when s.pos + 1 < s.len && s.src.[s.pos + 1] = ';' ->
(* Datum comment: #; discards next expression *)
advance s; advance s;

View File

@@ -2225,4 +2225,80 @@ let () =
register "symbol-interned?" (fun args ->
match args with
| [Symbol _] -> Bool true
| _ -> raise (Eval_error "symbol-interned?: expected 1 symbol"))
| _ -> raise (Eval_error "symbol-interned?: expected 1 symbol"));
(* Phase 13: character type *)
let char_downcase_cp n =
if n >= 65 && n <= 90 then n + 32 else n in
let char_upcase_cp n =
if n >= 97 && n <= 122 then n - 32 else n in
register "make-char" (fun args ->
match args with
| [Integer n] -> Char n
| _ -> raise (Eval_error "make-char: expected integer codepoint"));
register "char?" (fun args ->
match args with
| [Char _] -> Bool true | [_] -> Bool false
| _ -> raise (Eval_error "char?: expected 1 argument"));
register "char->integer" (fun args ->
match args with
| [Char n] -> Integer n
| _ -> raise (Eval_error "char->integer: expected char"));
register "integer->char" (fun args ->
match args with
| [Integer n] -> Char n
| _ -> raise (Eval_error "integer->char: expected integer"));
register "char-upcase" (fun args ->
match args with
| [Char n] -> Char (char_upcase_cp n)
| _ -> raise (Eval_error "char-upcase: expected char"));
register "char-downcase" (fun args ->
match args with
| [Char n] -> Char (char_downcase_cp n)
| _ -> raise (Eval_error "char-downcase: expected char"));
register "char=?" (fun args -> match args with [Char a; Char b] -> Bool (a = b) | _ -> raise (Eval_error "char=?: expected 2 chars"));
register "char<?" (fun args -> match args with [Char a; Char b] -> Bool (a < b) | _ -> raise (Eval_error "char<?: expected 2 chars"));
register "char>?" (fun args -> match args with [Char a; Char b] -> Bool (a > b) | _ -> raise (Eval_error "char>?: expected 2 chars"));
register "char<=?" (fun args -> match args with [Char a; Char b] -> Bool (a <= b) | _ -> raise (Eval_error "char<=?: expected 2 chars"));
register "char>=?" (fun args -> match args with [Char a; Char b] -> Bool (a >= b) | _ -> raise (Eval_error "char>=?: expected 2 chars"));
register "char-ci=?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a = char_downcase_cp b) | _ -> raise (Eval_error "char-ci=?: expected 2 chars"));
register "char-ci<?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a < char_downcase_cp b) | _ -> raise (Eval_error "char-ci<?: expected 2 chars"));
register "char-ci>?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a > char_downcase_cp b) | _ -> raise (Eval_error "char-ci>?: expected 2 chars"));
register "char-ci<=?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a <= char_downcase_cp b) | _ -> raise (Eval_error "char-ci<=?: expected 2 chars"));
register "char-ci>=?" (fun args -> match args with [Char a; Char b] -> Bool (char_downcase_cp a >= char_downcase_cp b) | _ -> raise (Eval_error "char-ci>=?: expected 2 chars"));
register "char-alphabetic?" (fun args ->
match args with
| [Char n] -> Bool ((n >= 65 && n <= 90) || (n >= 97 && n <= 122))
| _ -> raise (Eval_error "char-alphabetic?: expected char"));
register "char-numeric?" (fun args ->
match args with
| [Char n] -> Bool (n >= 48 && n <= 57)
| _ -> raise (Eval_error "char-numeric?: expected char"));
register "char-whitespace?" (fun args ->
match args with
| [Char n] -> Bool (n = 32 || n = 9 || n = 10 || n = 13)
| _ -> raise (Eval_error "char-whitespace?: expected char"));
register "char-upper-case?" (fun args ->
match args with
| [Char n] -> Bool (n >= 65 && n <= 90)
| _ -> raise (Eval_error "char-upper-case?: expected char"));
register "char-lower-case?" (fun args ->
match args with
| [Char n] -> Bool (n >= 97 && n <= 122)
| _ -> raise (Eval_error "char-lower-case?: expected char"));
register "string->list" (fun args ->
match args with
| [String s] ->
let chars = ref [] in
String.iter (fun c -> chars := Char (Char.code c) :: !chars) s;
List (List.rev !chars)
| _ -> raise (Eval_error "string->list: expected string"));
register "list->string" (fun args ->
match args with
| [List chars] | [ListRef { contents = chars }] ->
let buf = Buffer.create (List.length chars) in
List.iter (function
| Char n -> Buffer.add_char buf (Char.chr (n land 0xFF))
| v -> raise (Eval_error ("list->string: expected char, got " ^ type_of v))
) chars;
String (Buffer.contents buf)
| _ -> raise (Eval_error "list->string: expected list of chars"))

View File

@@ -211,6 +211,8 @@ let get_val container key =
| Dict d, Keyword k -> dict_get d k
| (List l | ListRef { contents = l }), Number n ->
(try List.nth l (int_of_float n) with _ -> Nil)
| (List l | ListRef { contents = l }), Integer n ->
(try List.nth l n with _ -> Nil)
| Nil, _ -> Nil (* nil.anything → nil *)
| _, _ -> Nil (* type mismatch → nil (matches JS/Python behavior) *)

View File

@@ -75,6 +75,7 @@ and value =
| Vector of value array (** R7RS vector — mutable fixed-size array. *)
| StringBuffer of Buffer.t (** Mutable string buffer — O(1) amortized append. *)
| HashTable of (value, value) Hashtbl.t (** Mutable hash table with arbitrary keys. *)
| Char of int (** Unicode codepoint — R7RS char type. *)
(** CEK machine state — record instead of Dict for performance.
5 fields × 55K steps/sec = 275K Hashtbl allocations/sec eliminated. *)
@@ -495,6 +496,7 @@ let type_of = function
| Vector _ -> "vector"
| StringBuffer _ -> "string-buffer"
| HashTable _ -> "hash-table"
| Char _ -> "char"
let is_nil = function Nil -> true | _ -> false
let is_lambda = function Lambda _ -> true | _ -> false
@@ -842,3 +844,12 @@ let rec inspect = function
| VmMachine m -> Printf.sprintf "<vm-machine:sp=%d frames=%d>" m.vm_sp (List.length m.vm_frames)
| StringBuffer buf -> Printf.sprintf "<string-buffer:%d>" (Buffer.length buf)
| HashTable ht -> Printf.sprintf "<hash-table:%d>" (Hashtbl.length ht)
| Char n ->
let name = match n with
| 32 -> "space" | 10 -> "newline" | 9 -> "tab"
| 13 -> "return" | 0 -> "nul" | 27 -> "escape"
| 127 -> "delete" | 8 -> "backspace"
| _ -> let buf = Buffer.create 1 in
Buffer.add_utf_8_uchar buf (Uchar.of_int n);
Buffer.contents buf
in "#\\" ^ name