Step 13: String/regex primitives — PCRE-compatible, cross-host

New primitives in sx_primitives.ml:
  char-at, char-code, parse-number — string inspection + conversion
  regex-match, regex-match?, regex-find-all — PCRE pattern matching
  regex-replace, regex-replace-first — PCRE substitution
  regex-split — split by PCRE pattern

Uses Re.Pcre (OCaml re library) so regex patterns use the same syntax
as JS RegExp — patterns in .sx files work identically on browser and
server. Replaces the old test-only regex-find-all stub.

Also: split now handles multi-char separators via Re.

176 new tests (10 suites). 2912/2912 total, zero failures.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-05 20:38:40 +00:00
parent 516f9c7186
commit 5d88b363e4
4 changed files with 247 additions and 48 deletions

View File

@@ -470,52 +470,7 @@ let make_test_env () =
let stack = try Hashtbl.find _scope_stacks name with Not_found -> [] in
(match stack with _ :: rest -> Hashtbl.replace _scope_stacks name (List [] :: rest) | [] -> ()); Nil
| _ -> Nil);
bind "regex-find-all" (fun args ->
(* Stub: supports ~name patterns for component scanning *)
match args with
| [String pattern; String text] ->
(* Extract the literal prefix from patterns like:
"(~[a-z/.-]+" → prefix "~", has_group=true
"\(~([a-zA-Z_]..." → prefix "(~", has_group=true *)
let prefix, has_group =
if String.length pattern >= 4 && pattern.[0] = '\\' && pattern.[1] = '(' then
(* Pattern like \(~(...) — literal "(" + "~" prefix, group after *)
let s = String.sub pattern 2 (String.length pattern - 2) in
let lit_end = try String.index s '(' with Not_found -> try String.index s '[' with Not_found -> String.length s in
let lit = String.sub s 0 lit_end in
("(" ^ lit, true)
else if String.length pattern > 2 && pattern.[0] = '(' then
let s = String.sub pattern 1 (String.length pattern - 1) in
let p = try String.sub s 0 (String.index s '[')
with Not_found -> try String.sub s 0 (String.index s '(')
with Not_found -> s in
((if String.length p > 0 then p else "~"), true)
else (pattern, false)
in
let results = ref [] in
let len = String.length text in
let plen = String.length prefix in
let i = ref 0 in
while !i <= len - plen do
if String.sub text !i plen = prefix then begin
(* Find end of identifier *)
let j = ref (!i + plen) in
while !j < len && let c = text.[!j] in
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')
|| c = '-' || c = '/' || c = '_' || c = '.' do
incr j
done;
let full_match = String.sub text !i (!j - !i) in
(* If pattern has capture group, strip the literal prefix to simulate group 1 *)
let result = if has_group then
String.sub full_match plen (String.length full_match - plen)
else full_match in
results := String result :: !results;
i := !j
end else incr i
done;
List (List.rev !results)
| _ -> List []);
(* regex-find-all now provided by sx_primitives.ml *)
bind "callable?" (fun args ->
match args with
| [NativeFn _] | [Lambda _] | [Component _] | [Island _] -> Bool true

View File

@@ -1,3 +1,4 @@
(library
(name sx)
(wrapped false))
(wrapped false)
(libraries re re.pcre))

View File

@@ -398,7 +398,12 @@ let () =
register "split" (fun args ->
match args with
| [String s; String sep] ->
List (List.map (fun p -> String p) (String.split_on_char sep.[0] s))
if String.length sep = 1 then
List (List.map (fun p -> String p) (String.split_on_char sep.[0] s))
else
(* Multi-char separator: use Re for literal split *)
let re = Re.compile (Re.str sep) in
List (List.map (fun p -> String p) (Re.split re s))
| _ -> raise (Eval_error "split: 2 args"));
register "join" (fun args ->
match args with
@@ -441,6 +446,94 @@ let () =
Buffer.add_utf_8_uchar buf (Uchar.of_int (int_of_float n));
String (Buffer.contents buf)
| _ -> raise (Eval_error "char-from-code: 1 arg"));
register "char-at" (fun args ->
match args with
| [String s; Number n] ->
let i = int_of_float n in
if i >= 0 && i < String.length s then
String (String.make 1 s.[i])
else Nil
| _ -> raise (Eval_error "char-at: string and index"));
register "char-code" (fun args ->
match args with
| [String s] when String.length s > 0 -> Number (float_of_int (Char.code s.[0]))
| _ -> raise (Eval_error "char-code: 1 non-empty string arg"));
register "parse-number" (fun args ->
match args with
| [String s] ->
(try Number (float_of_string s)
with Failure _ -> Nil)
| _ -> raise (Eval_error "parse-number: 1 string arg"));
(* === Regex (PCRE-compatible — same syntax as JS RegExp) === *)
register "regex-match" (fun args ->
match args with
| [String pattern; String input] ->
(try
let re = Re.Pcre.re pattern |> Re.compile in
match Re.exec_opt re input with
| Some group ->
let full = Re.Group.get group 0 in
let n = Re.Group.nb_groups group in
let groups = ref [String full] in
for i = 1 to n - 1 do
(try groups := !groups @ [String (Re.Group.get group i)]
with Not_found -> groups := !groups @ [Nil])
done;
List !groups
| None -> Nil
with _ -> Nil)
| _ -> raise (Eval_error "regex-match: pattern and input strings"));
register "regex-match?" (fun args ->
match args with
| [String pattern; String input] ->
(try Bool (Re.execp (Re.Pcre.re pattern |> Re.compile) input)
with _ -> Bool false)
| _ -> raise (Eval_error "regex-match?: pattern and input strings"));
register "regex-find-all" (fun args ->
match args with
| [String pattern; String input] ->
(try
let re = Re.Pcre.re pattern |> Re.compile in
let matches = Re.all re input in
let results = List.map (fun group ->
(* If there's a capture group, return group 1; else full match *)
try String (Re.Group.get group 1)
with Not_found -> String (Re.Group.get group 0)
) matches in
ListRef (ref results)
with _ -> ListRef (ref []))
| _ -> raise (Eval_error "regex-find-all: pattern and input strings"));
register "regex-replace" (fun args ->
match args with
| [String pattern; String replacement; String input] ->
(try
let re = Re.Pcre.re pattern |> Re.compile in
String (Re.replace_string re ~by:replacement input)
with _ -> String input)
| _ -> raise (Eval_error "regex-replace: pattern, replacement, input strings"));
register "regex-replace-first" (fun args ->
match args with
| [String pattern; String replacement; String input] ->
(try
let re = Re.Pcre.re pattern |> Re.compile in
(* Re doesn't have replace_first, so use all matches and replace only first *)
match Re.exec_opt re input with
| Some group ->
let start = Re.Group.start group 0 and stop = Re.Group.stop group 0 in
String (String.sub input 0 start ^ replacement ^
String.sub input stop (String.length input - stop))
| None -> String input
with _ -> String input)
| _ -> raise (Eval_error "regex-replace-first: pattern, replacement, input strings"));
register "regex-split" (fun args ->
match args with
| [String pattern; String input] ->
(try
let re = Re.Pcre.re pattern |> Re.compile in
ListRef (ref (List.map (fun s -> String s) (Re.split re input)))
with _ -> ListRef (ref [String input]))
| _ -> raise (Eval_error "regex-split: pattern and input strings"));
(* === Collections === *)
register "list" (fun args -> ListRef (ref args));