sx primitives: add regex-* (Re + Re.Pcre backed)
Adds regex-compile/test/exec/match-all/replace/replace-fn/split/source/flags.
Opaque dict handle {:__regex__ true :id :source :flags}; compiled Re.re
cached in a primitives-local table. Replacement supports $&, $1-$9, $$.
Flags: i (CASELESS), m (MULTILINE), s (DOTALL). g is a runtime flag handled
in replace. u (unicode) skipped for now.
Unblocks js-on-sx's regex-platform-override! hook — the JS RegExp shim can
now delegate to real regex instead of the substring stub.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1696,4 +1696,179 @@ let () =
|
||||
| [thunk] ->
|
||||
(try !Sx_types._cek_call_ref thunk Nil
|
||||
with _ -> Nil)
|
||||
| _ -> Nil)
|
||||
| _ -> Nil);
|
||||
|
||||
(* === Regex === wrapping Re + Re.Pcre *)
|
||||
let regex_table : (int, Re.re * string * string) Hashtbl.t = Hashtbl.create 32 in
|
||||
let regex_next_id = ref 0 in
|
||||
let parse_flags flags =
|
||||
let opts = ref [] in
|
||||
String.iter (function
|
||||
| 'i' -> opts := `CASELESS :: !opts
|
||||
| 'm' -> opts := `MULTILINE :: !opts
|
||||
| 's' -> opts := `DOTALL :: !opts
|
||||
| _ -> ()) flags;
|
||||
!opts
|
||||
in
|
||||
let make_regex_value id source flags =
|
||||
let d = Hashtbl.create 4 in
|
||||
Hashtbl.replace d "__regex__" (Bool true);
|
||||
Hashtbl.replace d "id" (Number (float_of_int id));
|
||||
Hashtbl.replace d "source" (String source);
|
||||
Hashtbl.replace d "flags" (String flags);
|
||||
Dict d
|
||||
in
|
||||
let regex_of_value = function
|
||||
| Dict d ->
|
||||
(match Hashtbl.find_opt d "id" with
|
||||
| Some (Number n) ->
|
||||
(match Hashtbl.find_opt regex_table (int_of_float n) with
|
||||
| Some r -> r
|
||||
| None -> raise (Eval_error "regex: handle not found"))
|
||||
| _ -> raise (Eval_error "regex: missing id"))
|
||||
| _ -> raise (Eval_error "regex: expected regex dict")
|
||||
in
|
||||
let group_to_dict g input =
|
||||
let d = Hashtbl.create 4 in
|
||||
Hashtbl.replace d "match" (String (Re.Group.get g 0));
|
||||
Hashtbl.replace d "index" (Number (float_of_int (Re.Group.start g 0)));
|
||||
Hashtbl.replace d "input" (String input);
|
||||
let count = Re.Group.nb_groups g in
|
||||
let groups = ref [] in
|
||||
for i = count - 1 downto 1 do
|
||||
let s = try Re.Group.get g i with Not_found -> "" in
|
||||
groups := String s :: !groups
|
||||
done;
|
||||
Hashtbl.replace d "groups" (List !groups);
|
||||
Dict d
|
||||
in
|
||||
register "regex-compile" (fun args ->
|
||||
match args with
|
||||
| [String source; String flags] | [String source; String flags; _] ->
|
||||
let opts = parse_flags flags in
|
||||
(try
|
||||
let re = Re.compile (Re.Pcre.re ~flags:opts source) in
|
||||
let id = !regex_next_id in
|
||||
incr regex_next_id;
|
||||
Hashtbl.replace regex_table id (re, source, flags);
|
||||
make_regex_value id source flags
|
||||
with _ -> raise (Eval_error ("regex-compile: invalid pattern " ^ source)))
|
||||
| [String source] ->
|
||||
(try
|
||||
let re = Re.compile (Re.Pcre.re source) in
|
||||
let id = !regex_next_id in
|
||||
incr regex_next_id;
|
||||
Hashtbl.replace regex_table id (re, source, "");
|
||||
make_regex_value id source ""
|
||||
with _ -> raise (Eval_error ("regex-compile: invalid pattern " ^ source)))
|
||||
| _ -> raise (Eval_error "regex-compile: (source flags)"));
|
||||
register "regex-test" (fun args ->
|
||||
match args with
|
||||
| [rx; String s] ->
|
||||
let (re, _, _) = regex_of_value rx in
|
||||
Bool (Re.execp re s)
|
||||
| _ -> raise (Eval_error "regex-test: (regex string)"));
|
||||
register "regex-exec" (fun args ->
|
||||
let (rx, s, start) = match args with
|
||||
| [rx; String s] -> (rx, s, 0)
|
||||
| [rx; String s; Number n] -> (rx, s, int_of_float n)
|
||||
| _ -> raise (Eval_error "regex-exec: (regex string start?)")
|
||||
in
|
||||
let (re, _, _) = regex_of_value rx in
|
||||
try
|
||||
let g = Re.exec ~pos:start re s in
|
||||
group_to_dict g s
|
||||
with Not_found -> Nil);
|
||||
register "regex-match-all" (fun args ->
|
||||
match args with
|
||||
| [rx; String s] ->
|
||||
let (re, _, _) = regex_of_value rx in
|
||||
let all = Re.all re s in
|
||||
List (List.map (fun g -> group_to_dict g s) all)
|
||||
| _ -> raise (Eval_error "regex-match-all: (regex string)"));
|
||||
register "regex-replace" (fun args ->
|
||||
match args with
|
||||
| [rx; String s; String replacement] ->
|
||||
let (re, _, flags) = regex_of_value rx in
|
||||
let expand g =
|
||||
let buf = Buffer.create (String.length replacement) in
|
||||
let i = ref 0 in
|
||||
let n = String.length replacement in
|
||||
while !i < n do
|
||||
let c = replacement.[!i] in
|
||||
if c = '$' && !i + 1 < n then
|
||||
(match replacement.[!i + 1] with
|
||||
| '&' -> Buffer.add_string buf (Re.Group.get g 0); i := !i + 2
|
||||
| '$' -> Buffer.add_char buf '$'; i := !i + 2
|
||||
| c when c >= '0' && c <= '9' ->
|
||||
let idx = Char.code c - Char.code '0' in
|
||||
(try Buffer.add_string buf (Re.Group.get g idx) with Not_found -> ());
|
||||
i := !i + 2
|
||||
| _ -> Buffer.add_char buf c; incr i)
|
||||
else (Buffer.add_char buf c; incr i)
|
||||
done;
|
||||
Buffer.contents buf
|
||||
in
|
||||
let global = String.contains flags 'g' in
|
||||
if global then
|
||||
String (Re.replace re ~f:expand s)
|
||||
else
|
||||
(match Re.exec_opt re s with
|
||||
| None -> String s
|
||||
| Some g ->
|
||||
let repl = expand g in
|
||||
let before = String.sub s 0 (Re.Group.start g 0) in
|
||||
let after_start = Re.Group.stop g 0 in
|
||||
let after = String.sub s after_start (String.length s - after_start) in
|
||||
String (before ^ repl ^ after))
|
||||
| _ -> raise (Eval_error "regex-replace: (regex string replacement)"));
|
||||
register "regex-replace-fn" (fun args ->
|
||||
match args with
|
||||
| [rx; String s; f] ->
|
||||
let (re, _, flags) = regex_of_value rx in
|
||||
let call_fn g =
|
||||
let match_str = Re.Group.get g 0 in
|
||||
let count = Re.Group.nb_groups g in
|
||||
let groups_before = ref [] in
|
||||
for i = count - 1 downto 1 do
|
||||
let v = try String (Re.Group.get g i) with Not_found -> Nil in
|
||||
groups_before := v :: !groups_before
|
||||
done;
|
||||
let idx = Number (float_of_int (Re.Group.start g 0)) in
|
||||
let all_args = [String match_str] @ !groups_before @ [idx; String s] in
|
||||
match !Sx_types._cek_call_ref f (List all_args) with
|
||||
| String s -> s
|
||||
| Number n -> Sx_types.format_number n
|
||||
| v -> Sx_types.inspect v
|
||||
in
|
||||
let global = String.contains flags 'g' in
|
||||
if global then
|
||||
String (Re.replace re ~f:call_fn s)
|
||||
else
|
||||
(match Re.exec_opt re s with
|
||||
| None -> String s
|
||||
| Some g ->
|
||||
let repl = call_fn g in
|
||||
let before = String.sub s 0 (Re.Group.start g 0) in
|
||||
let after_start = Re.Group.stop g 0 in
|
||||
let after = String.sub s after_start (String.length s - after_start) in
|
||||
String (before ^ repl ^ after))
|
||||
| _ -> raise (Eval_error "regex-replace-fn: (regex string fn)"));
|
||||
register "regex-split" (fun args ->
|
||||
match args with
|
||||
| [rx; String s] ->
|
||||
let (re, _, _) = regex_of_value rx in
|
||||
List (List.map (fun x -> String x) (Re.split re s))
|
||||
| _ -> raise (Eval_error "regex-split: (regex string)"));
|
||||
register "regex-source" (fun args ->
|
||||
match args with
|
||||
| [rx] ->
|
||||
let (_, source, _) = regex_of_value rx in
|
||||
String source
|
||||
| _ -> raise (Eval_error "regex-source: (regex)"));
|
||||
register "regex-flags" (fun args ->
|
||||
match args with
|
||||
| [rx] ->
|
||||
let (_, _, flags) = regex_of_value rx in
|
||||
String flags
|
||||
| _ -> raise (Eval_error "regex-flags: (regex)"))
|
||||
|
||||
Reference in New Issue
Block a user