sx primitives: add regex-* (Re + Re.Pcre backed)
Adds regex-compile/test/exec/match-all/replace/replace-fn/split/source/flags.
Opaque dict handle {:__regex__ true :id :source :flags}; compiled Re.re
cached in a primitives-local table. Replacement supports $&, $1-$9, $$.
Flags: i (CASELESS), m (MULTILINE), s (DOTALL). g is a runtime flag handled
in replace. u (unicode) skipped for now.
Unblocks js-on-sx's regex-platform-override! hook — the JS RegExp shim can
now delegate to real regex instead of the substring stub.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -1696,4 +1696,179 @@ let () =
|
|||||||
| [thunk] ->
|
| [thunk] ->
|
||||||
(try !Sx_types._cek_call_ref thunk Nil
|
(try !Sx_types._cek_call_ref thunk Nil
|
||||||
with _ -> Nil)
|
with _ -> Nil)
|
||||||
| _ -> Nil)
|
| _ -> Nil);
|
||||||
|
|
||||||
|
(* === Regex === wrapping Re + Re.Pcre *)
|
||||||
|
let regex_table : (int, Re.re * string * string) Hashtbl.t = Hashtbl.create 32 in
|
||||||
|
let regex_next_id = ref 0 in
|
||||||
|
let parse_flags flags =
|
||||||
|
let opts = ref [] in
|
||||||
|
String.iter (function
|
||||||
|
| 'i' -> opts := `CASELESS :: !opts
|
||||||
|
| 'm' -> opts := `MULTILINE :: !opts
|
||||||
|
| 's' -> opts := `DOTALL :: !opts
|
||||||
|
| _ -> ()) flags;
|
||||||
|
!opts
|
||||||
|
in
|
||||||
|
let make_regex_value id source flags =
|
||||||
|
let d = Hashtbl.create 4 in
|
||||||
|
Hashtbl.replace d "__regex__" (Bool true);
|
||||||
|
Hashtbl.replace d "id" (Number (float_of_int id));
|
||||||
|
Hashtbl.replace d "source" (String source);
|
||||||
|
Hashtbl.replace d "flags" (String flags);
|
||||||
|
Dict d
|
||||||
|
in
|
||||||
|
let regex_of_value = function
|
||||||
|
| Dict d ->
|
||||||
|
(match Hashtbl.find_opt d "id" with
|
||||||
|
| Some (Number n) ->
|
||||||
|
(match Hashtbl.find_opt regex_table (int_of_float n) with
|
||||||
|
| Some r -> r
|
||||||
|
| None -> raise (Eval_error "regex: handle not found"))
|
||||||
|
| _ -> raise (Eval_error "regex: missing id"))
|
||||||
|
| _ -> raise (Eval_error "regex: expected regex dict")
|
||||||
|
in
|
||||||
|
let group_to_dict g input =
|
||||||
|
let d = Hashtbl.create 4 in
|
||||||
|
Hashtbl.replace d "match" (String (Re.Group.get g 0));
|
||||||
|
Hashtbl.replace d "index" (Number (float_of_int (Re.Group.start g 0)));
|
||||||
|
Hashtbl.replace d "input" (String input);
|
||||||
|
let count = Re.Group.nb_groups g in
|
||||||
|
let groups = ref [] in
|
||||||
|
for i = count - 1 downto 1 do
|
||||||
|
let s = try Re.Group.get g i with Not_found -> "" in
|
||||||
|
groups := String s :: !groups
|
||||||
|
done;
|
||||||
|
Hashtbl.replace d "groups" (List !groups);
|
||||||
|
Dict d
|
||||||
|
in
|
||||||
|
register "regex-compile" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [String source; String flags] | [String source; String flags; _] ->
|
||||||
|
let opts = parse_flags flags in
|
||||||
|
(try
|
||||||
|
let re = Re.compile (Re.Pcre.re ~flags:opts source) in
|
||||||
|
let id = !regex_next_id in
|
||||||
|
incr regex_next_id;
|
||||||
|
Hashtbl.replace regex_table id (re, source, flags);
|
||||||
|
make_regex_value id source flags
|
||||||
|
with _ -> raise (Eval_error ("regex-compile: invalid pattern " ^ source)))
|
||||||
|
| [String source] ->
|
||||||
|
(try
|
||||||
|
let re = Re.compile (Re.Pcre.re source) in
|
||||||
|
let id = !regex_next_id in
|
||||||
|
incr regex_next_id;
|
||||||
|
Hashtbl.replace regex_table id (re, source, "");
|
||||||
|
make_regex_value id source ""
|
||||||
|
with _ -> raise (Eval_error ("regex-compile: invalid pattern " ^ source)))
|
||||||
|
| _ -> raise (Eval_error "regex-compile: (source flags)"));
|
||||||
|
register "regex-test" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [rx; String s] ->
|
||||||
|
let (re, _, _) = regex_of_value rx in
|
||||||
|
Bool (Re.execp re s)
|
||||||
|
| _ -> raise (Eval_error "regex-test: (regex string)"));
|
||||||
|
register "regex-exec" (fun args ->
|
||||||
|
let (rx, s, start) = match args with
|
||||||
|
| [rx; String s] -> (rx, s, 0)
|
||||||
|
| [rx; String s; Number n] -> (rx, s, int_of_float n)
|
||||||
|
| _ -> raise (Eval_error "regex-exec: (regex string start?)")
|
||||||
|
in
|
||||||
|
let (re, _, _) = regex_of_value rx in
|
||||||
|
try
|
||||||
|
let g = Re.exec ~pos:start re s in
|
||||||
|
group_to_dict g s
|
||||||
|
with Not_found -> Nil);
|
||||||
|
register "regex-match-all" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [rx; String s] ->
|
||||||
|
let (re, _, _) = regex_of_value rx in
|
||||||
|
let all = Re.all re s in
|
||||||
|
List (List.map (fun g -> group_to_dict g s) all)
|
||||||
|
| _ -> raise (Eval_error "regex-match-all: (regex string)"));
|
||||||
|
register "regex-replace" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [rx; String s; String replacement] ->
|
||||||
|
let (re, _, flags) = regex_of_value rx in
|
||||||
|
let expand g =
|
||||||
|
let buf = Buffer.create (String.length replacement) in
|
||||||
|
let i = ref 0 in
|
||||||
|
let n = String.length replacement in
|
||||||
|
while !i < n do
|
||||||
|
let c = replacement.[!i] in
|
||||||
|
if c = '$' && !i + 1 < n then
|
||||||
|
(match replacement.[!i + 1] with
|
||||||
|
| '&' -> Buffer.add_string buf (Re.Group.get g 0); i := !i + 2
|
||||||
|
| '$' -> Buffer.add_char buf '$'; i := !i + 2
|
||||||
|
| c when c >= '0' && c <= '9' ->
|
||||||
|
let idx = Char.code c - Char.code '0' in
|
||||||
|
(try Buffer.add_string buf (Re.Group.get g idx) with Not_found -> ());
|
||||||
|
i := !i + 2
|
||||||
|
| _ -> Buffer.add_char buf c; incr i)
|
||||||
|
else (Buffer.add_char buf c; incr i)
|
||||||
|
done;
|
||||||
|
Buffer.contents buf
|
||||||
|
in
|
||||||
|
let global = String.contains flags 'g' in
|
||||||
|
if global then
|
||||||
|
String (Re.replace re ~f:expand s)
|
||||||
|
else
|
||||||
|
(match Re.exec_opt re s with
|
||||||
|
| None -> String s
|
||||||
|
| Some g ->
|
||||||
|
let repl = expand g in
|
||||||
|
let before = String.sub s 0 (Re.Group.start g 0) in
|
||||||
|
let after_start = Re.Group.stop g 0 in
|
||||||
|
let after = String.sub s after_start (String.length s - after_start) in
|
||||||
|
String (before ^ repl ^ after))
|
||||||
|
| _ -> raise (Eval_error "regex-replace: (regex string replacement)"));
|
||||||
|
register "regex-replace-fn" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [rx; String s; f] ->
|
||||||
|
let (re, _, flags) = regex_of_value rx in
|
||||||
|
let call_fn g =
|
||||||
|
let match_str = Re.Group.get g 0 in
|
||||||
|
let count = Re.Group.nb_groups g in
|
||||||
|
let groups_before = ref [] in
|
||||||
|
for i = count - 1 downto 1 do
|
||||||
|
let v = try String (Re.Group.get g i) with Not_found -> Nil in
|
||||||
|
groups_before := v :: !groups_before
|
||||||
|
done;
|
||||||
|
let idx = Number (float_of_int (Re.Group.start g 0)) in
|
||||||
|
let all_args = [String match_str] @ !groups_before @ [idx; String s] in
|
||||||
|
match !Sx_types._cek_call_ref f (List all_args) with
|
||||||
|
| String s -> s
|
||||||
|
| Number n -> Sx_types.format_number n
|
||||||
|
| v -> Sx_types.inspect v
|
||||||
|
in
|
||||||
|
let global = String.contains flags 'g' in
|
||||||
|
if global then
|
||||||
|
String (Re.replace re ~f:call_fn s)
|
||||||
|
else
|
||||||
|
(match Re.exec_opt re s with
|
||||||
|
| None -> String s
|
||||||
|
| Some g ->
|
||||||
|
let repl = call_fn g in
|
||||||
|
let before = String.sub s 0 (Re.Group.start g 0) in
|
||||||
|
let after_start = Re.Group.stop g 0 in
|
||||||
|
let after = String.sub s after_start (String.length s - after_start) in
|
||||||
|
String (before ^ repl ^ after))
|
||||||
|
| _ -> raise (Eval_error "regex-replace-fn: (regex string fn)"));
|
||||||
|
register "regex-split" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [rx; String s] ->
|
||||||
|
let (re, _, _) = regex_of_value rx in
|
||||||
|
List (List.map (fun x -> String x) (Re.split re s))
|
||||||
|
| _ -> raise (Eval_error "regex-split: (regex string)"));
|
||||||
|
register "regex-source" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [rx] ->
|
||||||
|
let (_, source, _) = regex_of_value rx in
|
||||||
|
String source
|
||||||
|
| _ -> raise (Eval_error "regex-source: (regex)"));
|
||||||
|
register "regex-flags" (fun args ->
|
||||||
|
match args with
|
||||||
|
| [rx] ->
|
||||||
|
let (_, _, flags) = regex_of_value rx in
|
||||||
|
String flags
|
||||||
|
| _ -> raise (Eval_error "regex-flags: (regex)"))
|
||||||
|
|||||||
Reference in New Issue
Block a user