diff --git a/hosts/ocaml/lib/sx_primitives.ml b/hosts/ocaml/lib/sx_primitives.ml index c9b870a3..3e0768f4 100644 --- a/hosts/ocaml/lib/sx_primitives.ml +++ b/hosts/ocaml/lib/sx_primitives.ml @@ -1696,4 +1696,179 @@ let () = | [thunk] -> (try !Sx_types._cek_call_ref thunk Nil with _ -> Nil) - | _ -> Nil) + | _ -> Nil); + + (* === Regex === wrapping Re + Re.Pcre *) + let regex_table : (int, Re.re * string * string) Hashtbl.t = Hashtbl.create 32 in + let regex_next_id = ref 0 in + let parse_flags flags = + let opts = ref [] in + String.iter (function + | 'i' -> opts := `CASELESS :: !opts + | 'm' -> opts := `MULTILINE :: !opts + | 's' -> opts := `DOTALL :: !opts + | _ -> ()) flags; + !opts + in + let make_regex_value id source flags = + let d = Hashtbl.create 4 in + Hashtbl.replace d "__regex__" (Bool true); + Hashtbl.replace d "id" (Number (float_of_int id)); + Hashtbl.replace d "source" (String source); + Hashtbl.replace d "flags" (String flags); + Dict d + in + let regex_of_value = function + | Dict d -> + (match Hashtbl.find_opt d "id" with + | Some (Number n) -> + (match Hashtbl.find_opt regex_table (int_of_float n) with + | Some r -> r + | None -> raise (Eval_error "regex: handle not found")) + | _ -> raise (Eval_error "regex: missing id")) + | _ -> raise (Eval_error "regex: expected regex dict") + in + let group_to_dict g input = + let d = Hashtbl.create 4 in + Hashtbl.replace d "match" (String (Re.Group.get g 0)); + Hashtbl.replace d "index" (Number (float_of_int (Re.Group.start g 0))); + Hashtbl.replace d "input" (String input); + let count = Re.Group.nb_groups g in + let groups = ref [] in + for i = count - 1 downto 1 do + let s = try Re.Group.get g i with Not_found -> "" in + groups := String s :: !groups + done; + Hashtbl.replace d "groups" (List !groups); + Dict d + in + register "regex-compile" (fun args -> + match args with + | [String source; String flags] | [String source; String flags; _] -> + let opts = parse_flags flags in + (try + let re = Re.compile (Re.Pcre.re ~flags:opts source) in + let id = !regex_next_id in + incr regex_next_id; + Hashtbl.replace regex_table id (re, source, flags); + make_regex_value id source flags + with _ -> raise (Eval_error ("regex-compile: invalid pattern " ^ source))) + | [String source] -> + (try + let re = Re.compile (Re.Pcre.re source) in + let id = !regex_next_id in + incr regex_next_id; + Hashtbl.replace regex_table id (re, source, ""); + make_regex_value id source "" + with _ -> raise (Eval_error ("regex-compile: invalid pattern " ^ source))) + | _ -> raise (Eval_error "regex-compile: (source flags)")); + register "regex-test" (fun args -> + match args with + | [rx; String s] -> + let (re, _, _) = regex_of_value rx in + Bool (Re.execp re s) + | _ -> raise (Eval_error "regex-test: (regex string)")); + register "regex-exec" (fun args -> + let (rx, s, start) = match args with + | [rx; String s] -> (rx, s, 0) + | [rx; String s; Number n] -> (rx, s, int_of_float n) + | _ -> raise (Eval_error "regex-exec: (regex string start?)") + in + let (re, _, _) = regex_of_value rx in + try + let g = Re.exec ~pos:start re s in + group_to_dict g s + with Not_found -> Nil); + register "regex-match-all" (fun args -> + match args with + | [rx; String s] -> + let (re, _, _) = regex_of_value rx in + let all = Re.all re s in + List (List.map (fun g -> group_to_dict g s) all) + | _ -> raise (Eval_error "regex-match-all: (regex string)")); + register "regex-replace" (fun args -> + match args with + | [rx; String s; String replacement] -> + let (re, _, flags) = regex_of_value rx in + let expand g = + let buf = Buffer.create (String.length replacement) in + let i = ref 0 in + let n = String.length replacement in + while !i < n do + let c = replacement.[!i] in + if c = '$' && !i + 1 < n then + (match replacement.[!i + 1] with + | '&' -> Buffer.add_string buf (Re.Group.get g 0); i := !i + 2 + | '$' -> Buffer.add_char buf '$'; i := !i + 2 + | c when c >= '0' && c <= '9' -> + let idx = Char.code c - Char.code '0' in + (try Buffer.add_string buf (Re.Group.get g idx) with Not_found -> ()); + i := !i + 2 + | _ -> Buffer.add_char buf c; incr i) + else (Buffer.add_char buf c; incr i) + done; + Buffer.contents buf + in + let global = String.contains flags 'g' in + if global then + String (Re.replace re ~f:expand s) + else + (match Re.exec_opt re s with + | None -> String s + | Some g -> + let repl = expand g in + let before = String.sub s 0 (Re.Group.start g 0) in + let after_start = Re.Group.stop g 0 in + let after = String.sub s after_start (String.length s - after_start) in + String (before ^ repl ^ after)) + | _ -> raise (Eval_error "regex-replace: (regex string replacement)")); + register "regex-replace-fn" (fun args -> + match args with + | [rx; String s; f] -> + let (re, _, flags) = regex_of_value rx in + let call_fn g = + let match_str = Re.Group.get g 0 in + let count = Re.Group.nb_groups g in + let groups_before = ref [] in + for i = count - 1 downto 1 do + let v = try String (Re.Group.get g i) with Not_found -> Nil in + groups_before := v :: !groups_before + done; + let idx = Number (float_of_int (Re.Group.start g 0)) in + let all_args = [String match_str] @ !groups_before @ [idx; String s] in + match !Sx_types._cek_call_ref f (List all_args) with + | String s -> s + | Number n -> Sx_types.format_number n + | v -> Sx_types.inspect v + in + let global = String.contains flags 'g' in + if global then + String (Re.replace re ~f:call_fn s) + else + (match Re.exec_opt re s with + | None -> String s + | Some g -> + let repl = call_fn g in + let before = String.sub s 0 (Re.Group.start g 0) in + let after_start = Re.Group.stop g 0 in + let after = String.sub s after_start (String.length s - after_start) in + String (before ^ repl ^ after)) + | _ -> raise (Eval_error "regex-replace-fn: (regex string fn)")); + register "regex-split" (fun args -> + match args with + | [rx; String s] -> + let (re, _, _) = regex_of_value rx in + List (List.map (fun x -> String x) (Re.split re s)) + | _ -> raise (Eval_error "regex-split: (regex string)")); + register "regex-source" (fun args -> + match args with + | [rx] -> + let (_, source, _) = regex_of_value rx in + String source + | _ -> raise (Eval_error "regex-source: (regex)")); + register "regex-flags" (fun args -> + match args with + | [rx] -> + let (_, _, flags) = regex_of_value rx in + String flags + | _ -> raise (Eval_error "regex-flags: (regex)"))