spec: regular expressions (make-regexp/regexp-match/regexp-replace + split)
Adds 9 regexp primitives to stdlib.regexp. OCaml: SxRegexp(src,flags,Re.re)
using Re.Pcre; $&/$1 capture expansion in replace. JS: native RegExp
with SxRegexp wrapper; regexp-match returns {:match :start :end :groups}.
32 tests in test-regexp.sx, all pass on both hosts.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2224,6 +2224,127 @@ let () =
|
||||
String flags
|
||||
| _ -> raise (Eval_error "regex-flags: (regex)"));
|
||||
|
||||
(* make-regexp / regexp? / regexp-match / regexp-match-all / regexp-replace / regexp-replace-all / regexp-split *)
|
||||
let parse_re_flags flags =
|
||||
let opts = ref [] in
|
||||
String.iter (function
|
||||
| 'i' -> opts := `CASELESS :: !opts
|
||||
| 'm' -> opts := `MULTILINE :: !opts
|
||||
| 's' -> opts := `DOTALL :: !opts
|
||||
| _ -> ()) flags;
|
||||
!opts
|
||||
in
|
||||
let make_regexp_value source flags =
|
||||
let opts = parse_re_flags flags in
|
||||
try
|
||||
let compiled = Re.compile (Re.Pcre.re ~flags:opts source) in
|
||||
SxRegexp (source, flags, compiled)
|
||||
with _ -> raise (Eval_error ("make-regexp: invalid pattern: " ^ source))
|
||||
in
|
||||
let match_dict g input =
|
||||
let d = Hashtbl.create 4 in
|
||||
Hashtbl.replace d "match" (String (Re.Group.get g 0));
|
||||
Hashtbl.replace d "start" (Integer (Re.Group.start g 0));
|
||||
Hashtbl.replace d "end" (Integer (Re.Group.stop g 0));
|
||||
Hashtbl.replace d "input" (String input);
|
||||
let count = Re.Group.nb_groups g in
|
||||
let groups = ref [] in
|
||||
for i = count - 1 downto 1 do
|
||||
let s = try Re.Group.get g i with Not_found -> "" in
|
||||
groups := String s :: !groups
|
||||
done;
|
||||
Hashtbl.replace d "groups" (List !groups);
|
||||
Dict d
|
||||
in
|
||||
register "make-regexp" (fun args ->
|
||||
match args with
|
||||
| [String src] -> make_regexp_value src ""
|
||||
| [String src; String flags] -> make_regexp_value src flags
|
||||
| _ -> raise (Eval_error "make-regexp: (pattern [flags])"));
|
||||
register "regexp?" (fun args ->
|
||||
match args with
|
||||
| [SxRegexp _] -> Bool true
|
||||
| [_] -> Bool false
|
||||
| _ -> raise (Eval_error "regexp?: 1 arg"));
|
||||
register "regexp-source" (fun args ->
|
||||
match args with
|
||||
| [SxRegexp (src, _, _)] -> String src
|
||||
| _ -> raise (Eval_error "regexp-source: expected regexp"));
|
||||
register "regexp-flags" (fun args ->
|
||||
match args with
|
||||
| [SxRegexp (_, flags, _)] -> String flags
|
||||
| _ -> raise (Eval_error "regexp-flags: expected regexp"));
|
||||
register "regexp-match" (fun args ->
|
||||
match args with
|
||||
| [SxRegexp (_, _, re); String s] ->
|
||||
(match Re.exec_opt re s with
|
||||
| None -> Nil
|
||||
| Some g -> match_dict g s)
|
||||
| _ -> raise (Eval_error "regexp-match: (regexp string)"));
|
||||
register "regexp-match-all" (fun args ->
|
||||
match args with
|
||||
| [SxRegexp (_, _, re); String s] ->
|
||||
List (List.map (fun g -> match_dict g s) (Re.all re s))
|
||||
| _ -> raise (Eval_error "regexp-match-all: (regexp string)"));
|
||||
register "regexp-replace" (fun args ->
|
||||
match args with
|
||||
| [SxRegexp (_, _, re); String s; String replacement] ->
|
||||
(match Re.exec_opt re s with
|
||||
| None -> String s
|
||||
| Some g ->
|
||||
let buf = Buffer.create (String.length s) in
|
||||
let i = ref 0 in
|
||||
let n = String.length replacement in
|
||||
let expand () =
|
||||
while !i < n do
|
||||
let c = replacement.[!i] in
|
||||
if c = '$' && !i + 1 < n then
|
||||
(match replacement.[!i + 1] with
|
||||
| '&' -> Buffer.add_string buf (Re.Group.get g 0); i := !i + 2
|
||||
| '$' -> Buffer.add_char buf '$'; i := !i + 2
|
||||
| c when c >= '0' && c <= '9' ->
|
||||
let idx = Char.code c - Char.code '0' in
|
||||
(try Buffer.add_string buf (Re.Group.get g idx) with Not_found -> ());
|
||||
i := !i + 2
|
||||
| _ -> Buffer.add_char buf c; incr i)
|
||||
else (Buffer.add_char buf c; incr i)
|
||||
done
|
||||
in
|
||||
Buffer.add_string buf (String.sub s 0 (Re.Group.start g 0));
|
||||
expand ();
|
||||
Buffer.add_string buf (String.sub s (Re.Group.stop g 0)
|
||||
(String.length s - Re.Group.stop g 0));
|
||||
String (Buffer.contents buf))
|
||||
| _ -> raise (Eval_error "regexp-replace: (regexp string replacement)"));
|
||||
register "regexp-replace-all" (fun args ->
|
||||
match args with
|
||||
| [SxRegexp (_, _, re); String s; String replacement] ->
|
||||
let expand g =
|
||||
let buf = Buffer.create (String.length replacement) in
|
||||
let i = ref 0 in
|
||||
let n = String.length replacement in
|
||||
while !i < n do
|
||||
let c = replacement.[!i] in
|
||||
if c = '$' && !i + 1 < n then
|
||||
(match replacement.[!i + 1] with
|
||||
| '&' -> Buffer.add_string buf (Re.Group.get g 0); i := !i + 2
|
||||
| '$' -> Buffer.add_char buf '$'; i := !i + 2
|
||||
| c when c >= '0' && c <= '9' ->
|
||||
let idx = Char.code c - Char.code '0' in
|
||||
(try Buffer.add_string buf (Re.Group.get g idx) with Not_found -> ());
|
||||
i := !i + 2
|
||||
| _ -> Buffer.add_char buf c; incr i)
|
||||
else (Buffer.add_char buf c; incr i)
|
||||
done;
|
||||
Buffer.contents buf
|
||||
in
|
||||
String (Re.replace re ~f:expand s)
|
||||
| _ -> raise (Eval_error "regexp-replace-all: (regexp string replacement)"));
|
||||
register "regexp-split" (fun args ->
|
||||
match args with
|
||||
| [SxRegexp (_, _, re); String s] ->
|
||||
List (List.map (fun x -> String x) (Re.split re s))
|
||||
| _ -> raise (Eval_error "regexp-split: (regexp string)"));
|
||||
(* Bitwise operations *)
|
||||
register "bitwise-and" (fun args ->
|
||||
match args with
|
||||
|
||||
@@ -80,6 +80,7 @@ and value =
|
||||
| Port of sx_port (** String port — input (string cursor) or output (buffer). *)
|
||||
| Rational of int * int (** Exact rational: numerator, denominator (reduced, denom>0). *)
|
||||
| SxSet of (string, value) Hashtbl.t (** Mutable set keyed by inspect(value). *)
|
||||
| SxRegexp of string * string * Re.re (** Regexp: source, flags, compiled. *)
|
||||
|
||||
(** String input port: source string + mutable cursor position. *)
|
||||
and sx_port_kind =
|
||||
@@ -516,6 +517,7 @@ let type_of = function
|
||||
| Port { sp_kind = PortOutput _; _ } -> "output-port"
|
||||
| Rational _ -> "rational"
|
||||
| SxSet _ -> "set"
|
||||
| SxRegexp _ -> "regexp"
|
||||
|
||||
let is_nil = function Nil -> true | _ -> false
|
||||
let is_lambda = function Lambda _ -> true | _ -> false
|
||||
@@ -879,3 +881,4 @@ let rec inspect = function
|
||||
Printf.sprintf "<output-port:len=%d%s>" (Buffer.length buf) (if sp_closed then ":closed" else "")
|
||||
| Rational (n, d) -> Printf.sprintf "%d/%d" n d
|
||||
| SxSet ht -> Printf.sprintf "<set:%d>" (Hashtbl.length ht)
|
||||
| SxRegexp (src, flags, _) -> Printf.sprintf "#/%s/%s" src flags
|
||||
|
||||
Reference in New Issue
Block a user