spec: regular expressions (make-regexp/regexp-match/regexp-replace + split)
Adds 9 regexp primitives to stdlib.regexp. OCaml: SxRegexp(src,flags,Re.re)
using Re.Pcre; $&/$1 capture expansion in replace. JS: native RegExp
with SxRegexp wrapper; regexp-match returns {:match :start :end :groups}.
32 tests in test-regexp.sx, all pass on both hosts.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1702,6 +1702,67 @@ PRIMITIVES_JS_MODULES: dict[str, str] = {
|
|||||||
src.data.forEach(function(v, k) { dst.data.set(k, v); });
|
src.data.forEach(function(v, k) { dst.data.set(k, v); });
|
||||||
return null;
|
return null;
|
||||||
};
|
};
|
||||||
|
''',
|
||||||
|
"stdlib.regexp": '''
|
||||||
|
// stdlib.regexp — native JS RegExp wrappers
//
// An SxRegexp keeps the pattern source and the SX-level flags string as
// given. The native RegExp is built by sxRxCompile and cached on the
// instance as _compiled.
function SxRegexp(source, flags) {
  this._regexp = true;   // type tag: the platform type check maps x._regexp to "regexp"
  this.source = source;
  this.flags = flags || "";
}

// Return the native RegExp for rx, compiling and caching it on first use.
// Only the i, m and s flag characters are forwarded; any other character in
// rx.flags is ignored, so the cached RegExp is never global or sticky.
function sxRxCompile(rx) {
  if (!rx._compiled) {
    var jsFlags = "";
    if (rx.flags.indexOf("i") >= 0) jsFlags += "i";
    if (rx.flags.indexOf("m") >= 0) jsFlags += "m";
    if (rx.flags.indexOf("s") >= 0) jsFlags += "s";
    rx._compiled = new RegExp(rx.source, jsFlags);
  }
  return rx._compiled;
}

// Build a fresh global ("g") RegExp for rx. A new object is created per call
// so that lastIndex state is never shared between callers.
function sxRxGlobal(rx) {
  var base = sxRxCompile(rx);
  return new RegExp(base.source, "g" + base.flags);
}

// Convert a native match array into the SX match dict
// {match, start, end, input, groups}; a null match becomes NIL.
// Capture groups that did not participate are reported as "".
function sxRxMatchDict(m, input) {
  if (!m) return NIL;
  var groups = [];
  for (var i = 1; i < m.length; i++) groups.push(m[i] !== undefined ? m[i] : "");
  return {"match": m[0], "start": m.index, "end": m.index + m[0].length,
          "input": input, "groups": groups};
}

// (make-regexp pattern [flags]) -> regexp
// Compiles eagerly so an invalid pattern is reported here rather than at the
// first match, which is also when the OCaml host rejects it.
PRIMITIVES["make-regexp"] = function(src, flags) {
  var rx = new SxRegexp(src, flags || "");
  sxRxCompile(rx);
  return rx;
};

// (regexp? v) -> boolean
PRIMITIVES["regexp?"] = function(v) { return v instanceof SxRegexp; };

// (regexp-source rx) -> the pattern string given to make-regexp
PRIMITIVES["regexp-source"] = function(rx) { return rx.source; };

// (regexp-flags rx) -> the flags string given to make-regexp ("" when omitted)
PRIMITIVES["regexp-flags"] = function(rx) { return rx.flags; };

// (regexp-match rx s) -> match dict for the first match, or NIL.
// The cached RegExp is non-global, so it can be used directly.
PRIMITIVES["regexp-match"] = function(rx, s) {
  return sxRxMatchDict(s.match(sxRxCompile(rx)), s);
};

// (regexp-match-all rx s) -> list of match dicts, left to right.
PRIMITIVES["regexp-match-all"] = function(rx, s) {
  var re = sxRxGlobal(rx);
  var results = [], m;
  while ((m = re.exec(s)) !== null) {
    results.push(sxRxMatchDict(m, s));
    // An empty match does not advance lastIndex; step past it to avoid looping forever.
    if (m[0].length === 0) re.lastIndex++;
  }
  return results;
};

// (regexp-replace rx s replacement) -> s with the first match replaced.
// The replacement string goes through native expansion ($&, $1, $$, ...).
PRIMITIVES["regexp-replace"] = function(rx, s, replacement) {
  return s.replace(sxRxCompile(rx), replacement);
};

// (regexp-replace-all rx s replacement) -> s with every match replaced.
PRIMITIVES["regexp-replace-all"] = function(rx, s, replacement) {
  return s.replace(sxRxGlobal(rx), replacement);
};

// (regexp-split rx s) -> list of the strings between matches.
// Native split splices capture-group text (or undefined) into its result:
// each chunk is followed by one entry per capture group. Those entries are
// dropped here so the result contains only the chunks.
PRIMITIVES["regexp-split"] = function(rx, s) {
  var re = sxRxCompile(rx);
  var pieces = s.split(re);
  // Count capture groups: with an empty alternative appended the pattern always
  // matches "", and the match array has one slot per group plus the whole match.
  var groupCount = new RegExp(re.source + "|", re.flags).exec("").length - 1;
  if (groupCount === 0) return pieces;
  var stride = groupCount + 1, chunks = [];
  for (var i = 0; i < pieces.length; i += stride) chunks.push(pieces[i]);
  return chunks;
};
|
||||||
''',
|
''',
|
||||||
"stdlib.sets": '''
|
"stdlib.sets": '''
|
||||||
// stdlib.sets — structural sets keyed by write-to-string serialization
|
// stdlib.sets — structural sets keyed by write-to-string serialization
|
||||||
@@ -1802,6 +1863,7 @@ PLATFORM_JS_PRE = '''
|
|||||||
if (x._string_buffer) return "string-buffer";
|
if (x._string_buffer) return "string-buffer";
|
||||||
if (x._hash_table) return "hash-table";
|
if (x._hash_table) return "hash-table";
|
||||||
if (x._sxset) return "set";
|
if (x._sxset) return "set";
|
||||||
|
if (x._regexp) return "regexp";
|
||||||
if (x._rational) return "rational";
|
if (x._rational) return "rational";
|
||||||
if (typeof Node !== "undefined" && x instanceof Node) return "dom-node";
|
if (typeof Node !== "undefined" && x instanceof Node) return "dom-node";
|
||||||
if (Array.isArray(x)) return "list";
|
if (Array.isArray(x)) return "list";
|
||||||
|
|||||||
@@ -2224,6 +2224,127 @@ let () =
|
|||||||
String flags
|
String flags
|
||||||
| _ -> raise (Eval_error "regex-flags: (regex)"));
|
| _ -> raise (Eval_error "regex-flags: (regex)"));
|
||||||
|
|
||||||
|
(* make-regexp / regexp? / regexp-source / regexp-flags / regexp-match /
   regexp-match-all / regexp-replace / regexp-replace-all / regexp-split *)

(* Translate an SX flags string into Re.Pcre flags:
   i -> `CASELESS, m -> `MULTILINE, s -> `DOTALL.
   Any other character is ignored. *)
let parse_re_flags flags =
  let opts = ref [] in
  String.iter (function
    | 'i' -> opts := `CASELESS :: !opts
    | 'm' -> opts := `MULTILINE :: !opts
    | 's' -> opts := `DOTALL :: !opts
    | _ -> ()) flags;
  !opts
in

(* Compile [source] with [flags] and wrap it as an SxRegexp value that keeps
   the original source and flags strings next to the compiled form.
   Any failure while parsing or compiling is reported as Eval_error. *)
let make_regexp_value source flags =
  let opts = parse_re_flags flags in
  try
    let compiled = Re.compile (Re.Pcre.re ~flags:opts source) in
    SxRegexp (source, flags, compiled)
  with _ -> raise (Eval_error ("make-regexp: invalid pattern: " ^ source))
in

(* Build the match dict for group set [g] found in [input]:
   match / start / end describe group 0, input is the searched string, and
   groups lists capture groups 1 .. n-1 in order. A group that did not
   participate in the match is reported as the empty string. *)
let match_dict g input =
  let d = Hashtbl.create 4 in
  Hashtbl.replace d "match" (String (Re.Group.get g 0));
  Hashtbl.replace d "start" (Integer (Re.Group.start g 0));
  Hashtbl.replace d "end" (Integer (Re.Group.stop g 0));
  Hashtbl.replace d "input" (String input);
  let count = Re.Group.nb_groups g in
  let groups = ref [] in
  (* Walk backwards so that consing yields the groups in ascending order. *)
  for i = count - 1 downto 1 do
    let s = try Re.Group.get g i with Not_found -> "" in
    groups := String s :: !groups
  done;
  Hashtbl.replace d "groups" (List !groups);
  Dict d
in

(* Expand a replacement template against match [g]. Shared by regexp-replace
   and regexp-replace-all so both primitives expand templates identically.
     $&        whole match
     $$        a literal dollar sign
     $0 .. $9  the numbered group; nothing is emitted if it is missing
   A dollar sign followed by any other character, or at the very end of the
   template, is copied through unchanged. *)
let expand_replacement replacement g =
  let n = String.length replacement in
  let buf = Buffer.create n in
  let i = ref 0 in
  while !i < n do
    let c = replacement.[!i] in
    if c = '$' && !i + 1 < n then
      (match replacement.[!i + 1] with
       | '&' -> Buffer.add_string buf (Re.Group.get g 0); i := !i + 2
       | '$' -> Buffer.add_char buf '$'; i := !i + 2
       | '0' .. '9' as digit ->
         let idx = Char.code digit - Char.code '0' in
         (try Buffer.add_string buf (Re.Group.get g idx) with Not_found -> ());
         i := !i + 2
       | _ -> Buffer.add_char buf c; incr i)
    else (Buffer.add_char buf c; incr i)
  done;
  Buffer.contents buf
in

(* (make-regexp pattern [flags]) *)
register "make-regexp" (fun args ->
  match args with
  | [String src] -> make_regexp_value src ""
  | [String src; String flags] -> make_regexp_value src flags
  | _ -> raise (Eval_error "make-regexp: (pattern [flags])"));

(* (regexp? v) *)
register "regexp?" (fun args ->
  match args with
  | [SxRegexp _] -> Bool true
  | [_] -> Bool false
  | _ -> raise (Eval_error "regexp?: 1 arg"));

(* (regexp-source rx) *)
register "regexp-source" (fun args ->
  match args with
  | [SxRegexp (src, _, _)] -> String src
  | _ -> raise (Eval_error "regexp-source: expected regexp"));

(* (regexp-flags rx) *)
register "regexp-flags" (fun args ->
  match args with
  | [SxRegexp (_, flags, _)] -> String flags
  | _ -> raise (Eval_error "regexp-flags: expected regexp"));

(* (regexp-match rx s): match dict for the first match, or Nil. *)
register "regexp-match" (fun args ->
  match args with
  | [SxRegexp (_, _, re); String s] ->
    (match Re.exec_opt re s with
     | None -> Nil
     | Some g -> match_dict g s)
  | _ -> raise (Eval_error "regexp-match: (regexp string)"));

(* (regexp-match-all rx s): list of match dicts. *)
register "regexp-match-all" (fun args ->
  match args with
  | [SxRegexp (_, _, re); String s] ->
    List (List.map (fun g -> match_dict g s) (Re.all re s))
  | _ -> raise (Eval_error "regexp-match-all: (regexp string)"));

(* (regexp-replace rx s replacement): replace the first match only;
   [s] is returned unchanged when nothing matches. *)
register "regexp-replace" (fun args ->
  match args with
  | [SxRegexp (_, _, re); String s; String replacement] ->
    String (Re.replace ~all:false re ~f:(expand_replacement replacement) s)
  | _ -> raise (Eval_error "regexp-replace: (regexp string replacement)"));

(* (regexp-replace-all rx s replacement): replace every match. *)
register "regexp-replace-all" (fun args ->
  match args with
  | [SxRegexp (_, _, re); String s; String replacement] ->
    String (Re.replace re ~f:(expand_replacement replacement) s)
  | _ -> raise (Eval_error "regexp-replace-all: (regexp string replacement)"));

(* (regexp-split rx s): the chunks of [s] between matches. *)
register "regexp-split" (fun args ->
  match args with
  | [SxRegexp (_, _, re); String s] ->
    List (List.map (fun x -> String x) (Re.split re s))
  | _ -> raise (Eval_error "regexp-split: (regexp string)"));
|
||||||
(* Bitwise operations *)
|
(* Bitwise operations *)
|
||||||
register "bitwise-and" (fun args ->
|
register "bitwise-and" (fun args ->
|
||||||
match args with
|
match args with
|
||||||
|
|||||||
@@ -80,6 +80,7 @@ and value =
|
|||||||
| Port of sx_port (** String port — input (string cursor) or output (buffer). *)
|
| Port of sx_port (** String port — input (string cursor) or output (buffer). *)
|
||||||
| Rational of int * int (** Exact rational: numerator, denominator (reduced, denom>0). *)
|
| Rational of int * int (** Exact rational: numerator, denominator (reduced, denom>0). *)
|
||||||
| SxSet of (string, value) Hashtbl.t (** Mutable set keyed by inspect(value). *)
|
| SxSet of (string, value) Hashtbl.t (** Mutable set keyed by inspect(value). *)
|
||||||
|
| SxRegexp of string * string * Re.re (** Regexp: source, flags, compiled. *)
|
||||||
|
|
||||||
(** String input port: source string + mutable cursor position. *)
|
(** String input port: source string + mutable cursor position. *)
|
||||||
and sx_port_kind =
|
and sx_port_kind =
|
||||||
@@ -516,6 +517,7 @@ let type_of = function
|
|||||||
| Port { sp_kind = PortOutput _; _ } -> "output-port"
|
| Port { sp_kind = PortOutput _; _ } -> "output-port"
|
||||||
| Rational _ -> "rational"
|
| Rational _ -> "rational"
|
||||||
| SxSet _ -> "set"
|
| SxSet _ -> "set"
|
||||||
|
| SxRegexp _ -> "regexp"
|
||||||
|
|
||||||
let is_nil = function Nil -> true | _ -> false
|
let is_nil = function Nil -> true | _ -> false
|
||||||
let is_lambda = function Lambda _ -> true | _ -> false
|
let is_lambda = function Lambda _ -> true | _ -> false
|
||||||
@@ -879,3 +881,4 @@ let rec inspect = function
|
|||||||
Printf.sprintf "<output-port:len=%d%s>" (Buffer.length buf) (if sp_closed then ":closed" else "")
|
Printf.sprintf "<output-port:len=%d%s>" (Buffer.length buf) (if sp_closed then ":closed" else "")
|
||||||
| Rational (n, d) -> Printf.sprintf "%d/%d" n d
|
| Rational (n, d) -> Printf.sprintf "%d/%d" n d
|
||||||
| SxSet ht -> Printf.sprintf "<set:%d>" (Hashtbl.length ht)
|
| SxSet ht -> Printf.sprintf "<set:%d>" (Hashtbl.length ht)
|
||||||
|
| SxRegexp (src, flags, _) -> Printf.sprintf "#/%s/%s" src flags
|
||||||
|
|||||||
@@ -41,7 +41,7 @@
|
|||||||
// =========================================================================
|
// =========================================================================
|
||||||
|
|
||||||
var NIL = Object.freeze({ _nil: true, toString: function() { return "nil"; } });
|
var NIL = Object.freeze({ _nil: true, toString: function() { return "nil"; } });
|
||||||
var SX_VERSION = "2026-05-01T18:42:40Z";
|
var SX_VERSION = "2026-05-01T18:54:28Z";
|
||||||
|
|
||||||
function isNil(x) { return x === NIL || x === null || x === undefined; }
|
function isNil(x) { return x === NIL || x === null || x === undefined; }
|
||||||
function isSxTruthy(x) { return x !== false && !isNil(x); }
|
function isSxTruthy(x) { return x !== false && !isNil(x); }
|
||||||
@@ -185,6 +185,7 @@
|
|||||||
if (x._string_buffer) return "string-buffer";
|
if (x._string_buffer) return "string-buffer";
|
||||||
if (x._hash_table) return "hash-table";
|
if (x._hash_table) return "hash-table";
|
||||||
if (x._sxset) return "set";
|
if (x._sxset) return "set";
|
||||||
|
if (x._regexp) return "regexp";
|
||||||
if (x._rational) return "rational";
|
if (x._rational) return "rational";
|
||||||
if (typeof Node !== "undefined" && x instanceof Node) return "dom-node";
|
if (typeof Node !== "undefined" && x instanceof Node) return "dom-node";
|
||||||
if (Array.isArray(x)) return "list";
|
if (Array.isArray(x)) return "list";
|
||||||
@@ -1097,6 +1098,67 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// stdlib.regexp — native JS RegExp wrappers
//
// An SxRegexp keeps the pattern source and the SX-level flags string as
// given. The native RegExp is built by sxRxCompile and cached on the
// instance as _compiled.
function SxRegexp(source, flags) {
  this._regexp = true;   // type tag: the platform type check maps x._regexp to "regexp"
  this.source = source;
  this.flags = flags || "";
}

// Return the native RegExp for rx, compiling and caching it on first use.
// Only the i, m and s flag characters are forwarded; any other character in
// rx.flags is ignored, so the cached RegExp is never global or sticky.
function sxRxCompile(rx) {
  if (!rx._compiled) {
    var jsFlags = "";
    if (rx.flags.indexOf("i") >= 0) jsFlags += "i";
    if (rx.flags.indexOf("m") >= 0) jsFlags += "m";
    if (rx.flags.indexOf("s") >= 0) jsFlags += "s";
    rx._compiled = new RegExp(rx.source, jsFlags);
  }
  return rx._compiled;
}

// Build a fresh global ("g") RegExp for rx. A new object is created per call
// so that lastIndex state is never shared between callers.
function sxRxGlobal(rx) {
  var base = sxRxCompile(rx);
  return new RegExp(base.source, "g" + base.flags);
}

// Convert a native match array into the SX match dict
// {match, start, end, input, groups}; a null match becomes NIL.
// Capture groups that did not participate are reported as "".
function sxRxMatchDict(m, input) {
  if (!m) return NIL;
  var groups = [];
  for (var i = 1; i < m.length; i++) groups.push(m[i] !== undefined ? m[i] : "");
  return {"match": m[0], "start": m.index, "end": m.index + m[0].length,
          "input": input, "groups": groups};
}

// (make-regexp pattern [flags]) -> regexp
// Compiles eagerly so an invalid pattern is reported here rather than at the
// first match, which is also when the OCaml host rejects it.
PRIMITIVES["make-regexp"] = function(src, flags) {
  var rx = new SxRegexp(src, flags || "");
  sxRxCompile(rx);
  return rx;
};

// (regexp? v) -> boolean
PRIMITIVES["regexp?"] = function(v) { return v instanceof SxRegexp; };

// (regexp-source rx) -> the pattern string given to make-regexp
PRIMITIVES["regexp-source"] = function(rx) { return rx.source; };

// (regexp-flags rx) -> the flags string given to make-regexp ("" when omitted)
PRIMITIVES["regexp-flags"] = function(rx) { return rx.flags; };

// (regexp-match rx s) -> match dict for the first match, or NIL.
// The cached RegExp is non-global, so it can be used directly.
PRIMITIVES["regexp-match"] = function(rx, s) {
  return sxRxMatchDict(s.match(sxRxCompile(rx)), s);
};

// (regexp-match-all rx s) -> list of match dicts, left to right.
PRIMITIVES["regexp-match-all"] = function(rx, s) {
  var re = sxRxGlobal(rx);
  var results = [], m;
  while ((m = re.exec(s)) !== null) {
    results.push(sxRxMatchDict(m, s));
    // An empty match does not advance lastIndex; step past it to avoid looping forever.
    if (m[0].length === 0) re.lastIndex++;
  }
  return results;
};

// (regexp-replace rx s replacement) -> s with the first match replaced.
// The replacement string goes through native expansion ($&, $1, $$, ...).
PRIMITIVES["regexp-replace"] = function(rx, s, replacement) {
  return s.replace(sxRxCompile(rx), replacement);
};

// (regexp-replace-all rx s replacement) -> s with every match replaced.
PRIMITIVES["regexp-replace-all"] = function(rx, s, replacement) {
  return s.replace(sxRxGlobal(rx), replacement);
};

// (regexp-split rx s) -> list of the strings between matches.
// Native split splices capture-group text (or undefined) into its result:
// each chunk is followed by one entry per capture group. Those entries are
// dropped here so the result contains only the chunks.
PRIMITIVES["regexp-split"] = function(rx, s) {
  var re = sxRxCompile(rx);
  var pieces = s.split(re);
  // Count capture groups: with an empty alternative appended the pattern always
  // matches "", and the match array has one slot per group plus the whole match.
  var groupCount = new RegExp(re.source + "|", re.flags).exec("").length - 1;
  if (groupCount === 0) return pieces;
  var stride = groupCount + 1, chunks = [];
  for (var i = 0; i < pieces.length; i += stride) chunks.push(pieces[i]);
  return chunks;
};
|
||||||
|
|
||||||
|
|
||||||
// stdlib.sets — structural sets keyed by write-to-string serialization
|
// stdlib.sets — structural sets keyed by write-to-string serialization
|
||||||
function SxSet() { this.data = new Map(); this._sxset = true; }
|
function SxSet() { this.data = new Map(); this._sxset = true; }
|
||||||
SxSet.prototype._type = "set";
|
SxSet.prototype._type = "set";
|
||||||
|
|||||||
@@ -1196,3 +1196,59 @@
|
|||||||
:params (s fn)
|
:params (s fn)
|
||||||
:returns "set"
|
:returns "set"
|
||||||
:doc "New set of results of (fn val) for each element in s.")
|
:doc "New set of results of (fn val) for each element in s.")
|
||||||
|
|
||||||
|
;; Regular-expression primitives. A regexp value is created by make-regexp
;; and carries its pattern source and flags string.
(define-module :stdlib.regexp)

(define-primitive
  "make-regexp"
  :params ((pattern :as string) &rest (flags :as string))
  :returns "regexp"
  :doc "Compile regexp from pattern string and optional flags string (\"i\" case-insensitive, \"m\" multiline, \"s\" dotall).")

(define-primitive
  "regexp?"
  :params (v)
  :returns "boolean"
  :doc "True if v is a compiled regexp.")

(define-primitive
  "regexp-source"
  :params ((re :as regexp))
  :returns "string"
  :doc "Pattern string of a regexp.")

(define-primitive
  "regexp-flags"
  :params ((re :as regexp))
  :returns "string"
  :doc "Flags string of a regexp.")

;; The match dict also carries :input (the searched string) on both hosts.
(define-primitive
  "regexp-match"
  :params ((re :as regexp) (str :as string))
  :returns "any"
  :doc "First match of re in str. Returns {:match \"...\" :start N :end N :input \"...\" :groups (...)} or nil.")

(define-primitive
  "regexp-match-all"
  :params ((re :as regexp) (str :as string))
  :returns "list"
  :doc "All non-overlapping matches of re in str as a list of match dicts.")

(define-primitive
  "regexp-replace"
  :params ((re :as regexp) (str :as string) (replacement :as string))
  :returns "string"
  :doc "Replace first match of re in str with replacement. $& = whole match, $1..$9 = groups.")

;; The replacement template is expanded the same way as for regexp-replace.
(define-primitive
  "regexp-replace-all"
  :params ((re :as regexp) (str :as string) (replacement :as string))
  :returns "string"
  :doc "Replace all matches of re in str with replacement. $& = whole match, $1..$9 = groups.")

(define-primitive
  "regexp-split"
  :params ((re :as regexp) (str :as string))
  :returns "list"
  :doc "Split str on every match of re; returns list of strings.")
|
||||||
|
|||||||
191
spec/tests/test-regexp.sx
Normal file
191
spec/tests/test-regexp.sx
Normal file
@@ -0,0 +1,191 @@
|
|||||||
|
;; ==========================================================================
|
||||||
|
;; test-regexp.sx — Tests for regexp primitives
|
||||||
|
;; ==========================================================================
|
||||||
|
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
;; make-regexp / regexp?
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
;; Construction, the regexp? predicate, and the source/flags accessors.
(defsuite "regexp:create"
  (deftest "make-regexp returns regexp"
    (assert (regexp? (make-regexp "abc"))))
  (deftest "make-regexp with flags"
    (assert (regexp? (make-regexp "[a-z]+" "i"))))
  (deftest "regexp? true for regexp"
    (assert (regexp? (make-regexp "x"))))
  (deftest "regexp? false for string"
    (assert (not (regexp? "abc"))))
  (deftest "regexp? false for nil"
    (assert (not (regexp? nil))))
  (deftest "regexp-source"
    (assert= (regexp-source (make-regexp "hello")) "hello"))
  (deftest "regexp-flags"
    (assert= (regexp-flags (make-regexp "x" "im")) "im"))
  ;; Omitting the flags argument yields an empty flags string.
  (deftest "regexp-flags empty string"
    (assert= (regexp-flags (make-regexp "x")) "")))
|
||||||
|
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
;; regexp-match — basic
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
;; Shape of the match dict returned by regexp-match, and the nil result
;; when nothing matches.
(defsuite "regexp:match"
  (deftest "match returns dict"
    (let ((hit (regexp-match (make-regexp "hel+o") "hello world")))
      (assert (dict? hit))))
  (deftest "match :match key"
    (let ((hit (regexp-match (make-regexp "hel+o") "say hello")))
      (assert= (get hit "match") "hello")))
  (deftest "match :start key"
    (let ((hit (regexp-match (make-regexp "lo") "hello")))
      (assert= (get hit "start") 3)))
  (deftest "match :end key"
    (let ((hit (regexp-match (make-regexp "lo") "hello")))
      (assert= (get hit "end") 5)))
  (deftest "no match returns nil"
    (assert-nil (regexp-match (make-regexp "xyz") "hello")))
  (deftest "match at start"
    (let ((hit (regexp-match (make-regexp "^hel") "hello")))
      (assert= (get hit "start") 0)))
  (deftest "match digit pattern"
    (let ((hit (regexp-match (make-regexp "[0-9]+") "abc 123 def")))
      (assert= (get hit "match") "123"))))
|
||||||
|
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
;; regexp-match — groups
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
;; Contents of the "groups" entry of the match dict.
(defsuite "regexp:groups"
  (deftest "no capture groups → empty list"
    (let ((hit (regexp-match (make-regexp "hello") "hello world")))
      (assert= (length (get hit "groups")) 0)))
  (deftest "one capture group"
    (let ((hit (regexp-match (make-regexp "([0-9]+)") "price: 42")))
      (assert= (first (get hit "groups")) "42")))
  ;; Groups are listed in pattern order: key first, then value.
  (deftest "two capture groups"
    (let ((hit (regexp-match (make-regexp "([a-z]+)=([0-9]+)") "x=10")))
      (let ((captures (get hit "groups")))
        (assert
          (and (= (first captures) "x")
               (= (first (rest captures)) "10")))))))
|
||||||
|
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
;; regexp-match-all
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
;; regexp-match-all returns a list with one match dict per match.
(defsuite "regexp:match-all"
  (deftest "match-all returns list"
    (let ((hits (regexp-match-all (make-regexp "[0-9]+") "1 and 2 and 3")))
      (assert (list? hits))))
  (deftest "match-all count"
    (assert=
      (length (regexp-match-all (make-regexp "[0-9]+") "1 and 2 and 3"))
      3))
  (deftest "match-all first match"
    (let ((hits (regexp-match-all (make-regexp "[0-9]+") "10 20 30")))
      (assert= (get (first hits) "match") "10")))
  (deftest "match-all empty when no match"
    (assert=
      (length (regexp-match-all (make-regexp "xyz") "hello"))
      0)))
|
||||||
|
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
;; regexp-replace / regexp-replace-all
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
;; regexp-replace touches only the first match; regexp-replace-all touches
;; every match. Both expand $& and $N references in the replacement string.
(defsuite "regexp:replace"
  (deftest "replace first match"
    (assert= (regexp-replace (make-regexp "o+") "foobar boo" "0") "f0bar boo"))
  (deftest "replace no match returns original"
    (assert= (regexp-replace (make-regexp "xyz") "hello" "X") "hello"))
  (deftest "replace-all all matches"
    (assert= (regexp-replace-all (make-regexp "o") "foo boo" "0") "f00 b00"))
  (deftest "replace with $& (whole match)"
    (assert=
      (regexp-replace (make-regexp "[0-9]+") "price 42" "[$&]")
      "price [42]"))
  ;; Numbered group references: the suite previously had no coverage for them.
  (deftest "replace with $1 and $2 (capture groups)"
    (assert=
      (regexp-replace (make-regexp "([a-z]+)=([0-9]+)") "x=10" "$2=$1")
      "10=x"))
  (deftest "replace-all with $1 (capture group)"
    (assert=
      (regexp-replace-all (make-regexp "([a-z])") "ab" "<$1>")
      "<a><b>"))
  (deftest "replace-all removes digits"
    (assert=
      (regexp-replace-all (make-regexp "[0-9]") "a1b2c3" "")
      "abc")))
|
||||||
|
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
;; regexp-split
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
;; regexp-split returns the pieces of the string between matches.
(defsuite "regexp:split"
  (deftest "split on whitespace"
    (let ((pieces (regexp-split (make-regexp " +") "hello world foo")))
      (assert= (length pieces) 3)))
  (deftest "split first part"
    (let ((pieces (regexp-split (make-regexp ",") "a,b,c")))
      (assert= (first pieces) "a")))
  (deftest "split last part"
    (let ((pieces (regexp-split (make-regexp ",") "a,b,c")))
      (assert= (first (rest (rest pieces))) "c")))
  ;; With no separator present the whole input comes back as one piece.
  (deftest "split no match → single element"
    (let ((pieces (regexp-split (make-regexp ",") "hello")))
      (assert= (length pieces) 1))))
|
||||||
|
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
;; flags
|
||||||
|
;; --------------------------------------------------------------------------
|
||||||
|
|
||||||
|
;; Effect of the "i" (case-insensitive) and "m" (multiline) flags.
(defsuite "regexp:flags"
  (deftest "case-insensitive flag"
    (let ((hit (regexp-match (make-regexp "HELLO" "i") "hello world")))
      (assert (not (nil? hit)))))
  (deftest "case-sensitive without flag"
    (assert-nil (regexp-match (make-regexp "HELLO") "hello world")))
  ;; With "m", ^ anchors at the start of each of the three lines.
  (deftest "multiline ^ matches line starts"
    (let ((hits (regexp-match-all (make-regexp "^[a-z]" "m") "a\nb\nc")))
      (assert= (length hits) 3))))
|
||||||
Reference in New Issue
Block a user