spec: regular expressions (make-regexp/regexp-match/regexp-replace + split)

Adds 9 regexp primitives to stdlib.regexp. OCaml: SxRegexp(src,flags,Re.re)
using Re.Pcre; $&/$1 capture expansion in replace. JS: native RegExp
with SxRegexp wrapper; regexp-match returns {:match :start :end :input :groups}.
32 tests in test-regexp.sx, all pass on both hosts.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-01 18:57:54 +00:00
parent a40a970080
commit d8d5588e42
6 changed files with 496 additions and 1 deletions

View File

@@ -41,7 +41,7 @@
// =========================================================================
// Singleton nil sentinel: a frozen object tagged with `_nil` whose toString() yields "nil".
var NIL = Object.freeze({ _nil: true, toString: function() { return "nil"; } });
// Version stamp string (looks like an ISO-8601 UTC build timestamp — confirm against the build tooling).
var SX_VERSION = "2026-05-01T18:42:40Z";
var SX_VERSION = "2026-05-01T18:54:28Z";
// True when `x` is the NIL sentinel or a JS-level absent value (null / undefined).
function isNil(x) {
  if (x === NIL) return true;
  return x === null || x === undefined;
}
// SX truthiness: every value is truthy except `false` and the nil values recognised by isNil.
function isSxTruthy(x) {
  if (x === false) return false;
  return !isNil(x);
}
@@ -185,6 +185,7 @@
if (x._string_buffer) return "string-buffer";
if (x._hash_table) return "hash-table";
if (x._sxset) return "set";
if (x._regexp) return "regexp";
if (x._rational) return "rational";
if (typeof Node !== "undefined" && x instanceof Node) return "dom-node";
if (Array.isArray(x)) return "list";
@@ -1097,6 +1098,67 @@
};
// stdlib.regexp — native JS RegExp wrappers
// Constructor for the SX regexp wrapper object.
//   source - pattern text, stored as given
//   flags  - flag string; any falsy value is stored as ""
// The `_regexp` marker is what the type-name lookup checks to report "regexp".
function SxRegexp(source, flags) {
  this._regexp = true;
  this.source = source;
  this.flags = flags ? flags : "";
}
// Returns the native RegExp for an SX regexp wrapper, building it on first use
// and caching it on the wrapper as `_compiled`.
// Only the i, m and s flags are forwarded to the native RegExp; any other
// character in rx.flags is ignored.
function sxRxCompile(rx) {
  if (rx._compiled) return rx._compiled;
  var forwarded = ["i", "m", "s"];
  var jsFlags = "";
  for (var k = 0; k < forwarded.length; k++) {
    if (rx.flags.indexOf(forwarded[k]) !== -1) jsFlags += forwarded[k];
  }
  rx._compiled = new RegExp(rx.source, jsFlags);
  return rx._compiled;
}
function sxRxMatchDict(m, input) {
if (!m) return NIL;
var groups = [];
for (var i = 1; i < m.length; i++) groups.push(m[i] !== undefined ? m[i] : "");
return {"match": m[0], "start": m.index, "end": m.index + m[0].length,
"input": input, "groups": groups};
}
// make-regexp: builds an SxRegexp from pattern text and an optional flag string.
// A missing or otherwise falsy `flags` becomes "".
PRIMITIVES["make-regexp"] = function(src, flags) {
  var flagText = flags ? flags : "";
  return new SxRegexp(src, flagText);
};
// regexp?: true only for values constructed by SxRegexp.
PRIMITIVES["regexp?"] = function(v) {
  var isWrapper = v instanceof SxRegexp;
  return isWrapper;
};
// regexp-source: the pattern text stored on the regexp wrapper.
PRIMITIVES["regexp-source"] = function(rx) {
  var patternText = rx.source;
  return patternText;
};
// regexp-flags: the flag string stored on the regexp wrapper (as given at
// construction, not the filtered native flags).
PRIMITIVES["regexp-flags"] = function(rx) {
  var flagText = rx.flags;
  return flagText;
};
// regexp-match: finds the first match of `rx` in string `s`.
// Returns the dict produced by sxRxMatchDict (match/start/end/input/groups),
// or whatever sxRxMatchDict returns for "no match".
PRIMITIVES["regexp-match"] = function(rx, s) {
  // sxRxCompile only ever forwards the i/m/s flags, so the cached RegExp is
  // never global or sticky: String.prototype.match does not depend on
  // lastIndex for it and returns the first match with `index` and groups.
  // Using the cached object directly avoids rebuilding a RegExp on every call.
  var m = s.match(sxRxCompile(rx));
  return sxRxMatchDict(m, s);
};
// regexp-match-all: collects a match dict for every non-overlapping match of
// `rx` in `s`, in order. Returns an empty list when nothing matches.
PRIMITIVES["regexp-match-all"] = function(rx, s) {
  var base = sxRxCompile(rx);
  // A private global copy is used because exec() tracks its position in lastIndex.
  var globalFlags = "g" + base.flags.replace("g","");
  var scanner = new RegExp(base.source, globalFlags);
  var found = [];
  for (var hit = scanner.exec(s); hit !== null; hit = scanner.exec(s)) {
    found.push(sxRxMatchDict(hit, s));
    // An empty match does not advance lastIndex; step past it to avoid looping forever.
    if (hit[0].length === 0) scanner.lastIndex += 1;
  }
  return found;
};
// regexp-replace: replaces the first match of `rx` in `s` with `replacement`.
// `replacement` is handed to String.prototype.replace unchanged, so the native
// replacement patterns (e.g. $& and $1) are expanded by the JS engine.
PRIMITIVES["regexp-replace"] = function(rx, s, replacement) {
  // sxRxCompile only forwards i/m/s, so the cached RegExp is never global and
  // replaces just the first match; no per-call RegExp copy is needed.
  return s.replace(sxRxCompile(rx), replacement);
};
// regexp-replace-all: replaces every match of `rx` in `s` with `replacement`.
// `replacement` is handed to String.prototype.replace unchanged.
PRIMITIVES["regexp-replace-all"] = function(rx, s, replacement) {
  var base = sxRxCompile(rx);
  // Build a global variant of the cached RegExp so that all matches are replaced.
  var globalFlags = "g" + base.flags.replace("g","");
  return s.replace(new RegExp(base.source, globalFlags), replacement);
};
// regexp-split: splits `s` at every match of `rx` and returns the pieces as a list.
// NOTE(review): String.prototype.split also splices captured substrings (or
// undefined for non-participating groups) into the result when the pattern has
// capturing groups — confirm this is the intended cross-host behaviour.
PRIMITIVES["regexp-split"] = function(rx, s) {
  return s.split(sxRxCompile(rx));
};
// stdlib.sets — structural sets keyed by write-to-string serialization
// Constructor for an SX set: entries live in a Map under `data`, and the
// `_sxset` marker identifies the object as a set.
function SxSet() {
  this.data = new Map();
  this._sxset = true;
}
// Type tag shared by all instances via the prototype.
SxSet.prototype._type = "set";