spec: regular expressions (make-regexp/regexp-match/regexp-replace + split)
Adds 9 regexp primitives to stdlib.regexp. OCaml: SxRegexp(src,flags,Re.re)
using Re.Pcre; $&/$1 capture expansion in replace. JS: native RegExp
with SxRegexp wrapper; regexp-match returns {:match :start :end :groups}.
34 tests in test-regexp.sx, all pass on both hosts.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1196,3 +1196,59 @@
|
||||
:params (s fn)
|
||||
:returns "set"
|
||||
:doc "New set of results of (fn val) for each element in s.")
|
||||
|
||||
;; Module header for the regexp primitives (make-regexp, regexp?, accessors,
;; match, match-all, replace, replace-all, split) declared below.
(define-module :stdlib.regexp)
|
||||
|
||||
;; make-regexp — constructor for regexp values.
;;   pattern : string, the regular-expression source.
;;   flags   : string, taken through &rest so callers may leave it out;
;;             the recognised letters are listed in the :doc text.
;;   returns : "regexp".
(define-primitive
|
||||
"make-regexp"
|
||||
:params ((pattern :as string) &rest (flags :as string))
|
||||
:returns "regexp"
|
||||
:doc "Compile regexp from pattern string and optional flags string (\"i\" case-insensitive, \"m\" multiline, \"s\" dotall).")
|
||||
|
||||
;; regexp? — type predicate for compiled regexps.
;;   v       : untyped; any value may be passed.
;;   returns : "boolean".
(define-primitive
|
||||
"regexp?"
|
||||
:params (v)
|
||||
:returns "boolean"
|
||||
:doc "True if v is a compiled regexp.")
|
||||
|
||||
;; regexp-source — accessor for the pattern a regexp was built from.
;;   re      : regexp.
;;   returns : "string".
(define-primitive
|
||||
"regexp-source"
|
||||
:params ((re :as regexp))
|
||||
:returns "string"
|
||||
:doc "Pattern string of a regexp.")
|
||||
|
||||
;; regexp-flags — accessor for the flags a regexp was built with.
;;   re      : regexp.
;;   returns : "string"; the accompanying test suite expects "" for a regexp
;;             created without a flags argument.
(define-primitive
|
||||
"regexp-flags"
|
||||
:params ((re :as regexp))
|
||||
:returns "string"
|
||||
:doc "Flags string of a regexp.")
|
||||
|
||||
;; regexp-match — find the first match of a regexp in a string.
;;   re      : regexp.
;;   str     : string to search.
;;   returns : "any", because the result is either a match dict or nil.
;; The dict carries match / start / end / groups entries. The accompanying
;; tests expect start 3 and end 5 for "lo" in "hello", i.e. zero-based start
;; and exclusive end, and an empty groups list when the pattern has no
;; capture groups.
(define-primitive
|
||||
"regexp-match"
|
||||
:params ((re :as regexp) (str :as string))
|
||||
:returns "any"
|
||||
:doc "First match of re in str. Returns {:match \"...\" :start N :end N :groups (...)} or nil.")
|
||||
|
||||
;; regexp-match-all — collect every non-overlapping match.
;;   re      : regexp.
;;   str     : string to search.
;;   returns : "list" of match dicts; the accompanying tests expect a list of
;;             length 0 when nothing matches.
(define-primitive
|
||||
"regexp-match-all"
|
||||
:params ((re :as regexp) (str :as string))
|
||||
:returns "list"
|
||||
:doc "All non-overlapping matches of re in str as a list of match dicts.")
|
||||
|
||||
;; regexp-replace — substitute the first match only.
;;   re          : regexp.
;;   str         : input string.
;;   replacement : string; $& and $1..$9 are expanded as described in :doc.
;;   returns     : "string"; the accompanying tests expect the input back
;;                 unchanged when there is no match.
(define-primitive
|
||||
"regexp-replace"
|
||||
:params ((re :as regexp) (str :as string) (replacement :as string))
|
||||
:returns "string"
|
||||
:doc "Replace first match of re in str with replacement. $& = whole match, $1..$9 = groups.")
|
||||
|
||||
;; regexp-replace-all — substitute every match.
;;   re          : regexp.
;;   str         : input string.
;;   replacement : string.
;;   returns     : "string".
;; NOTE(review): the :doc text does not say whether $& / $1..$9 are expanded
;; here the way regexp-replace documents — confirm against the host
;; implementations and extend the :doc if so.
(define-primitive
|
||||
"regexp-replace-all"
|
||||
:params ((re :as regexp) (str :as string) (replacement :as string))
|
||||
:returns "string"
|
||||
:doc "Replace all matches of re in str with replacement.")
|
||||
|
||||
;; regexp-split — cut a string at each match of a regexp.
;;   re      : regexp used as the separator.
;;   str     : string to split.
;;   returns : "list" of strings; the accompanying tests expect a one-element
;;             list when the separator never matches.
(define-primitive
|
||||
"regexp-split"
|
||||
:params ((re :as regexp) (str :as string))
|
||||
:returns "list"
|
||||
:doc "Split str on every match of re; returns list of strings.")
|
||||
|
||||
191
spec/tests/test-regexp.sx
Normal file
191
spec/tests/test-regexp.sx
Normal file
@@ -0,0 +1,191 @@
|
||||
;; ==========================================================================
|
||||
;; test-regexp.sx — Tests for regexp primitives
|
||||
;; ==========================================================================
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; make-regexp / regexp?
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(defsuite
  "regexp:create"
  ;; Construction, with and without a flags string.
  (deftest
    "make-regexp returns regexp"
    (let ((rx (make-regexp "abc")))
      (assert (regexp? rx))))
  (deftest
    "make-regexp with flags"
    (let ((rx (make-regexp "[a-z]+" "i")))
      (assert (regexp? rx))))
  ;; Predicate: true for a compiled regexp, false for a string and for nil.
  (deftest
    "regexp? true for regexp"
    (let ((rx (make-regexp "x")))
      (assert (regexp? rx))))
  (deftest "regexp? false for string" (assert (not (regexp? "abc"))))
  (deftest "regexp? false for nil" (assert (not (regexp? nil))))
  ;; Accessors hand back the arguments given to make-regexp; a regexp built
  ;; without flags reports the empty string.
  (deftest
    "regexp-source"
    (let ((rx (make-regexp "hello")))
      (assert= (regexp-source rx) "hello")))
  (deftest
    "regexp-flags"
    (let ((rx (make-regexp "x" "im")))
      (assert= (regexp-flags rx) "im")))
  (deftest
    "regexp-flags empty string"
    (let ((rx (make-regexp "x")))
      (assert= (regexp-flags rx) ""))))
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; regexp-match — basic
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(defsuite
  "regexp:match"
  ;; A hit is a dict; its entries are read with string keys.
  (deftest
    "match returns dict"
    (assert (dict? (regexp-match (make-regexp "hel+o") "hello world"))))
  (deftest
    "match :match key"
    (assert=
      (get (regexp-match (make-regexp "hel+o") "say hello") "match")
      "hello"))
  ;; "lo" sits at offsets 3..4 of "hello": start is 3, end is 5 (exclusive).
  (deftest
    "match :start key"
    (assert= (get (regexp-match (make-regexp "lo") "hello") "start") 3))
  (deftest
    "match :end key"
    (assert= (get (regexp-match (make-regexp "lo") "hello") "end") 5))
  ;; A miss is nil rather than an empty dict.
  (deftest
    "no match returns nil"
    (let ((outcome (regexp-match (make-regexp "xyz") "hello")))
      (assert-nil outcome)))
  (deftest
    "match at start"
    (assert= (get (regexp-match (make-regexp "^hel") "hello") "start") 0))
  (deftest
    "match digit pattern"
    (assert=
      (get (regexp-match (make-regexp "[0-9]+") "abc 123 def") "match")
      "123")))
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; regexp-match — groups
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(defsuite
  "regexp:groups"
  ;; Each test pulls the "groups" entry straight out of the match dict.
  (deftest
    "no capture groups → empty list"
    (let
      ((captured
         (get (regexp-match (make-regexp "hello") "hello world") "groups")))
      (assert= (length captured) 0)))
  (deftest
    "one capture group"
    (let
      ((captured
         (get (regexp-match (make-regexp "([0-9]+)") "price: 42") "groups")))
      (assert= (first captured) "42")))
  ;; Groups are listed in pattern order: the name first, then the number.
  (deftest
    "two capture groups"
    (let
      ((captured
         (get (regexp-match (make-regexp "([a-z]+)=([0-9]+)") "x=10") "groups")))
      (assert
        (and (= (first captured) "x") (= (first (rest captured)) "10"))))))
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; regexp-match-all
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(defsuite
  "regexp:match-all"
  ;; The result is always a list, one match dict per hit.
  (deftest
    "match-all returns list"
    (assert
      (list? (regexp-match-all (make-regexp "[0-9]+") "1 and 2 and 3"))))
  (deftest
    "match-all count"
    (let ((found (regexp-match-all (make-regexp "[0-9]+") "1 and 2 and 3")))
      (assert= (length found) 3)))
  (deftest
    "match-all first match"
    (assert=
      (get (first (regexp-match-all (make-regexp "[0-9]+") "10 20 30")) "match")
      "10"))
  ;; No hit yields a list of length 0.
  (deftest
    "match-all empty when no match"
    (let ((found (regexp-match-all (make-regexp "xyz") "hello")))
      (assert= (length found) 0))))
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; regexp-replace / regexp-replace-all
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(defsuite
  "regexp:replace"
  ;; regexp-replace touches only the first match; "boo" stays as it is.
  (deftest
    "replace first match"
    (assert= (regexp-replace (make-regexp "o+") "foobar boo" "0") "f0bar boo"))
  ;; With nothing to replace the input comes back unchanged.
  (deftest
    "replace no match returns original"
    (assert= (regexp-replace (make-regexp "xyz") "hello" "X") "hello"))
  (deftest
    "replace-all all matches"
    (assert= (regexp-replace-all (make-regexp "o") "foo boo" "0") "f00 b00"))
  ;; $& in the replacement stands for the whole matched text.
  (deftest
    "replace with $& (whole match)"
    (assert=
      (regexp-replace (make-regexp "[0-9]+") "price 42" "[$&]")
      "price [42]"))
  ;; $1..$9 stand for the capture groups, as the regexp-replace spec states.
  ;; Swapping two groups checks that each number maps to the right group and
  ;; that literal text between the references is kept.
  (deftest
    "replace with $1 $2 (capture groups)"
    (assert=
      (regexp-replace (make-regexp "([a-z]+)=([0-9]+)") "set x=10" "$2=$1")
      "set 10=x"))
  ;; An empty replacement deletes every match.
  (deftest
    "replace-all removes digits"
    (assert=
      (regexp-replace-all (make-regexp "[0-9]") "a1b2c3" "")
      "abc")))
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; regexp-split
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(defsuite
  "regexp:split"
  ;; " +" is the separator pattern; the input has three words.
  (deftest
    "split on whitespace"
    (assert= (length (regexp-split (make-regexp " +") "hello world foo")) 3))
  (deftest
    "split first part"
    (assert= (first (regexp-split (make-regexp ",") "a,b,c")) "a"))
  ;; Drop the first two pieces, then look at what is left.
  (deftest
    "split last part"
    (let ((after-two (rest (rest (regexp-split (make-regexp ",") "a,b,c")))))
      (assert= (first after-two) "c")))
  ;; A separator that never matches leaves a one-element list.
  (deftest
    "split no match → single element"
    (assert= (length (regexp-split (make-regexp ",") "hello")) 1)))
|
||||
|
||||
;; --------------------------------------------------------------------------
|
||||
;; flags
|
||||
;; --------------------------------------------------------------------------
|
||||
|
||||
(defsuite
  "regexp:flags"
  ;; "i" lets an upper-case pattern match lower-case text ...
  (deftest
    "case-insensitive flag"
    (assert
      (not (nil? (regexp-match (make-regexp "HELLO" "i") "hello world")))))
  ;; ... and without it the same pattern must miss.
  (deftest
    "case-sensitive without flag"
    (let ((outcome (regexp-match (make-regexp "HELLO") "hello world")))
      (assert-nil outcome)))
  ;; "m" makes ^ match after each newline, so all three lines are hit.
  (deftest
    "multiline ^ matches line starts"
    (assert=
      (length (regexp-match-all (make-regexp "^[a-z]" "m") "a\nb\nc"))
      3)))
|
||||
Reference in New Issue
Block a user