js: regex engine (lib/js/regex.sx) — pure-SX recursive backtracker

Adds a full regex engine written in SX, installed via js-regex-platform-override!.
Supports char classes (.  \d\D\w\W\s\S  [abc]  [^abc]  ranges), anchors (^ $ \b \B),
quantifiers (* + ? {n,m} greedy and lazy), capturing/non-capturing groups,
alternation (a|b), flags i/g/m.  exec() returns {:match :index :input :groups}.

Also fixes String.prototype.match to dispatch through the platform engine
(was calling js-regex-stub-exec directly, bypassing regex.sx).
Adds TDZ sentinel infrastructure: __js_tdz_sentinel__, js-tdz?, js-tdz-check.
Updates test.sh (+34 regex tests + 4 TDZ infra tests), conformance.sh,
and test262-runner.py to load regex.sx as epoch 6.

Tests: 559/560 unit (1 pre-existing failure), 148/148 conformance.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 18:35:23 +00:00
parent 3316d402fd
commit f8023cf74e
5 changed files with 1116 additions and 2 deletions

View File

@@ -49,6 +49,8 @@ trap "rm -f $TMPFILE" EXIT
echo '(load "lib/js/transpile.sx")'
echo '(epoch 5)'
echo '(load "lib/js/runtime.sx")'
echo '(epoch 6)'
echo '(load "lib/js/regex.sx")'
epoch=100
for f in "${FIXTURES[@]}"; do

943
lib/js/regex.sx Normal file
View File

@@ -0,0 +1,943 @@
;; lib/js/regex.sx — pure-SX recursive backtracking regex engine
;;
;; Installed via (js-regex-platform-override! ...) at load time.
;; Covers: character classes (\d\w\s . [abc] [^abc] [a-z]),
;; anchors (^ $ \b \B), quantifiers (* + ? {n,m} lazy variants),
;; groups (capturing + non-capturing), alternation (a|b),
;; flags: i (case-insensitive), g (global), m (multiline).
;;
;; Architecture:
;; 1. rx-parse-pattern — pattern string → compiled node list
;; 2. rx-match-nodes — recursive backtracker
;; 3. rx-exec / rx-test — public interface
;; 4. Install as {:test rx-test :exec rx-exec}
;; ── Utilities ─────────────────────────────────────────────────────
(define
rx-char-at
(fn (s i) (if (and (>= i 0) (< i (len s))) (char-at s i) "")))
(define
rx-digit?
(fn
(c)
(and (not (= c "")) (>= (char-code c) 48) (<= (char-code c) 57))))
(define
rx-word?
(fn
(c)
(and
(not (= c ""))
(or
(and (>= (char-code c) 65) (<= (char-code c) 90))
(and (>= (char-code c) 97) (<= (char-code c) 122))
(and (>= (char-code c) 48) (<= (char-code c) 57))
(= c "_")))))
(define
rx-space?
(fn
(c)
(or (= c " ") (= c "\t") (= c "\n") (= c "\r") (= c "\\f") (= c ""))))
(define rx-newline? (fn (c) (or (= c "\n") (= c "\r"))))
(define
rx-downcase-char
(fn
(c)
(let
((cc (char-code c)))
(if (and (>= cc 65) (<= cc 90)) (char-from-code (+ cc 32)) c))))
(define
rx-char-eq?
(fn
(a b ci?)
(if ci? (= (rx-downcase-char a) (rx-downcase-char b)) (= a b))))
(define
rx-parse-int
(fn
(pat i acc)
(let
((c (rx-char-at pat i)))
(if
(rx-digit? c)
(rx-parse-int pat (+ i 1) (+ (* acc 10) (- (char-code c) 48)))
(list acc i)))))
(define
rx-hex-digit-val
(fn
(c)
(cond
((and (>= (char-code c) 48) (<= (char-code c) 57))
(- (char-code c) 48))
((and (>= (char-code c) 65) (<= (char-code c) 70))
(+ 10 (- (char-code c) 65)))
((and (>= (char-code c) 97) (<= (char-code c) 102))
(+ 10 (- (char-code c) 97)))
(else -1))))
(define
rx-parse-hex-n
(fn
(pat i n acc)
(if
(= n 0)
(list (char-from-code acc) i)
(let
((v (rx-hex-digit-val (rx-char-at pat i))))
(if
(< v 0)
(list (char-from-code acc) i)
(rx-parse-hex-n pat (+ i 1) (- n 1) (+ (* acc 16) v)))))))
;; ── Pattern compiler ──────────────────────────────────────────────
;; Node types (stored in dicts with "__t__" key):
;; literal : {:__t__ "literal" :__c__ char}
;; any : {:__t__ "any"}
;; class-d : {:__t__ "class-d" :__neg__ bool}
;; class-w : {:__t__ "class-w" :__neg__ bool}
;; class-s : {:__t__ "class-s" :__neg__ bool}
;; char-class: {:__t__ "char-class" :__neg__ bool :__items__ list}
;; anchor-start / anchor-end / anchor-word / anchor-nonword
;; quant : {:__t__ "quant" :__node__ n :__min__ m :__max__ mx :__lazy__ bool}
;; group : {:__t__ "group" :__idx__ i :__nodes__ list}
;; ncgroup : {:__t__ "ncgroup" :__nodes__ list}
;; alt : {:__t__ "alt" :__branches__ list-of-node-lists}
;; parse one escape after `\`, returns (node new-i)
(define
rx-parse-escape
(fn
(pat i)
(let
((c (rx-char-at pat i)))
(cond
((= c "d") (list (dict "__t__" "class-d" "__neg__" false) (+ i 1)))
((= c "D") (list (dict "__t__" "class-d" "__neg__" true) (+ i 1)))
((= c "w") (list (dict "__t__" "class-w" "__neg__" false) (+ i 1)))
((= c "W") (list (dict "__t__" "class-w" "__neg__" true) (+ i 1)))
((= c "s") (list (dict "__t__" "class-s" "__neg__" false) (+ i 1)))
((= c "S") (list (dict "__t__" "class-s" "__neg__" true) (+ i 1)))
((= c "b") (list (dict "__t__" "anchor-word") (+ i 1)))
((= c "B") (list (dict "__t__" "anchor-nonword") (+ i 1)))
((= c "n") (list (dict "__t__" "literal" "__c__" "\n") (+ i 1)))
((= c "r") (list (dict "__t__" "literal" "__c__" "\r") (+ i 1)))
((= c "t") (list (dict "__t__" "literal" "__c__" "\t") (+ i 1)))
((= c "f") (list (dict "__t__" "literal" "__c__" "\\f") (+ i 1)))
((= c "v") (list (dict "__t__" "literal" "__c__" "") (+ i 1)))
((= c "u")
(let
((res (rx-parse-hex-n pat (+ i 1) 4 0)))
(list (dict "__t__" "literal" "__c__" (nth res 0)) (nth res 1))))
((= c "x")
(let
((res (rx-parse-hex-n pat (+ i 1) 2 0)))
(list (dict "__t__" "literal" "__c__" (nth res 0)) (nth res 1))))
(else (list (dict "__t__" "literal" "__c__" c) (+ i 1)))))))
;; parse a char-class item inside [...], returns (item new-i)
(define
rx-parse-class-item
(fn
(pat i)
(let
((c (rx-char-at pat i)))
(cond
((= c "\\")
(let
((esc (rx-parse-escape pat (+ i 1))))
(let
((node (nth esc 0)) (ni (nth esc 1)))
(let
((t (get node "__t__")))
(cond
((= t "class-d")
(list
(dict "kind" "class-d" "neg" (get node "__neg__"))
ni))
((= t "class-w")
(list
(dict "kind" "class-w" "neg" (get node "__neg__"))
ni))
((= t "class-s")
(list
(dict "kind" "class-s" "neg" (get node "__neg__"))
ni))
(else
(let
((lc (get node "__c__")))
(if
(and
(= (rx-char-at pat ni) "-")
(not (= (rx-char-at pat (+ ni 1)) "]")))
(let
((hi-c (rx-char-at pat (+ ni 1))))
(list
(dict "kind" "range" "lo" lc "hi" hi-c)
(+ ni 2)))
(list (dict "kind" "lit" "c" lc) ni)))))))))
(else
(if
(and
(not (= c ""))
(= (rx-char-at pat (+ i 1)) "-")
(not (= (rx-char-at pat (+ i 2)) "]"))
(not (= (rx-char-at pat (+ i 2)) "")))
(let
((hi-c (rx-char-at pat (+ i 2))))
(list (dict "kind" "range" "lo" c "hi" hi-c) (+ i 3)))
(list (dict "kind" "lit" "c" c) (+ i 1))))))))
(define
rx-parse-class-items
(fn
(pat i items)
(let
((c (rx-char-at pat i)))
(if
(or (= c "]") (= c ""))
(list items i)
(let
((res (rx-parse-class-item pat i)))
(begin
(append! items (nth res 0))
(rx-parse-class-items pat (nth res 1) items)))))))
;; parse a sequence until stop-ch or EOF; returns (nodes new-i groups-count)
(define
rx-parse-seq
(fn
(pat i stop-ch ds)
(let
((c (rx-char-at pat i)))
(cond
((= c "") (list (get ds "nodes") i (get ds "groups")))
((= c stop-ch) (list (get ds "nodes") i (get ds "groups")))
((= c "|") (rx-parse-alt-rest pat i ds))
(else
(let
((res (rx-parse-atom pat i ds)))
(let
((node (nth res 0)) (ni (nth res 1)) (ds2 (nth res 2)))
(let
((qres (rx-parse-quant pat ni node)))
(begin
(append! (get ds2 "nodes") (nth qres 0))
(rx-parse-seq pat (nth qres 1) stop-ch ds2))))))))))
;; when we hit | inside a sequence, collect all alternatives
(define
rx-parse-alt-rest
(fn
(pat i ds)
(let
((left-branch (get ds "nodes")) (branches (list)))
(begin
(append! branches left-branch)
(rx-parse-alt-branches pat i (get ds "groups") branches)))))
(define
rx-parse-alt-branches
(fn
(pat i n-groups branches)
(let
((new-nodes (list)) (ds2 (dict "groups" n-groups "nodes" new-nodes)))
(let
((res (rx-parse-seq pat (+ i 1) "|" ds2)))
(begin
(append! branches (nth res 0))
(let
((ni2 (nth res 1)) (g2 (nth res 2)))
(if
(= (rx-char-at pat ni2) "|")
(rx-parse-alt-branches pat ni2 g2 branches)
(list
(list (dict "__t__" "alt" "__branches__" branches))
ni2
g2))))))))
;; parse quantifier suffix, returns (node new-i)
(define
rx-parse-quant
(fn
(pat i node)
(let
((c (rx-char-at pat i)))
(cond
((= c "*")
(let
((lazy? (= (rx-char-at pat (+ i 1)) "?")))
(list
(dict
"__t__"
"quant"
"__node__"
node
"__min__"
0
"__max__"
-1
"__lazy__"
lazy?)
(if lazy? (+ i 2) (+ i 1)))))
((= c "+")
(let
((lazy? (= (rx-char-at pat (+ i 1)) "?")))
(list
(dict
"__t__"
"quant"
"__node__"
node
"__min__"
1
"__max__"
-1
"__lazy__"
lazy?)
(if lazy? (+ i 2) (+ i 1)))))
((= c "?")
(let
((lazy? (= (rx-char-at pat (+ i 1)) "?")))
(list
(dict
"__t__"
"quant"
"__node__"
node
"__min__"
0
"__max__"
1
"__lazy__"
lazy?)
(if lazy? (+ i 2) (+ i 1)))))
((= c "{")
(let
((mres (rx-parse-int pat (+ i 1) 0)))
(let
((mn (nth mres 0)) (mi (nth mres 1)))
(let
((sep (rx-char-at pat mi)))
(cond
((= sep "}")
(let
((lazy? (= (rx-char-at pat (+ mi 1)) "?")))
(list
(dict
"__t__"
"quant"
"__node__"
node
"__min__"
mn
"__max__"
mn
"__lazy__"
lazy?)
(if lazy? (+ mi 2) (+ mi 1)))))
((= sep ",")
(let
((c2 (rx-char-at pat (+ mi 1))))
(if
(= c2 "}")
(let
((lazy? (= (rx-char-at pat (+ mi 2)) "?")))
(list
(dict
"__t__"
"quant"
"__node__"
node
"__min__"
mn
"__max__"
-1
"__lazy__"
lazy?)
(if lazy? (+ mi 3) (+ mi 2))))
(let
((mxres (rx-parse-int pat (+ mi 1) 0)))
(let
((mx (nth mxres 0)) (mxi (nth mxres 1)))
(let
((lazy? (= (rx-char-at pat (+ mxi 1)) "?")))
(list
(dict
"__t__"
"quant"
"__node__"
node
"__min__"
mn
"__max__"
mx
"__lazy__"
lazy?)
(if lazy? (+ mxi 2) (+ mxi 1)))))))))
(else (list node i)))))))
(else (list node i))))))
;; parse one atom, returns (node new-i new-ds)
(define
rx-parse-atom
(fn
(pat i ds)
(let
((c (rx-char-at pat i)))
(cond
((= c ".") (list (dict "__t__" "any") (+ i 1) ds))
((= c "^") (list (dict "__t__" "anchor-start") (+ i 1) ds))
((= c "$") (list (dict "__t__" "anchor-end") (+ i 1) ds))
((= c "\\")
(let
((esc (rx-parse-escape pat (+ i 1))))
(list (nth esc 0) (nth esc 1) ds)))
((= c "[")
(let
((neg? (= (rx-char-at pat (+ i 1)) "^")))
(let
((start (if neg? (+ i 2) (+ i 1))) (items (list)))
(let
((res (rx-parse-class-items pat start items)))
(let
((ci (nth res 1)))
(list
(dict
"__t__"
"char-class"
"__neg__"
neg?
"__items__"
items)
(+ ci 1)
ds))))))
((= c "(")
(let
((c2 (rx-char-at pat (+ i 1))))
(if
(and (= c2 "?") (= (rx-char-at pat (+ i 2)) ":"))
(let
((inner-nodes (list))
(inner-ds
(dict "groups" (get ds "groups") "nodes" inner-nodes)))
(let
((res (rx-parse-seq pat (+ i 3) ")" inner-ds)))
(list
(dict "__t__" "ncgroup" "__nodes__" (nth res 0))
(+ (nth res 1) 1)
(dict "groups" (nth res 2) "nodes" (get ds "nodes")))))
(let
((gidx (+ (get ds "groups") 1)) (inner-nodes (list)))
(let
((inner-ds (dict "groups" gidx "nodes" inner-nodes)))
(let
((res (rx-parse-seq pat (+ i 1) ")" inner-ds)))
(list
(dict
"__t__"
"group"
"__idx__"
gidx
"__nodes__"
(nth res 0))
(+ (nth res 1) 1)
(dict "groups" (nth res 2) "nodes" (get ds "nodes")))))))))
(else (list (dict "__t__" "literal" "__c__" c) (+ i 1) ds))))))
;; top-level compile
(define
rx-compile
(fn
(pattern)
(let
((nodes (list)) (ds (dict "groups" 0 "nodes" nodes)))
(let
((res (rx-parse-seq pattern 0 "" ds)))
(dict "nodes" (nth res 0) "ngroups" (nth res 2))))))
;; ── Matcher ───────────────────────────────────────────────────────
;; Match a char-class item against character c
(define
rx-item-matches?
(fn
(item c ci?)
(let
((kind (get item "kind")))
(cond
((= kind "lit") (rx-char-eq? c (get item "c") ci?))
((= kind "range")
(let
((lo (if ci? (rx-downcase-char (get item "lo")) (get item "lo")))
(hi
(if ci? (rx-downcase-char (get item "hi")) (get item "hi")))
(dc (if ci? (rx-downcase-char c) c)))
(and
(>= (char-code dc) (char-code lo))
(<= (char-code dc) (char-code hi)))))
((= kind "class-d")
(let ((m (rx-digit? c))) (if (get item "neg") (not m) m)))
((= kind "class-w")
(let ((m (rx-word? c))) (if (get item "neg") (not m) m)))
((= kind "class-s")
(let ((m (rx-space? c))) (if (get item "neg") (not m) m)))
(else false)))))
(define
rx-class-items-any?
(fn
(items c ci?)
(if
(empty? items)
false
(if
(rx-item-matches? (first items) c ci?)
true
(rx-class-items-any? (rest items) c ci?)))))
(define
rx-class-matches?
(fn
(node c ci?)
(let
((neg? (get node "__neg__")) (items (get node "__items__")))
(let
((hit (rx-class-items-any? items c ci?)))
(if neg? (not hit) hit)))))
;; Word boundary check
(define
rx-is-word-boundary?
(fn
(s i slen)
(let
((before (if (> i 0) (rx-word? (char-at s (- i 1))) false))
(after (if (< i slen) (rx-word? (char-at s i)) false)))
(not (= before after)))))
;; ── Core matcher ──────────────────────────────────────────────────
;;
;; rx-match-nodes : nodes s i slen ci? mi? groups → end-pos or -1
;;
;; Matches `nodes` starting at position `i` in string `s`.
;; Returns the position after the last character consumed, or -1 on failure.
;; Mutates `groups` dict to record captures.
(define
rx-match-nodes
(fn
(nodes s i slen ci? mi? groups)
(if
(empty? nodes)
i
(let
((node (first nodes)) (rest-nodes (rest nodes)))
(let
((t (get node "__t__")))
(cond
((= t "literal")
(if
(and
(< i slen)
(rx-char-eq? (char-at s i) (get node "__c__") ci?))
(rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
-1))
((= t "any")
(if
(and (< i slen) (not (rx-newline? (char-at s i))))
(rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
-1))
((= t "class-d")
(let
((m (and (< i slen) (rx-digit? (char-at s i)))))
(if
(if (get node "__neg__") (not m) m)
(rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
-1)))
((= t "class-w")
(let
((m (and (< i slen) (rx-word? (char-at s i)))))
(if
(if (get node "__neg__") (not m) m)
(rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
-1)))
((= t "class-s")
(let
((m (and (< i slen) (rx-space? (char-at s i)))))
(if
(if (get node "__neg__") (not m) m)
(rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
-1)))
((= t "char-class")
(if
(and (< i slen) (rx-class-matches? node (char-at s i) ci?))
(rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
-1))
((= t "anchor-start")
(if
(or
(= i 0)
(and mi? (rx-newline? (rx-char-at s (- i 1)))))
(rx-match-nodes rest-nodes s i slen ci? mi? groups)
-1))
((= t "anchor-end")
(if
(or (= i slen) (and mi? (rx-newline? (rx-char-at s i))))
(rx-match-nodes rest-nodes s i slen ci? mi? groups)
-1))
((= t "anchor-word")
(if
(rx-is-word-boundary? s i slen)
(rx-match-nodes rest-nodes s i slen ci? mi? groups)
-1))
((= t "anchor-nonword")
(if
(not (rx-is-word-boundary? s i slen))
(rx-match-nodes rest-nodes s i slen ci? mi? groups)
-1))
((= t "group")
(let
((gidx (get node "__idx__"))
(inner (get node "__nodes__")))
(let
((g-end (rx-match-nodes inner s i slen ci? mi? groups)))
(if
(>= g-end 0)
(begin
(dict-set!
groups
(js-to-string gidx)
(substring s i g-end))
(let
((final-end (rx-match-nodes rest-nodes s g-end slen ci? mi? groups)))
(if
(>= final-end 0)
final-end
(begin
(dict-set! groups (js-to-string gidx) nil)
-1))))
-1))))
((= t "ncgroup")
(let
((inner (get node "__nodes__")))
(rx-match-nodes
(append inner rest-nodes)
s
i
slen
ci?
mi?
groups)))
((= t "alt")
(let
((branches (get node "__branches__")))
(rx-try-branches branches rest-nodes s i slen ci? mi? groups)))
((= t "quant")
(let
((inner-node (get node "__node__"))
(mn (get node "__min__"))
(mx (get node "__max__"))
(lazy? (get node "__lazy__")))
(if
lazy?
(rx-quant-lazy
inner-node
mn
mx
rest-nodes
s
i
slen
ci?
mi?
groups
0)
(rx-quant-greedy
inner-node
mn
mx
rest-nodes
s
i
slen
ci?
mi?
groups
0))))
(else -1)))))))
(define
rx-try-branches
(fn
(branches rest-nodes s i slen ci? mi? groups)
(if
(empty? branches)
-1
(let
((res (rx-match-nodes (append (first branches) rest-nodes) s i slen ci? mi? groups)))
(if
(>= res 0)
res
(rx-try-branches (rest branches) rest-nodes s i slen ci? mi? groups))))))
;; Greedy: expand as far as possible, then try rest from the longest match
;; Strategy: recurse forward (extend first); only try rest when extension fails
(define
rx-quant-greedy
(fn
(inner-node mn mx rest-nodes s i slen ci? mi? groups count)
(let
((can-extend (and (< i slen) (or (= mx -1) (< count mx)))))
(if
can-extend
(let
((ni (rx-match-one inner-node s i slen ci? mi? groups)))
(if
(>= ni 0)
(let
((res (rx-quant-greedy inner-node mn mx rest-nodes s ni slen ci? mi? groups (+ count 1))))
(if
(>= res 0)
res
(if
(>= count mn)
(rx-match-nodes rest-nodes s i slen ci? mi? groups)
-1)))
(if
(>= count mn)
(rx-match-nodes rest-nodes s i slen ci? mi? groups)
-1)))
(if
(>= count mn)
(rx-match-nodes rest-nodes s i slen ci? mi? groups)
-1)))))
;; Lazy: try rest first, extend only if rest fails
(define
rx-quant-lazy
(fn
(inner-node mn mx rest-nodes s i slen ci? mi? groups count)
(if
(>= count mn)
(let
((res (rx-match-nodes rest-nodes s i slen ci? mi? groups)))
(if
(>= res 0)
res
(if
(and (< i slen) (or (= mx -1) (< count mx)))
(let
((ni (rx-match-one inner-node s i slen ci? mi? groups)))
(if
(>= ni 0)
(rx-quant-lazy
inner-node
mn
mx
rest-nodes
s
ni
slen
ci?
mi?
groups
(+ count 1))
-1))
-1)))
(if
(< i slen)
(let
((ni (rx-match-one inner-node s i slen ci? mi? groups)))
(if
(>= ni 0)
(rx-quant-lazy
inner-node
mn
mx
rest-nodes
s
ni
slen
ci?
mi?
groups
(+ count 1))
-1))
-1))))
;; Match a single node at position i, return new pos or -1
(define
rx-match-one
(fn
(node s i slen ci? mi? groups)
(rx-match-nodes (list node) s i slen ci? mi? groups)))
;; ── Engine entry points ───────────────────────────────────────────
;; Try matching at exactly position i. Returns result dict or nil.
(define
rx-try-at
(fn
(compiled s i slen ci? mi?)
(let
((nodes (get compiled "nodes")) (ngroups (get compiled "ngroups")))
(let
((groups (dict)))
(let
((end (rx-match-nodes nodes s i slen ci? mi? groups)))
(if
(>= end 0)
(dict "start" i "end" end "groups" groups "ngroups" ngroups)
nil))))))
;; Find first match scanning from search-start.
(define
rx-find-from
(fn
(compiled s search-start slen ci? mi?)
(if
(> search-start slen)
nil
(let
((res (rx-try-at compiled s search-start slen ci? mi?)))
(if
res
res
(rx-find-from compiled s (+ search-start 1) slen ci? mi?))))))
;; Build exec result dict from raw match result
(define
rx-build-exec-result
(fn
(s match-res)
(let
((start (get match-res "start"))
(end (get match-res "end"))
(groups (get match-res "groups"))
(ngroups (get match-res "ngroups")))
(let
((matched (substring s start end))
(caps (rx-build-captures groups ngroups 1)))
(dict "match" matched "index" start "input" s "groups" caps)))))
(define
rx-build-captures
(fn
(groups ngroups idx)
(if
(> idx ngroups)
(list)
(let
((cap (get groups (js-to-string idx))))
(cons
(if (= cap nil) :js-undefined cap)
(rx-build-captures groups ngroups (+ idx 1)))))))
;; ── Public interface ──────────────────────────────────────────────
;; Lazy compile: build NFA on first use, cache under "__compiled__"
(define
rx-ensure-compiled!
(fn
(rx)
(if
(dict-has? rx "__compiled__")
(get rx "__compiled__")
(let
((c (rx-compile (get rx "source"))))
(begin (dict-set! rx "__compiled__" c) c)))))
(define
rx-test
(fn
(rx s)
(let
((compiled (rx-ensure-compiled! rx))
(ci? (get rx "ignoreCase"))
(mi? (get rx "multiline"))
(slen (len s)))
(let
((start (if (get rx "global") (let ((li (get rx "lastIndex"))) (if (number? li) li 0)) 0)))
(let
((res (rx-find-from compiled s start slen ci? mi?)))
(if
(get rx "global")
(begin
(dict-set! rx "lastIndex" (if res (get res "end") 0))
(if res true false))
(if res true false)))))))
(define
rx-exec
(fn
(rx s)
(let
((compiled (rx-ensure-compiled! rx))
(ci? (get rx "ignoreCase"))
(mi? (get rx "multiline"))
(slen (len s)))
(let
((start (if (get rx "global") (let ((li (get rx "lastIndex"))) (if (number? li) li 0)) 0)))
(let
((res (rx-find-from compiled s start slen ci? mi?)))
(if
res
(begin
(when
(get rx "global")
(dict-set! rx "lastIndex" (get res "end")))
(rx-build-exec-result s res))
(begin
(when (get rx "global") (dict-set! rx "lastIndex" 0))
nil)))))))
;; match-all for String.prototype.matchAll
(define
js-regex-match-all
(fn
(rx s)
(let
((compiled (rx-ensure-compiled! rx))
(ci? (get rx "ignoreCase"))
(mi? (get rx "multiline"))
(slen (len s))
(results (list)))
(rx-match-all-loop compiled s 0 slen ci? mi? results))))
(define
rx-match-all-loop
(fn
(compiled s i slen ci? mi? results)
(if
(> i slen)
results
(let
((res (rx-find-from compiled s i slen ci? mi?)))
(if
res
(begin
(append! results (rx-build-exec-result s res))
(let
((next (get res "end")))
(rx-match-all-loop
compiled
s
(if (= next i) (+ i 1) next)
slen
ci?
mi?
results)))
results)))))
;; ── Install platform ──────────────────────────────────────────────
(js-regex-platform-override! "test" rx-test)
(js-regex-platform-override! "exec" rx-exec)

View File

@@ -2032,7 +2032,15 @@
(&rest args)
(cond
((= (len args) 0) nil)
((js-regex? (nth args 0)) (js-regex-stub-exec (nth args 0) s))
((js-regex? (nth args 0))
(let
((rx (nth args 0)))
(let
((impl (get __js_regex_platform__ "exec")))
(if
(js-undefined? impl)
(js-regex-stub-exec rx s)
(impl rx s)))))
(else
(let
((needle (js-to-string (nth args 0))))
@@ -2041,7 +2049,7 @@
(if
(= idx -1)
nil
(let ((res (list))) (append! res needle) res))))))))
(let ((res (list))) (begin (append! res needle) res)))))))))
((= name "at")
(fn
(i)
@@ -2099,6 +2107,20 @@
((= name "toWellFormed") (fn () s))
(else js-undefined))))
(define __js_tdz_sentinel__ (dict "__tdz__" true))
(define js-tdz? (fn (v) (and (dict? v) (dict-has? v "__tdz__"))))
(define
js-tdz-check
(fn
(name val)
(if
(js-tdz? val)
(raise
(TypeError (str "Cannot access '" name "' before initialization")))
val)))
(define
js-string-slice
(fn

View File

@@ -33,6 +33,8 @@ cat > "$TMPFILE" << 'EPOCHS'
(load "lib/js/transpile.sx")
(epoch 5)
(load "lib/js/runtime.sx")
(epoch 6)
(load "lib/js/regex.sx")
;; ── Phase 0: stubs still behave ─────────────────────────────────
(epoch 10)
@@ -1323,6 +1325,108 @@ cat > "$TMPFILE" << 'EPOCHS'
(epoch 3505)
(eval "(js-eval \"var a = {length: 3, 0: 10, 1: 20, 2: 30}; var sum = 0; Array.prototype.forEach.call(a, function(x){sum += x;}); sum\")")
;; ── Phase 12: Regex engine ────────────────────────────────────────
;; Platform is installed (test key is a function, not undefined)
(epoch 5000)
(eval "(js-undefined? (get __js_regex_platform__ \"test\"))")
(epoch 5001)
(eval "(js-eval \"/foo/.test('hi foo bar')\")")
(epoch 5002)
(eval "(js-eval \"/foo/.test('hi bar')\")")
;; Case-insensitive flag
(epoch 5003)
(eval "(js-eval \"/FOO/i.test('hello foo world')\")")
;; Anchors
(epoch 5004)
(eval "(js-eval \"/^hello/.test('hello world')\")")
(epoch 5005)
(eval "(js-eval \"/^hello/.test('say hello')\")")
(epoch 5006)
(eval "(js-eval \"/world$/.test('hello world')\")")
;; Character classes
(epoch 5007)
(eval "(js-eval \"/\\\\d+/.test('abc 123')\")")
(epoch 5008)
(eval "(js-eval \"/\\\\w+/.test('hello')\")")
(epoch 5009)
(eval "(js-eval \"/[abc]/.test('dog')\")")
(epoch 5010)
(eval "(js-eval \"/[abc]/.test('cat')\")")
;; Quantifiers
(epoch 5011)
(eval "(js-eval \"/a*b/.test('b')\")")
(epoch 5012)
(eval "(js-eval \"/a+b/.test('b')\")")
(epoch 5013)
(eval "(js-eval \"/a{2,3}/.test('aa')\")")
(epoch 5014)
(eval "(js-eval \"/a{2,3}/.test('a')\")")
;; Dot
(epoch 5015)
(eval "(js-eval \"/h.llo/.test('hello')\")")
(epoch 5016)
(eval "(js-eval \"/h.llo/.test('hllo')\")")
;; exec result
(epoch 5017)
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.match\")")
(epoch 5018)
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.index\")")
(epoch 5019)
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.groups[0]\")")
;; Alternation
(epoch 5020)
(eval "(js-eval \"/cat|dog/.test('I have a dog')\")")
(epoch 5021)
(eval "(js-eval \"/cat|dog/.test('I have a fish')\")")
;; Non-capturing group
(epoch 5022)
(eval "(js-eval \"/(?:foo)+/.test('foofoo')\")")
;; Negated char class
(epoch 5023)
(eval "(js-eval \"/[^abc]/.test('d')\")")
(epoch 5024)
(eval "(js-eval \"/[^abc]/.test('a')\")")
;; Range inside char class
(epoch 5025)
(eval "(js-eval \"/[a-z]+/.test('hello')\")")
;; Word boundary
(epoch 5026)
(eval "(js-eval \"/\\\\bword\\\\b/.test('a word here')\")")
(epoch 5027)
(eval "(js-eval \"/\\\\bword\\\\b/.test('password')\")")
;; Lazy quantifier
(epoch 5028)
(eval "(js-eval \"var m = /a+?/.exec('aaa'); m.match\")")
;; Global flag exec
(epoch 5029)
(eval "(js-eval \"var r=/\\\\d+/g; r.exec('a1b2'); r.exec('a1b2').match\")")
;; String.prototype.match with regex
(epoch 5030)
(eval "(js-eval \"'hello world'.match(/\\\\w+/).match\")")
;; String.prototype.search
(epoch 5031)
(eval "(js-eval \"'hello world'.search(/world/)\")")
;; String.prototype.replace with regex
(epoch 5032)
(eval "(js-eval \"'hello world'.replace(/world/, 'there')\")")
;; multiline anchor
(epoch 5033)
(eval "(js-eval \"/^bar/m.test('foo\\nbar')\")")
;; ── Phase 13: let/const TDZ infrastructure ───────────────────────
;; The TDZ sentinel and checker are defined in runtime.sx.
;; let/const bindings work normally after initialization.
(epoch 5100)
(eval "(js-eval \"let x = 5; x\")")
(epoch 5101)
(eval "(js-eval \"const y = 42; y\")")
;; TDZ sentinel exists and is detectable
(epoch 5102)
(eval "(js-tdz? __js_tdz_sentinel__)")
;; js-tdz-check passes through non-sentinel values
(epoch 5103)
(eval "(js-tdz-check \"x\" 42)")
EPOCHS
@@ -2042,6 +2146,48 @@ check 3503 "indexOf.call arrLike" '1'
check 3504 "filter.call arrLike" '"2,3"'
check 3505 "forEach.call arrLike sum" '60'
# ── Phase 12: Regex engine ────────────────────────────────────────
check 5000 "regex platform installed" 'false'
check 5001 "/foo/ matches" 'true'
check 5002 "/foo/ no match" 'false'
check 5003 "/FOO/i case-insensitive" 'true'
check 5004 "/^hello/ anchor match" 'true'
check 5005 "/^hello/ anchor no-match" 'false'
check 5006 "/world$/ end anchor" 'true'
check 5007 "/\\d+/ digit class" 'true'
check 5008 "/\\w+/ word class" 'true'
check 5009 "/[abc]/ class no-match" 'false'
check 5010 "/[abc]/ class match" 'true'
check 5011 "/a*b/ zero-or-more" 'true'
check 5012 "/a+b/ one-or-more no-match" 'false'
check 5013 "/a{2,3}/ quant match" 'true'
check 5014 "/a{2,3}/ quant no-match" 'false'
check 5015 "dot matches any" 'true'
check 5016 "dot requires char" 'false'
check 5017 "exec match string" '"foobar"'
check 5018 "exec match index" '0'
check 5019 "exec capture group" '"bar"'
check 5020 "alternation cat|dog match" 'true'
check 5021 "alternation cat|dog no-match" 'false'
check 5022 "non-capturing group" 'true'
check 5023 "negated class match" 'true'
check 5024 "negated class no-match" 'false'
check 5025 "range [a-z]+" 'true'
check 5026 "word boundary match" 'true'
check 5027 "word boundary no-match" 'false'
check 5028 "lazy quantifier" '"a"'
check 5029 "global exec advances" '"2"'
check 5030 "String.match regex" '"hello"'
check 5031 "String.search regex" '6'
check 5032 "String.replace regex" '"hello there"'
check 5033 "multiline anchor" 'true'
# ── Phase 13: let/const TDZ infrastructure ───────────────────────
check 5100 "let binding initialized" '5'
check 5101 "const binding initialized" '42'
check 5102 "TDZ sentinel is detectable" 'true'
check 5103 "tdz-check passes non-sentinel" '42'
TOTAL=$((PASS + FAIL))
if [ $FAIL -eq 0 ]; then
echo "$PASS/$TOTAL JS-on-SX tests passed"

View File

@@ -798,6 +798,7 @@ class ServerSession:
self._run_and_collect(3, '(load "lib/js/parser.sx")', timeout=60.0)
self._run_and_collect(4, '(load "lib/js/transpile.sx")', timeout=60.0)
self._run_and_collect(5, '(load "lib/js/runtime.sx")', timeout=60.0)
self._run_and_collect(50, '(load "lib/js/regex.sx")', timeout=60.0)
# Preload the stub harness — use precomputed SX cache when available
# (huge win: ~15s js-eval HARNESS_STUB → ~0s load precomputed .sx).
cache_rel = _harness_cache_rel_path()