js: regex engine (lib/js/regex.sx) — pure-SX recursive backtracker

Adds a full regex engine written in SX, installed via js-regex-platform-override!.
Supports char classes (.  \d\D\w\W\s\S  [abc]  [^abc]  ranges), anchors (^ $ \b \B),
quantifiers (* + ? {n,m} greedy and lazy), capturing/non-capturing groups,
alternation (a|b), flags i/g/m.  exec() returns {:match :index :input :groups}.

Also fixes String.prototype.match to dispatch through the platform engine
(was calling js-regex-stub-exec directly, bypassing regex.sx).
Adds TDZ sentinel infrastructure: __js_tdz_sentinel__, js-tdz?, js-tdz-check.
Updates test.sh (+34 regex tests + 4 TDZ infra tests), conformance.sh,
and test262-runner.py to load regex.sx as epoch 6.

Tests: 559/560 unit (1 pre-existing failure), 148/148 conformance.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-25 18:35:23 +00:00
parent 3316d402fd
commit f8023cf74e
5 changed files with 1116 additions and 2 deletions

View File

@@ -33,6 +33,8 @@ cat > "$TMPFILE" << 'EPOCHS'
(load "lib/js/transpile.sx")
(epoch 5)
(load "lib/js/runtime.sx")
(epoch 6)
(load "lib/js/regex.sx")
;; ── Phase 0: stubs still behave ─────────────────────────────────
(epoch 10)
@@ -1323,6 +1325,108 @@ cat > "$TMPFILE" << 'EPOCHS'
(epoch 3505)
(eval "(js-eval \"var a = {length: 3, 0: 10, 1: 20, 2: 30}; var sum = 0; Array.prototype.forEach.call(a, function(x){sum += x;}); sum\")")
;; ── Phase 12: Regex engine ────────────────────────────────────────
;; Platform is installed (test key is a function, not undefined)
(epoch 5000)
(eval "(js-undefined? (get __js_regex_platform__ \"test\"))")
(epoch 5001)
(eval "(js-eval \"/foo/.test('hi foo bar')\")")
(epoch 5002)
(eval "(js-eval \"/foo/.test('hi bar')\")")
;; Case-insensitive flag
(epoch 5003)
(eval "(js-eval \"/FOO/i.test('hello foo world')\")")
;; Anchors
(epoch 5004)
(eval "(js-eval \"/^hello/.test('hello world')\")")
(epoch 5005)
(eval "(js-eval \"/^hello/.test('say hello')\")")
(epoch 5006)
(eval "(js-eval \"/world$/.test('hello world')\")")
;; Character classes
(epoch 5007)
(eval "(js-eval \"/\\\\d+/.test('abc 123')\")")
(epoch 5008)
(eval "(js-eval \"/\\\\w+/.test('hello')\")")
(epoch 5009)
(eval "(js-eval \"/[abc]/.test('dog')\")")
(epoch 5010)
(eval "(js-eval \"/[abc]/.test('cat')\")")
;; Quantifiers
(epoch 5011)
(eval "(js-eval \"/a*b/.test('b')\")")
(epoch 5012)
(eval "(js-eval \"/a+b/.test('b')\")")
(epoch 5013)
(eval "(js-eval \"/a{2,3}/.test('aa')\")")
(epoch 5014)
(eval "(js-eval \"/a{2,3}/.test('a')\")")
;; Dot
(epoch 5015)
(eval "(js-eval \"/h.llo/.test('hello')\")")
(epoch 5016)
(eval "(js-eval \"/h.llo/.test('hllo')\")")
;; exec result
(epoch 5017)
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.match\")")
(epoch 5018)
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.index\")")
(epoch 5019)
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.groups[0]\")")
;; Alternation
(epoch 5020)
(eval "(js-eval \"/cat|dog/.test('I have a dog')\")")
(epoch 5021)
(eval "(js-eval \"/cat|dog/.test('I have a fish')\")")
;; Non-capturing group
(epoch 5022)
(eval "(js-eval \"/(?:foo)+/.test('foofoo')\")")
;; Negated char class
(epoch 5023)
(eval "(js-eval \"/[^abc]/.test('d')\")")
(epoch 5024)
(eval "(js-eval \"/[^abc]/.test('a')\")")
;; Range inside char class
(epoch 5025)
(eval "(js-eval \"/[a-z]+/.test('hello')\")")
;; Word boundary
(epoch 5026)
(eval "(js-eval \"/\\\\bword\\\\b/.test('a word here')\")")
(epoch 5027)
(eval "(js-eval \"/\\\\bword\\\\b/.test('password')\")")
;; Lazy quantifier
(epoch 5028)
(eval "(js-eval \"var m = /a+?/.exec('aaa'); m.match\")")
;; Global flag exec
(epoch 5029)
(eval "(js-eval \"var r=/\\\\d+/g; r.exec('a1b2'); r.exec('a1b2').match\")")
;; String.prototype.match with regex
(epoch 5030)
(eval "(js-eval \"'hello world'.match(/\\\\w+/).match\")")
;; String.prototype.search
(epoch 5031)
(eval "(js-eval \"'hello world'.search(/world/)\")")
;; String.prototype.replace with regex
(epoch 5032)
(eval "(js-eval \"'hello world'.replace(/world/, 'there')\")")
;; multiline anchor
(epoch 5033)
(eval "(js-eval \"/^bar/m.test('foo\\nbar')\")")
;; ── Phase 13: let/const TDZ infrastructure ───────────────────────
;; The TDZ sentinel and checker are defined in runtime.sx.
;; let/const bindings work normally after initialization.
(epoch 5100)
(eval "(js-eval \"let x = 5; x\")")
(epoch 5101)
(eval "(js-eval \"const y = 42; y\")")
;; TDZ sentinel exists and is detectable
(epoch 5102)
(eval "(js-tdz? __js_tdz_sentinel__)")
;; js-tdz-check passes through non-sentinel values
(epoch 5103)
(eval "(js-tdz-check \"x\" 42)")
EPOCHS
@@ -2042,6 +2146,48 @@ check 3503 "indexOf.call arrLike" '1'
check 3504 "filter.call arrLike" '"2,3"'
check 3505 "forEach.call arrLike sum" '60'
# ── Phase 12: Regex engine ────────────────────────────────────────
check 5000 "regex platform installed" 'false'
check 5001 "/foo/ matches" 'true'
check 5002 "/foo/ no match" 'false'
check 5003 "/FOO/i case-insensitive" 'true'
check 5004 "/^hello/ anchor match" 'true'
check 5005 "/^hello/ anchor no-match" 'false'
check 5006 "/world$/ end anchor" 'true'
check 5007 "/\\d+/ digit class" 'true'
check 5008 "/\\w+/ word class" 'true'
check 5009 "/[abc]/ class no-match" 'false'
check 5010 "/[abc]/ class match" 'true'
check 5011 "/a*b/ zero-or-more" 'true'
check 5012 "/a+b/ one-or-more no-match" 'false'
check 5013 "/a{2,3}/ quant match" 'true'
check 5014 "/a{2,3}/ quant no-match" 'false'
check 5015 "dot matches any" 'true'
check 5016 "dot requires char" 'false'
check 5017 "exec match string" '"foobar"'
check 5018 "exec match index" '0'
check 5019 "exec capture group" '"bar"'
check 5020 "alternation cat|dog match" 'true'
check 5021 "alternation cat|dog no-match" 'false'
check 5022 "non-capturing group" 'true'
check 5023 "negated class match" 'true'
check 5024 "negated class no-match" 'false'
check 5025 "range [a-z]+" 'true'
check 5026 "word boundary match" 'true'
check 5027 "word boundary no-match" 'false'
check 5028 "lazy quantifier" '"a"'
check 5029 "global exec advances" '"2"'
check 5030 "String.match regex" '"hello"'
check 5031 "String.search regex" '6'
check 5032 "String.replace regex" '"hello there"'
check 5033 "multiline anchor" 'true'
# ── Phase 13: let/const TDZ infrastructure ───────────────────────
check 5100 "let binding initialized" '5'
check 5101 "const binding initialized" '42'
check 5102 "TDZ sentinel is detectable" 'true'
check 5103 "tdz-check passes non-sentinel" '42'
TOTAL=$((PASS + FAIL))
if [ $FAIL -eq 0 ]; then
echo "$PASS/$TOTAL JS-on-SX tests passed"