js: regex engine (lib/js/regex.sx) — pure-SX recursive backtracker
Adds a full regex engine written in SX, installed via js-regex-platform-override!.
Supports char classes (. \d\D\w\W\s\S [abc] [^abc] ranges), anchors (^ $ \b \B),
quantifiers (* + ? {n,m} greedy and lazy), capturing/non-capturing groups,
alternation (a|b), flags i/g/m. exec() returns {:match :index :input :groups}, where :groups holds the captures starting at index 0 (first capture group = groups[0]).
Also fixes String.prototype.match to dispatch through the platform engine
(was calling js-regex-stub-exec directly, bypassing regex.sx).
Adds TDZ sentinel infrastructure: __js_tdz_sentinel__, js-tdz?, js-tdz-check.
Updates test.sh (+34 regex tests + 4 TDZ infra tests), conformance.sh,
and test262-runner.py to load regex.sx as epoch 6.
Tests: 559/560 unit (1 pre-existing failure), 148/148 conformance.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
146
lib/js/test.sh
146
lib/js/test.sh
@@ -33,6 +33,8 @@ cat > "$TMPFILE" << 'EPOCHS'
|
||||
(load "lib/js/transpile.sx")
|
||||
(epoch 5)
|
||||
(load "lib/js/runtime.sx")
|
||||
(epoch 6)
|
||||
(load "lib/js/regex.sx")
|
||||
|
||||
;; ── Phase 0: stubs still behave ─────────────────────────────────
|
||||
(epoch 10)
|
||||
@@ -1323,6 +1325,108 @@ cat > "$TMPFILE" << 'EPOCHS'
|
||||
(epoch 3505)
|
||||
(eval "(js-eval \"var a = {length: 3, 0: 10, 1: 20, 2: 30}; var sum = 0; Array.prototype.forEach.call(a, function(x){sum += x;}); sum\")")
|
||||
|
||||
;; ── Phase 12: Regex engine ────────────────────────────────────────
|
||||
;; Platform is installed: the test key is defined, so js-undefined? yields false
|
||||
(epoch 5000)
|
||||
(eval "(js-undefined? (get __js_regex_platform__ \"test\"))")
|
||||
(epoch 5001)
|
||||
(eval "(js-eval \"/foo/.test('hi foo bar')\")")
|
||||
(epoch 5002)
|
||||
(eval "(js-eval \"/foo/.test('hi bar')\")")
|
||||
;; Case-insensitive flag
|
||||
(epoch 5003)
|
||||
(eval "(js-eval \"/FOO/i.test('hello foo world')\")")
|
||||
;; Anchors
|
||||
(epoch 5004)
|
||||
(eval "(js-eval \"/^hello/.test('hello world')\")")
|
||||
(epoch 5005)
|
||||
(eval "(js-eval \"/^hello/.test('say hello')\")")
|
||||
(epoch 5006)
|
||||
(eval "(js-eval \"/world$/.test('hello world')\")")
|
||||
;; Character classes
|
||||
(epoch 5007)
|
||||
(eval "(js-eval \"/\\\\d+/.test('abc 123')\")")
|
||||
(epoch 5008)
|
||||
(eval "(js-eval \"/\\\\w+/.test('hello')\")")
|
||||
(epoch 5009)
|
||||
(eval "(js-eval \"/[abc]/.test('dog')\")")
|
||||
(epoch 5010)
|
||||
(eval "(js-eval \"/[abc]/.test('cat')\")")
|
||||
;; Quantifiers
|
||||
(epoch 5011)
|
||||
(eval "(js-eval \"/a*b/.test('b')\")")
|
||||
(epoch 5012)
|
||||
(eval "(js-eval \"/a+b/.test('b')\")")
|
||||
(epoch 5013)
|
||||
(eval "(js-eval \"/a{2,3}/.test('aa')\")")
|
||||
(epoch 5014)
|
||||
(eval "(js-eval \"/a{2,3}/.test('a')\")")
|
||||
;; Dot
|
||||
(epoch 5015)
|
||||
(eval "(js-eval \"/h.llo/.test('hello')\")")
|
||||
(epoch 5016)
|
||||
(eval "(js-eval \"/h.llo/.test('hllo')\")")
|
||||
;; exec result
|
||||
(epoch 5017)
|
||||
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.match\")")
|
||||
(epoch 5018)
|
||||
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.index\")")
|
||||
(epoch 5019)
|
||||
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.groups[0]\")")
|
||||
;; Alternation
|
||||
(epoch 5020)
|
||||
(eval "(js-eval \"/cat|dog/.test('I have a dog')\")")
|
||||
(epoch 5021)
|
||||
(eval "(js-eval \"/cat|dog/.test('I have a fish')\")")
|
||||
;; Non-capturing group
|
||||
(epoch 5022)
|
||||
(eval "(js-eval \"/(?:foo)+/.test('foofoo')\")")
|
||||
;; Negated char class
|
||||
(epoch 5023)
|
||||
(eval "(js-eval \"/[^abc]/.test('d')\")")
|
||||
(epoch 5024)
|
||||
(eval "(js-eval \"/[^abc]/.test('a')\")")
|
||||
;; Range inside char class
|
||||
(epoch 5025)
|
||||
(eval "(js-eval \"/[a-z]+/.test('hello')\")")
|
||||
;; Word boundary
|
||||
(epoch 5026)
|
||||
(eval "(js-eval \"/\\\\bword\\\\b/.test('a word here')\")")
|
||||
(epoch 5027)
|
||||
(eval "(js-eval \"/\\\\bword\\\\b/.test('password')\")")
|
||||
;; Lazy quantifier
|
||||
(epoch 5028)
|
||||
(eval "(js-eval \"var m = /a+?/.exec('aaa'); m.match\")")
|
||||
;; Global flag exec
|
||||
(epoch 5029)
|
||||
(eval "(js-eval \"var r=/\\\\d+/g; r.exec('a1b2'); r.exec('a1b2').match\")")
|
||||
;; String.prototype.match with regex
|
||||
(epoch 5030)
|
||||
(eval "(js-eval \"'hello world'.match(/\\\\w+/).match\")")
|
||||
;; String.prototype.search
|
||||
(epoch 5031)
|
||||
(eval "(js-eval \"'hello world'.search(/world/)\")")
|
||||
;; String.prototype.replace with regex
|
||||
(epoch 5032)
|
||||
(eval "(js-eval \"'hello world'.replace(/world/, 'there')\")")
|
||||
;; multiline anchor
|
||||
(epoch 5033)
|
||||
(eval "(js-eval \"/^bar/m.test('foo\\nbar')\")")
|
||||
|
||||
;; ── Phase 13: let/const TDZ infrastructure ───────────────────────
|
||||
;; The TDZ sentinel and checker are defined in runtime.sx.
|
||||
;; let/const bindings work normally after initialization.
|
||||
(epoch 5100)
|
||||
(eval "(js-eval \"let x = 5; x\")")
|
||||
(epoch 5101)
|
||||
(eval "(js-eval \"const y = 42; y\")")
|
||||
;; TDZ sentinel exists and is detectable
|
||||
(epoch 5102)
|
||||
(eval "(js-tdz? __js_tdz_sentinel__)")
|
||||
;; js-tdz-check passes through non-sentinel values
|
||||
(epoch 5103)
|
||||
(eval "(js-tdz-check \"x\" 42)")
|
||||
|
||||
EPOCHS
|
||||
|
||||
|
||||
@@ -2042,6 +2146,48 @@ check 3503 "indexOf.call arrLike" '1'
|
||||
check 3504 "filter.call arrLike" '"2,3"'
|
||||
check 3505 "forEach.call arrLike sum" '60'
|
||||
|
||||
# ── Phase 12: Regex engine ────────────────────────────────────────
|
||||
check 5000 "regex platform installed" 'false'
|
||||
check 5001 "/foo/ matches" 'true'
|
||||
check 5002 "/foo/ no match" 'false'
|
||||
check 5003 "/FOO/i case-insensitive" 'true'
|
||||
check 5004 "/^hello/ anchor match" 'true'
|
||||
check 5005 "/^hello/ anchor no-match" 'false'
|
||||
check 5006 "/world$/ end anchor" 'true'
|
||||
check 5007 "/\\d+/ digit class" 'true'
|
||||
check 5008 "/\\w+/ word class" 'true'
|
||||
check 5009 "/[abc]/ class no-match" 'false'
|
||||
check 5010 "/[abc]/ class match" 'true'
|
||||
check 5011 "/a*b/ zero-or-more" 'true'
|
||||
check 5012 "/a+b/ one-or-more no-match" 'false'
|
||||
check 5013 "/a{2,3}/ quant match" 'true'
|
||||
check 5014 "/a{2,3}/ quant no-match" 'false'
|
||||
check 5015 "dot matches any" 'true'
|
||||
check 5016 "dot requires char" 'false'
|
||||
check 5017 "exec match string" '"foobar"'
|
||||
check 5018 "exec match index" '0'
|
||||
check 5019 "exec capture group" '"bar"'
|
||||
check 5020 "alternation cat|dog match" 'true'
|
||||
check 5021 "alternation cat|dog no-match" 'false'
|
||||
check 5022 "non-capturing group" 'true'
|
||||
check 5023 "negated class match" 'true'
|
||||
check 5024 "negated class no-match" 'false'
|
||||
check 5025 "range [a-z]+" 'true'
|
||||
check 5026 "word boundary match" 'true'
|
||||
check 5027 "word boundary no-match" 'false'
|
||||
check 5028 "lazy quantifier" '"a"'
|
||||
check 5029 "global exec advances" '"2"'
|
||||
check 5030 "String.match regex" '"hello"'
|
||||
check 5031 "String.search regex" '6'
|
||||
check 5032 "String.replace regex" '"hello there"'
|
||||
check 5033 "multiline anchor" 'true'
|
||||
|
||||
# ── Phase 13: let/const TDZ infrastructure ───────────────────────
|
||||
check 5100 "let binding initialized" '5'
|
||||
check 5101 "const binding initialized" '42'
|
||||
check 5102 "TDZ sentinel is detectable" 'true'
|
||||
check 5103 "tdz-check passes non-sentinel" '42'
|
||||
|
||||
TOTAL=$((PASS + FAIL))
|
||||
if [ $FAIL -eq 0 ]; then
|
||||
echo "✓ $PASS/$TOTAL JS-on-SX tests passed"
|
||||
|
||||
Reference in New Issue
Block a user