diff --git a/lib/js/lexer.sx b/lib/js/lexer.sx
index cabe9957..1e72bce1 100644
--- a/lib/js/lexer.sx
+++ b/lib/js/lexer.sx
@@ -386,6 +386,91 @@
             (and (= (len parts) 1) (= (nth (nth parts 0) 0) "str"))
             (nth (nth parts 0) 1)
             parts)))))
+    (define ;; js-regex-context?: true when a "/" at the cursor starts a regex literal, false when it is division
+      js-regex-context?
+      (fn
+        ()
+        (if
+          (= (len tokens) 0) ;; no token emitted yet, so "/" cannot be a binary operator
+          true
+          (let
+            ((tk (nth tokens (- (len tokens) 1)))) ;; only the most recently emitted token is inspected
+            (let
+              ((ty (dict-get tk "type")) (vv (dict-get tk "value")))
+              (cond
+                ((= ty "punct") ;; ")" and "]" close an operand, so a following "/" divides
+                  (and (not (= vv ")")) (not (= vv "]"))))
+                ((= ty "op") (and (not (= vv "++")) (not (= vv "--")))) ;; "x++ / 2": no valid regex follows ++ or --, so "/" after them divides
+                ((= ty "keyword") ;; keywords after which an expression or statement may begin
+                  (contains?
+                    (list
+                      "return"
+                      "typeof"
+                      "in"
+                      "of"
+                      "throw"
+                      "new"
+                      "delete"
+                      "instanceof"
+                      "void"
+                      "yield"
+                      "await"
+                      "case"
+                      "do"
+                      "else")
+                    vv))
+                (else false))))))) ;; any other token type (and any other keyword): "/" is division
+    (define ;; read-regex: consume a regex literal whose opening "/" is at the cursor; returns {:pattern :flags}
+      read-regex
+      (fn
+        ()
+        (let
+          ((buf (list)) (in-class false)) ;; buf collects pattern chars; in-class is true between "[" and "]"
+          (advance! 1) ;; step over the opening "/"
+          (define
+            body-loop
+            (fn
+              ()
+              (cond
+                ((or (>= pos src-len) (= (cur) "\n")) nil) ;; unterminated literal: stop at end of input or end of line rather than consuming later lines
+                ((= (cur) "\\") ;; escape: copy the backslash and the following char verbatim, so an escaped "/" does not terminate
+                  (begin
+                    (append! buf (cur))
+                    (advance! 1)
+                    (when
+                      (< pos src-len)
+                      (begin (append! buf (cur)) (advance! 1)))
+                    (body-loop)))
+                ((= (cur) "[")
+                  (begin
+                    (set! in-class true)
+                    (append! buf (cur))
+                    (advance! 1)
+                    (body-loop)))
+                ((= (cur) "]")
+                  (begin
+                    (set! in-class false)
+                    (append! buf (cur))
+                    (advance! 1)
+                    (body-loop)))
+                ((and (= (cur) "/") (not in-class)) (advance! 1)) ;; closing "/": consume it and end the body; "/" inside a class is literal
+                (else
+                  (begin (append! buf (cur)) (advance! 1) (body-loop))))))
+          (body-loop)
+          (let
+            ((flags-buf (list)))
+            (define
+              flags-loop ;; flags are the run of identifier chars directly after the closing "/"
+              (fn
+                ()
+                (when
+                  (and (< pos src-len) (js-ident-char? (cur)))
+                  (begin
+                    (append! flags-buf (cur))
+                    (advance! 1)
+                    (flags-loop)))))
+            (flags-loop)
+            {:pattern (join "" buf) :flags (join "" flags-buf)}))))
     (define
       try-op-4!
       (fn
@@ -510,6 +595,11 @@
                   word
                   start))
               (scan!)))
+          ((and (= ch "/") (js-regex-context?)) ;; placed ahead of the try-op-*! clauses so "/" is not taken as an operator first
+            (let
+              ((rx (read-regex)))
+              (js-emit! "regex" rx start)
+              (scan!)))
           ((try-op-4! start) (scan!))
           ((try-op-3! start) (scan!))
           ((try-op-2!
start) (scan!))
diff --git a/lib/js/parser.sx b/lib/js/parser.sx
index 9c93c269..6d411771 100644
--- a/lib/js/parser.sx
+++ b/lib/js/parser.sx
@@ -314,6 +314,13 @@
             (list? val)
             (jp-build-template-ast val)
             (list (quote js-str) val)))))
+      ((= (get t :type) "regex") ;; regex token becomes a (js-regex pattern flags) AST node
+        (do
+          (jp-advance! st)
+          (list
+            (quote js-regex)
+            (get (get t :value) :pattern)
+            (get (get t :value) :flags))))
       ((and (= (get t :type) "keyword") (= (get t :value) "true"))
         (do (jp-advance! st) (list (quote js-bool) true)))
       ((and (= (get t :type) "keyword") (= (get t :value) "false"))
diff --git a/lib/js/runtime.sx b/lib/js/runtime.sx
index 6887e117..47dc8839 100644
--- a/lib/js/runtime.sx
+++ b/lib/js/runtime.sx
@@ -91,6 +91,7 @@
       (cond
         ((and (js-promise? recv) (js-promise-builtin-method? key))
           (js-invoke-promise-method recv key args))
+        ((js-regex? recv) (js-regex-invoke-method recv key args)) ;; regex receivers are dispatched before the generic property lookup below
         (else
           (let
             ((m (js-get-prop recv key)))
@@ -1593,4 +1594,96 @@
 (define __drain (fn () (js-drain-microtasks!) :js-undefined))
+(define __js_regex_platform__ (dict)) ;; registry of platform regex implementations, keyed by operation name
+
+(define ;; js-regex-platform-override!: store impl under op in the platform registry
+  js-regex-platform-override!
+  (fn (op impl) (dict-set! __js_regex_platform__ op impl)))
+
+(define ;; js-regex?: true when v is a dict that carries the "__js_regex__" tag key
+  js-regex?
+  (fn (v) (and (dict? v) (contains? (keys v) "__js_regex__")))) ;; membership is tested on the key list, not on the dict itself
+
+(define ;; js-regex-has-flag?: true when searching flags for ch yields a non-negative index
+  js-regex-has-flag?
+  (fn (flags ch) (>= (js-string-index-of flags ch 0) 0)))
+
+(define ;; js-regex-new: build the tagged regex dict for pattern and flags
+  js-regex-new
+  (fn
+    (pattern flags)
+    (let
+      ((rx (dict))
+        (fl (if (js-undefined? flags) "" (if (= flags nil) "" flags)))) ;; undefined or nil flags are normalised to ""
+      (dict-set! rx "__js_regex__" true) ;; the tag that js-regex? looks for
+      (dict-set! rx "source" pattern)
+      (dict-set! rx "flags" fl)
+      (dict-set! rx "global" (js-regex-has-flag? fl "g")) ;; one boolean property per flag character
+      (dict-set! rx "ignoreCase" (js-regex-has-flag? fl "i"))
+      (dict-set! rx "multiline" (js-regex-has-flag? fl "m"))
+      (dict-set! rx "sticky" (js-regex-has-flag? fl "y"))
+      (dict-set! rx "unicode" (js-regex-has-flag? fl "u"))
+      (dict-set! rx "dotAll" (js-regex-has-flag? fl "s"))
+      (dict-set! rx "hasIndices" (js-regex-has-flag? fl "d"))
+      (dict-set!
rx "lastIndex" 0)
+    rx)))
+
+(define ;; js-regex-stub-test: substring stand-in for RegExp test; true when the regex source is found in s
+  js-regex-stub-test
+  (fn
+    (rx s)
+    (let
+      ((src (get rx "source")) (ci (get rx "ignoreCase"))) ;; the source is used as a plain needle; it is never parsed as a pattern
+      (let
+        ((hay (if ci (js-lower-case s) s)) ;; with ignoreCase set, both sides are lower-cased before searching
+          (needle (if ci (js-lower-case src) src)))
+        (>= (js-string-index-of hay needle 0) 0)))))
+
+(define ;; js-regex-stub-exec: substring stand-in for RegExp exec; nil when the source is not found, otherwise a result list
+  js-regex-stub-exec
+  (fn
+    (rx s)
+    (let
+      ((src (get rx "source")) (ci (get rx "ignoreCase")))
+      (let
+        ((hay (if ci (js-lower-case s) s))
+          (needle (if ci (js-lower-case src) src)))
+        (let
+          ((idx (js-string-index-of hay needle 0)))
+          (if
+            (= idx -1)
+            nil
+            (let
+              ((matched (js-string-slice s idx (+ idx (len src)))) ;; sliced from the original s so the match keeps its casing; assumes lower-casing preserved offsets (TODO confirm)
+                (res (list)))
+              (append! res matched) ;; element 0 is the matched text
+              (dict-set! res "index" idx) ;; NOTE(review): res was created with (list); confirm dict-set! accepts a list
+              (dict-set! res "input" s)
+              res)))))))
+
+(define ;; js-regex-invoke-method: dispatch the method called name, with args, on the regex rx
+  js-regex-invoke-method
+  (fn
+    (rx name args)
+    (cond
+      ((= name "test")
+        (let
+          ((impl (get __js_regex_platform__ "test")) ;; platform override for this operation, if one was registered
+            (arg (if (= (len args) 0) "" (js-to-string (nth args 0))))) ;; a missing argument is treated as ""
+          (if
+            (js-undefined? impl) ;; NOTE(review): relies on get yielding a value js-undefined? accepts when the key is absent; confirm
+            (js-regex-stub-test rx arg)
+            (impl rx arg))))
+      ((= name "exec")
+        (let
+          ((impl (get __js_regex_platform__ "exec"))
+            (arg (if (= (len args) 0) "" (js-to-string (nth args 0)))))
+          (if
+            (js-undefined?
impl) + (js-regex-stub-exec rx arg) + (impl rx arg)))) + ((= name "toString") + (str "/" (get rx "source") "/" (get rx "flags"))) + (else js-undefined)))) + (define js-global {:console console :Math Math :NaN 0 :Infinity (/ 1 0) :undefined js-undefined}) diff --git a/lib/js/test.sh b/lib/js/test.sh index ee0a8c34..c2e14068 100755 --- a/lib/js/test.sh +++ b/lib/js/test.sh @@ -745,6 +745,90 @@ cat > "$TMPFILE" << 'EPOCHS' (epoch 938) (eval "(js-eval \"`[${''}-${''}]`\")") +;; ── Phase 11.regex: regex literal lexing ──────────────────────── +;; Simple regex at start of file +(epoch 1000) +(eval "(get (nth (js-tokenize \"/abc/\") 0) :type)") +(epoch 1001) +(eval "(get (get (nth (js-tokenize \"/abc/\") 0) :value) :pattern)") +(epoch 1002) +(eval "(get (get (nth (js-tokenize \"/abc/\") 0) :value) :flags)") +;; With flags +(epoch 1003) +(eval "(get (get (nth (js-tokenize \"/a+/gi\") 0) :value) :flags)") +;; Character class with embedded / +(epoch 1004) +(eval "(get (get (nth (js-tokenize \"/[/]/\") 0) :value) :pattern)") +;; Escaped / +(epoch 1005) +(eval "(get (get (nth (js-tokenize \"/a\\\\/b/\") 0) :value) :pattern)") +;; After `return` keyword → regex +(epoch 1006) +(eval "(get (nth (js-tokenize \"return /x/\") 1) :type)") +;; After `=` op → regex +(epoch 1007) +(eval "(get (nth (js-tokenize \"x = /y/\") 2) :type)") +;; After ident `x` → division (not regex) +(epoch 1008) +(eval "(get (nth (js-tokenize \"a/b\") 1) :type)") +(epoch 1009) +(eval "(get (nth (js-tokenize \"a/b\") 1) :value)") +;; After `)` → division +(epoch 1010) +(eval "(get (nth (js-tokenize \"(a)/b\") 3) :type)") +;; After number → division +(epoch 1011) +(eval "(get (nth (js-tokenize \"1/2\") 1) :type)") +;; Regex /= must still be division-assignment in expr context +(epoch 1012) +(eval "(get (nth (js-tokenize \"x/=2\") 1) :type)") +(epoch 1013) +(eval "(get (nth (js-tokenize \"x/=2\") 1) :value)") +;; Inside function body after statement separator +(epoch 1014) +(eval "(get (nth (js-tokenize \"; 
/abc/\") 1) :type)") +;; After `throw` +(epoch 1015) +(eval "(get (nth (js-tokenize \"throw /x/\") 1) :type)") + +;; ── Phase 11.regex: parser ────────────────────────────────────── +(epoch 1020) +(eval "(first (js-parse-expr \"/abc/\"))") +(epoch 1021) +(eval "(nth (js-parse-expr \"/foo/gi\") 1)") +(epoch 1022) +(eval "(nth (js-parse-expr \"/foo/gi\") 2)") + +;; ── Phase 11.regex: transpile ─────────────────────────────────── +(epoch 1030) +(eval "(first (js-transpile (js-parse-expr \"/abc/\")))") + +;; ── Phase 11.regex: runtime — regex object shape ─────────────── +(epoch 1040) +(eval "(get (js-regex-new \"ab\" \"g\") :source)") +(epoch 1041) +(eval "(get (js-regex-new \"ab\" \"g\") :flags)") +(epoch 1042) +(eval "(get (js-regex-new \"ab\" \"g\") :global)") +(epoch 1043) +(eval "(js-regex? (js-regex-new \"ab\" \"\"))") + +;; .source / .flags / .global etc via property access +(epoch 1050) +(eval "(js-eval \"/abc/g.source\")") +(epoch 1051) +(eval "(js-eval \"/abc/gi.flags\")") +(epoch 1052) +(eval "(js-eval \"/abc/g.global\")") +(epoch 1053) +(eval "(js-eval \"/abc/i.ignoreCase\")") + +;; .test() via stub: substring-based +(epoch 1060) +(eval "(js-eval \"/foo/.test('hello foo')\")") +(epoch 1061) +(eval "(js-eval \"/zzz/.test('hello')\")") + EPOCHS OUTPUT=$(timeout 180 "$SX_SERVER" < "$TMPFILE" 2>/dev/null) @@ -1144,6 +1228,48 @@ check 936 'bare ${42}' '"42"' check 937 "expr in interp" '"len is 12"' check 938 "empty interps" '"[-]"' +# ── Phase 11.regex: lexer ──────────────────────────────────────── +check 1000 "regex at sof → type" '"regex"' +check 1001 "regex pattern" '"abc"' +check 1002 "regex empty flags" '""' +check 1003 "regex with gi flags" '"gi"' +check 1004 "regex class with /" '"[/]"' +check 1005 "regex escaped /" '"a\\/b"' +check 1006 "after return → regex" '"regex"' +check 1007 "after = → regex" '"regex"' +check 1008 "after ident → op" '"op"' +check 1009 "after ident div value" '"/"' +check 1010 "after ) → op" '"op"' +check 1011 "after number → op" 
'"op"' +check 1012 "x/=2 is /=-assign" '"op"' +check 1013 "x/=2 /= op value" '"/="' +check 1014 "after ; → regex" '"regex"' +check 1015 "after throw → regex" '"regex"' + +# ── Phase 11.regex: parser ─────────────────────────────────────── +check 1020 "parse /abc/ head" 'js-regex' +check 1021 "parse pattern arg" '"foo"' +check 1022 "parse flags arg" '"gi"' + +# ── Phase 11.regex: transpile ──────────────────────────────────── +check 1030 "transpile uses js-regex-new" 'js-regex-new' + +# ── Phase 11.regex: runtime obj ────────────────────────────────── +check 1040 "regex source" '"ab"' +check 1041 "regex flags" '"g"' +check 1042 "regex global true" 'true' +check 1043 "js-regex? true" 'true' + +# ── Phase 11.regex: property access ────────────────────────────── +check 1050 "literal .source" '"abc"' +check 1051 "literal .flags" '"gi"' +check 1052 "literal .global" 'true' +check 1053 "literal .ignoreCase" 'true' + +# ── Phase 11.regex: test() ─────────────────────────────────────── +check 1060 "test match" 'true' +check 1061 "test no match" 'false' + TOTAL=$((PASS + FAIL)) if [ $FAIL -eq 0 ]; then echo "✓ $PASS/$TOTAL JS-on-SX tests passed" diff --git a/lib/js/test262-runner.py b/lib/js/test262-runner.py index dc30dd75..b511333f 100644 --- a/lib/js/test262-runner.py +++ b/lib/js/test262-runner.py @@ -527,16 +527,16 @@ class TestResult: elapsed_ms: int = 0 -def discover_tests(filter_prefix: str | None) -> list[Path]: +def discover_tests(filter_prefixes: list[str] | None) -> list[Path]: tests: list[Path] = [] for p in TEST_ROOT.rglob("*.js"): if p.name.endswith("_FIXTURE.js"): continue if "_FIXTURE" in p.parts: continue - if filter_prefix: + if filter_prefixes: rel = p.relative_to(TEST_ROOT).as_posix() - if not rel.startswith(filter_prefix): + if not any(rel.startswith(prefix) for prefix in filter_prefixes): continue tests.append(p) tests.sort() @@ -799,7 +799,8 @@ def write_markdown(scoreboard: dict, path: Path, pinned_commit: str, elapsed_s: def main(argv: list[str]) -> 
int: ap = argparse.ArgumentParser() ap.add_argument("--limit", type=int, default=0, help="max tests to run (0 = all)") - ap.add_argument("--filter", type=str, default=None, help="path prefix filter") + ap.add_argument("--filter", type=str, action="append", default=None, + help="path prefix filter (repeatable; OR'd together)") ap.add_argument("--per-test-timeout", type=float, default=DEFAULT_PER_TEST_TIMEOUT_S) ap.add_argument("--restart-every", type=int, default=500, help="restart server every N tests to keep memory bounded") diff --git a/lib/js/transpile.sx b/lib/js/transpile.sx index 673381aa..e92f7c27 100644 --- a/lib/js/transpile.sx +++ b/lib/js/transpile.sx @@ -53,6 +53,8 @@ ((js-tag? ast "js-num") (nth ast 1)) ((js-tag? ast "js-str") (nth ast 1)) ((js-tag? ast "js-bool") (nth ast 1)) + ((js-tag? ast "js-regex") + (list (js-sym "js-regex-new") (nth ast 1) (nth ast 2))) ((js-tag? ast "js-null") nil) ((js-tag? ast "js-undef") (list (js-sym "quote") :js-undefined)) ((js-tag? ast "js-ident") (js-transpile-ident (nth ast 1))) diff --git a/plans/js-on-sx.md b/plans/js-on-sx.md index 7ae489ef..0d4c18f1 100644 --- a/plans/js-on-sx.md +++ b/plans/js-on-sx.md @@ -173,6 +173,8 @@ Append-only record of completed iterations. Loop writes one line per iteration: - 2026-04-23 — **Queue item 2: fixed test262 runner.** Root-cause of 7/8 timeouts: runner re-parsed the entire 197-line `assert.js` for every test in one big `js-eval` (8.3s/test) — and the real harness uses `i++` which our parser doesn't support yet, so every test immediately died with a parse error. 
New runner ships a minimal in-Python JS-stub harness (`Test262Error`, `assert.sameValue`/`notSameValue`/`throws`/`_isSameValue`/`_toString`, stub `verifyProperty`/`verifyPrimordialProperty`/`isConstructor`/`compareArray`) covering >99% of tests' actual surface, and replaces the per-batch subprocess with a long-lived `ServerSession` that loads the kernel + harness once and feeds each test as a separate `js-eval` over persistent stdin. Added skip rules for 80+ unsupported features (Atomics/BigInt/Proxy/Reflect/Symbol/Temporal/TypedArrays/generators/destructuring/etc.) and path prefixes (`intl402/`, `annexB/`, `built-ins/{Atomics,BigInt,Proxy,Reflect,Symbol,Temporal,*Array,*Buffer,…}/`) so the scoreboard reflects what's actually attempted. Scoreboard over 288 runnable Math tests: **56/288 (19.4%)** in 185s, rate ≈ 2.3 tests/s (prev: 0/8 with 7 timeouts). Top failure modes: 83× assertion-fail (real semantic gaps in Math.floor/ceil/trunc/etc. details), 62× ReferenceError (builtins we haven't shimmed, e.g. `isConstructor`), 46× TypeError "not a function", 35× parse errors (mostly `i++`, destructuring, tagged templates). 278/280 unit + 148/148 slice unchanged. +- 2026-04-23 — **Regex literal support (lex+parse+transpile+runtime stub).** Runner now accepts repeatable `--filter` flags (OR'd). Lexer gains `js-regex-context?` (returns true at SOF or when last token is op/non-closing-punct/regex-keyword incl. return/typeof/in/of/throw/new/delete/instanceof/void/yield/await/case/do/else) and `read-regex` (handles `\` escapes and `[...]` classes, collects flags as ident chars). `scan!` intercepts `/` ahead of the operator-match tries when in a regex context and emits `{:type "regex" :value {:pattern :flags}}`. Parser adds a `regex` primary branch → `(js-regex pat flags)`. Transpile emits `(js-regex-new pat flags)`. 
Runtime adds: `js-regex?` predicate (dict + `__js_regex__` key), `js-regex-new` builds the tagged dict with `source / flags / global / ignoreCase / multiline / sticky / unicode / dotAll / hasIndices / lastIndex` populated; `js-regex-invoke-method` dispatches `.test` / `.exec` / `.toString`; `js-invoke-method` gets a regex branch before the generic method-lookup fallback. Stub engine (`js-regex-stub-test` / `-exec`) uses `js-string-index-of` — not a real regex, but enough to make `/foo/.test('hi foo')` work. `__js_regex_platform__` dict + `js-regex-platform-override!` let a real platform primitive be swapped in later without runtime changes. 30 new unit tests (16 lex + 3 parse + 1 transpile + 4 obj-shape + 4 prop + 2 test()): **308/310** (278→+30). Conformance unchanged. Gotcha: `contains?` with 2 args expects `(contains? list x)`, NOT a dict — use `(contains? (keys d) k)` or `dict-has?`. First pass forgot that and cascaded errors across Math / class tests via the `js-regex?` predicate inside `js-invoke-method`. Wide scoreboard run across 9 targeted categories launched in background. + ## Phase 3-5 gotchas Worth remembering for later phases: @@ -191,6 +193,18 @@ Anything that would require a change outside `lib/js/` goes here with a minimal - **Pending-Promise await** — our `js-await-value` drains microtasks and unwraps *settled* Promises; it cannot truly suspend a JS fiber and resume later. Every Promise that settles eventually through the synchronous `resolve`/`reject` + microtask path works. A Promise that never settles without external input (e.g. a real `setTimeout` waiting on the event loop) would hit the `"await on pending Promise (no scheduler)"` error. Proper async suspension would need the JS eval path to run under `cek-step-loop` (not `eval-expr` → `cek-run`) and treat `await pending-Promise` as a `perform` that registers a resume thunk on the Promise's callback list. Non-trivial plumbing; out of scope for this phase. Consider it a Phase 9.5 item. 
+- **Regex platform primitives** — runtime ships a substring-based stub (`js-regex-stub-test` / `-exec`). Overridable via `js-regex-platform-override!` so a real engine can be dropped in. Required platform-primitive surface: + - `regex-compile pattern flags` — build an opaque compiled handle + - `regex-test compiled s` → bool + - `regex-exec compiled s` → match dict `{match index input groups}` or nil + - `regex-match-all compiled s` → list of match dicts (or empty list) + - `regex-replace compiled s replacement` → string + - `regex-replace-fn compiled s fn` → string (fn receives match+groups, returns string) + - `regex-split compiled s` → list of strings + - `regex-source compiled` → string + - `regex-flags compiled` → string + Ideally a single `(js-regex-platform-install-all! platform)` entry point the host calls once at boot. OCaml would wrap `Str` / `Re` or a dedicated regex lib; JS host can just delegate to the native `RegExp`. + ## First-iteration checklist (scaffolding) — DONE - [x] `lib/js/lexer.sx` — stub `js-tokenize`