Compare commits
2 Commits
loops/comm
...
architectu
| Author | SHA1 | Date | |
|---|---|---|---|
| f247cb2898 | |||
| f8023cf74e |
@@ -49,6 +49,8 @@ trap "rm -f $TMPFILE" EXIT
|
|||||||
echo '(load "lib/js/transpile.sx")'
|
echo '(load "lib/js/transpile.sx")'
|
||||||
echo '(epoch 5)'
|
echo '(epoch 5)'
|
||||||
echo '(load "lib/js/runtime.sx")'
|
echo '(load "lib/js/runtime.sx")'
|
||||||
|
echo '(epoch 6)'
|
||||||
|
echo '(load "lib/js/regex.sx")'
|
||||||
|
|
||||||
epoch=100
|
epoch=100
|
||||||
for f in "${FIXTURES[@]}"; do
|
for f in "${FIXTURES[@]}"; do
|
||||||
|
|||||||
943
lib/js/regex.sx
Normal file
943
lib/js/regex.sx
Normal file
@@ -0,0 +1,943 @@
|
|||||||
|
;; lib/js/regex.sx — pure-SX recursive backtracking regex engine
|
||||||
|
;;
|
||||||
|
;; Installed via (js-regex-platform-override! ...) at load time.
|
||||||
|
;; Covers: character classes (\d\w\s . [abc] [^abc] [a-z]),
|
||||||
|
;; anchors (^ $ \b \B), quantifiers (* + ? {n,m} lazy variants),
|
||||||
|
;; groups (capturing + non-capturing), alternation (a|b),
|
||||||
|
;; flags: i (case-insensitive), g (global), m (multiline).
|
||||||
|
;;
|
||||||
|
;; Architecture:
|
||||||
|
;; 1. rx-parse-pattern — pattern string → compiled node list
|
||||||
|
;; 2. rx-match-nodes — recursive backtracker
|
||||||
|
;; 3. rx-exec / rx-test — public interface
|
||||||
|
;; 4. Install as {:test rx-test :exec rx-exec}
|
||||||
|
|
||||||
|
;; ── Utilities ─────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
;; Bounds-checked character lookup: the character of `s` at index `i`,
;; or "" when `i` lies outside the string.
(define
  rx-char-at
  (fn
    (s i)
    (cond
      ((< i 0) "")
      ((>= i (len s)) "")
      (else (char-at s i)))))
|
||||||
|
|
||||||
|
;; True when `c` is an ASCII decimal digit ("0".."9").
;; The empty string (used as the end-of-input marker) is never a digit.
(define
  rx-digit?
  (fn
    (c)
    (if
      (= c "")
      false
      (let
        ((code (char-code c)))
        (and (>= code 48) (<= code 57))))))
|
||||||
|
|
||||||
|
;; True when `c` is a regex word character: ASCII letter, digit or "_".
;; The empty string is never a word character.
(define
  rx-word?
  (fn
    (c)
    (if
      (= c "")
      false
      (let
        ((code (char-code c)))
        (or
          (and (>= code 65) (<= code 90))
          (and (>= code 97) (<= code 122))
          (and (>= code 48) (<= code 57))
          (= c "_"))))))
|
||||||
|
|
||||||
|
;; True when `c` is ASCII whitespace as matched by \s:
;; space (32), tab (9), line feed (10), vertical tab (11),
;; form feed (12) or carriage return (13).
;;
;; Code points are compared instead of string literals: the previous
;; "\\f" literal was the two-character string backslash+f and could
;; never equal a single character, and the vertical-tab test compared
;; against "" (which also made the empty string count as whitespace).
;; NOTE(review): the non-ASCII members of JS \s (U+00A0, U+FEFF,
;; Unicode space separators) are still not covered.
(define
  rx-space?
  (fn
    (c)
    (if
      (= c "")
      false
      (let
        ((code (char-code c)))
        (or (= code 32) (and (>= code 9) (<= code 13)))))))
|
||||||
|
|
||||||
|
;; True when `c` is a line terminator recognised by the engine (LF or CR).
(define
  rx-newline?
  (fn
    (c)
    (if (= c "\n") true (= c "\r"))))
|
||||||
|
|
||||||
|
;; ASCII-only lower-casing: "A".."Z" map to "a".."z", every other
;; character is returned unchanged.
(define
  rx-downcase-char
  (fn
    (c)
    (let
      ((code (char-code c)))
      (cond
        ((< code 65) c)
        ((> code 90) c)
        (else (char-from-code (+ code 32)))))))
|
||||||
|
|
||||||
|
;; Compare two characters; when `ci?` is truthy the comparison ignores
;; ASCII case (both sides are folded with rx-downcase-char).
(define
  rx-char-eq?
  (fn
    (a b ci?)
    (if
      (not ci?)
      (= a b)
      (= (rx-downcase-char a) (rx-downcase-char b)))))
|
||||||
|
|
||||||
|
;; Read a run of decimal digits from `pat` starting at index `i`,
;; accumulating onto `acc`. Returns (value index-after-last-digit);
;; when no digit is present the result is (acc i).
(define
  rx-parse-int
  (fn
    (pat i acc)
    (let
      ((ch (rx-char-at pat i)))
      (if
        (not (rx-digit? ch))
        (list acc i)
        (let
          ((digit (- (char-code ch) 48)))
          (rx-parse-int pat (+ i 1) (+ (* acc 10) digit)))))))
|
||||||
|
|
||||||
|
;; Numeric value (0..15) of the hexadecimal digit `c`, or -1 when `c`
;; is not a hex digit.
;;
;; `c` may be "" -- rx-parse-hex-n passes the result of rx-char-at,
;; which is "" once the pattern is exhausted (e.g. a pattern ending in
;; "\x4"). The empty string is rejected up front instead of being
;; handed to char-code, matching the guard used by rx-digit? and
;; rx-word?.
(define
  rx-hex-digit-val
  (fn
    (c)
    (if
      (= c "")
      -1
      (let
        ((code (char-code c)))
        (cond
          ((and (>= code 48) (<= code 57)) (- code 48))
          ((and (>= code 65) (<= code 70)) (+ 10 (- code 65)))
          ((and (>= code 97) (<= code 102)) (+ 10 (- code 97)))
          (else -1))))))
|
||||||
|
|
||||||
|
;; Read up to `n` hex digits from `pat` at index `i`, folding them into
;; `acc`. Stops early at the first non-hex character. Returns
;; (character-with-that-code index-after-last-digit-read).
(define
  rx-parse-hex-n
  (fn
    (pat i n acc)
    (let
      ((digit (if (= n 0) -1 (rx-hex-digit-val (rx-char-at pat i)))))
      (if
        (< digit 0)
        (list (char-from-code acc) i)
        (rx-parse-hex-n pat (+ i 1) (- n 1) (+ (* acc 16) digit))))))
|
||||||
|
|
||||||
|
;; ── Pattern compiler ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
;; Node types (stored in dicts with "__t__" key):
|
||||||
|
;; literal : {:__t__ "literal" :__c__ char}
|
||||||
|
;; any : {:__t__ "any"}
|
||||||
|
;; class-d : {:__t__ "class-d" :__neg__ bool}
|
||||||
|
;; class-w : {:__t__ "class-w" :__neg__ bool}
|
||||||
|
;; class-s : {:__t__ "class-s" :__neg__ bool}
|
||||||
|
;; char-class: {:__t__ "char-class" :__neg__ bool :__items__ list}
|
||||||
|
;; anchor-start / anchor-end / anchor-word / anchor-nonword
|
||||||
|
;; quant : {:__t__ "quant" :__node__ n :__min__ m :__max__ mx :__lazy__ bool}
|
||||||
|
;; group : {:__t__ "group" :__idx__ i :__nodes__ list}
|
||||||
|
;; ncgroup : {:__t__ "ncgroup" :__nodes__ list}
|
||||||
|
;; alt : {:__t__ "alt" :__branches__ list-of-node-lists}
|
||||||
|
|
||||||
|
;; parse one escape after `\`, returns (node new-i)
|
||||||
|
;; Parse the escape whose letter sits at index `i` (the character right
;; after the backslash). Returns (node new-i).
;;
;;   \d \D \w \W \s \S  -> class-d / class-w / class-s (negated for upper case)
;;   \b \B              -> anchor-word / anchor-nonword
;;   \n \r \t \f \v     -> literal control character
;;   \uHHHH \xHH        -> literal built from the hex digits that follow
;;   anything else      -> literal of that character (identity escape)
;;
;; \f and \v are produced with char-from-code (12 and 11): the previous
;; "\\f" literal was a two-character string and the \v literal was "",
;; so neither node could ever match a character.
(define
  rx-parse-escape
  (fn
    (pat i)
    (let
      ((c (rx-char-at pat i)) (next (+ i 1)))
      (cond
        ((= c "d") (list (dict "__t__" "class-d" "__neg__" false) next))
        ((= c "D") (list (dict "__t__" "class-d" "__neg__" true) next))
        ((= c "w") (list (dict "__t__" "class-w" "__neg__" false) next))
        ((= c "W") (list (dict "__t__" "class-w" "__neg__" true) next))
        ((= c "s") (list (dict "__t__" "class-s" "__neg__" false) next))
        ((= c "S") (list (dict "__t__" "class-s" "__neg__" true) next))
        ((= c "b") (list (dict "__t__" "anchor-word") next))
        ((= c "B") (list (dict "__t__" "anchor-nonword") next))
        ((= c "n") (list (dict "__t__" "literal" "__c__" "\n") next))
        ((= c "r") (list (dict "__t__" "literal" "__c__" "\r") next))
        ((= c "t") (list (dict "__t__" "literal" "__c__" "\t") next))
        ((= c "f")
          (list (dict "__t__" "literal" "__c__" (char-from-code 12)) next))
        ((= c "v")
          (list (dict "__t__" "literal" "__c__" (char-from-code 11)) next))
        ((= c "u")
          (let
            ((res (rx-parse-hex-n pat next 4 0)))
            (list (dict "__t__" "literal" "__c__" (nth res 0)) (nth res 1))))
        ((= c "x")
          (let
            ((res (rx-parse-hex-n pat next 2 0)))
            (list (dict "__t__" "literal" "__c__" (nth res 0)) (nth res 1))))
        (else (list (dict "__t__" "literal" "__c__" c) next))))))
|
||||||
|
|
||||||
|
;; parse a char-class item inside [...], returns (item new-i)
|
||||||
|
;; Parse one item inside [...] starting at index `i`.
;; Returns (item new-i) where item is one of
;;   {"kind" "lit" "c" ch}
;;   {"kind" "range" "lo" ch "hi" ch}
;;   {"kind" "class-d"|"class-w"|"class-s" "neg" bool}
;;
;; Changes relative to the previous version:
;;   * an escaped low end followed by "-" at the very end of the pattern
;;     (e.g. "[\n-") no longer builds a range whose "hi" is "" -- the
;;     unescaped branch already had this guard;
;;   * "\b" inside a class is the backspace character (code 8) and "\B"
;;     is a plain "B"; rx-parse-escape returns anchor nodes for these,
;;     which carry no "__c__" and previously produced a nil literal.
;; NOTE(review): the high end of a range is still read as a single raw
;; character, so an escaped high end ("[a-\x7a]") is not supported.
(define
  rx-parse-class-item
  (fn
    (pat i)
    (let
      ((c (rx-char-at pat i)))
      (if
        (= c "\\")
        (let
          ((esc (rx-parse-escape pat (+ i 1))))
          (let
            ((node (nth esc 0)) (ni (nth esc 1)))
            (let
              ((t (get node "__t__")))
              (if
                (or (= t "class-d") (= t "class-w") (= t "class-s"))
                ;; shorthand class: the item kind is the node type itself
                (list (dict "kind" t "neg" (get node "__neg__")) ni)
                (let
                  ((lc
                     (cond
                       ((= t "anchor-word") (char-from-code 8))
                       ((= t "anchor-nonword") "B")
                       (else (get node "__c__"))))
                   (hi-c (rx-char-at pat (+ ni 1))))
                  (if
                    (and
                      (= (rx-char-at pat ni) "-")
                      (not (= hi-c "]"))
                      (not (= hi-c "")))
                    (list (dict "kind" "range" "lo" lc "hi" hi-c) (+ ni 2))
                    (list (dict "kind" "lit" "c" lc) ni)))))))
        (let
          ((hi-c (rx-char-at pat (+ i 2))))
          (if
            (and
              (not (= c ""))
              (= (rx-char-at pat (+ i 1)) "-")
              (not (= hi-c "]"))
              (not (= hi-c "")))
            (list (dict "kind" "range" "lo" c "hi" hi-c) (+ i 3))
            (list (dict "kind" "lit" "c" c) (+ i 1))))))))
|
||||||
|
|
||||||
|
;; Collect class items from `pat` starting at `i` until "]" or the end
;; of the pattern. Items are appended to `items` in place.
;; Returns (items index-of-terminator).
(define
  rx-parse-class-items
  (fn
    (pat i items)
    (let
      ((ch (rx-char-at pat i)))
      (cond
        ((= ch "]") (list items i))
        ((= ch "") (list items i))
        (else
          (let
            ((parsed (rx-parse-class-item pat i)))
            (begin
              (append! items (nth parsed 0))
              (rx-parse-class-items pat (nth parsed 1) items))))))))
|
||||||
|
|
||||||
|
;; parse a sequence until stop-ch or EOF; returns (nodes new-i groups-count)
|
||||||
|
;; Worker behind rx-parse-seq. Parses atoms (with their quantifiers)
;; from index `i`, appending nodes to (get ds "nodes"), until one of:
;;   * end of pattern,
;;   * `stop-ch` (")" inside a group, "" at top level),
;;   * "|" -- when `in-alt?` is true the sequence simply ends there
;;     (the caller is collecting alternatives); otherwise the nodes
;;     parsed so far become the first branch of an alternation.
;; Returns (nodes new-i groups-count).
(define
  rx-parse-seq-until
  (fn
    (pat i stop-ch in-alt? ds)
    (let
      ((c (rx-char-at pat i)))
      (cond
        ((= c "") (list (get ds "nodes") i (get ds "groups")))
        ((= c stop-ch) (list (get ds "nodes") i (get ds "groups")))
        ((= c "|")
          (if
            in-alt?
            (list (get ds "nodes") i (get ds "groups"))
            (rx-parse-alt-rest pat i stop-ch ds)))
        (else
          (let
            ((res (rx-parse-atom pat i ds)))
            (let
              ((node (nth res 0)) (ni (nth res 1)) (ds2 (nth res 2)))
              (let
                ((qres (rx-parse-quant pat ni node)))
                (begin
                  (append! (get ds2 "nodes") (nth qres 0))
                  (rx-parse-seq-until
                    pat
                    (nth qres 1)
                    stop-ch
                    in-alt?
                    ds2))))))))))

;; parse a sequence until stop-ch or EOF; returns (nodes new-i groups-count)
(define
  rx-parse-seq
  (fn
    (pat i stop-ch ds)
    (rx-parse-seq-until pat i stop-ch false ds)))

;; when we hit | inside a sequence, collect all alternatives.
;; `stop-ch` is the terminator of the enclosing sequence; it is passed
;; on so that every branch stops at the enclosing ")" as well as at the
;; next "|". (Previously branches were parsed with "|" as their only
;; stop character, so in "(a|b)c" the last branch swallowed ")c".)
(define
  rx-parse-alt-rest
  (fn
    (pat i stop-ch ds)
    (let
      ((branches (list)))
      (begin
        (append! branches (get ds "nodes"))
        (rx-parse-alt-branches pat i stop-ch (get ds "groups") branches)))))

;; `i` is the index of a "|". Parses the branch that follows it, adds it
;; to `branches`, and repeats while another "|" follows. Returns
;; ((alt-node) new-i groups-count) -- a one-element node list holding
;; the alternation.
(define
  rx-parse-alt-branches
  (fn
    (pat i stop-ch n-groups branches)
    (let
      ((new-nodes (list)) (ds2 (dict "groups" n-groups "nodes" new-nodes)))
      (let
        ((res (rx-parse-seq-until pat (+ i 1) stop-ch true ds2)))
        (begin
          (append! branches (nth res 0))
          (let
            ((ni2 (nth res 1)) (g2 (nth res 2)))
            (if
              (= (rx-char-at pat ni2) "|")
              (rx-parse-alt-branches pat ni2 stop-ch g2 branches)
              (list
                (list (dict "__t__" "alt" "__branches__" branches))
                ni2
                g2))))))))
|
||||||
|
|
||||||
|
;; parse quantifier suffix, returns (node new-i)
|
||||||
|
;; Build the (quant-node new-i) result for a quantifier whose own text
;; ends just before index `end`. A "?" at `end` marks the quantifier as
;; lazy and is consumed. `mx` of -1 means "unbounded".
(define
  rx-make-quant
  (fn
    (pat node mn mx end)
    (let
      ((lazy? (= (rx-char-at pat end) "?")))
      (list
        (dict
          "__t__"
          "quant"
          "__node__"
          node
          "__min__"
          mn
          "__max__"
          mx
          "__lazy__"
          lazy?)
        (if lazy? (+ end 1) end)))))

;; parse quantifier suffix, returns (node new-i)
;;
;; Recognises * + ? {n} {n,} {n,m}, each optionally followed by "?" for
;; the lazy variant. When no quantifier is present at `i` the node is
;; returned unchanged together with `i`.
;;
;; A "{" that does not open a well-formed quantifier is left alone so
;; the caller parses it as a literal character:
;;   * "{" not followed by a digit ("a{}", "a{,3}", "a{x"),
;;   * "{n,m" with no closing "}" (previously accepted, silently
;;     swallowing the character where "}" should have been).
(define
  rx-parse-quant
  (fn
    (pat i node)
    (let
      ((c (rx-char-at pat i)))
      (cond
        ((= c "*") (rx-make-quant pat node 0 -1 (+ i 1)))
        ((= c "+") (rx-make-quant pat node 1 -1 (+ i 1)))
        ((= c "?") (rx-make-quant pat node 0 1 (+ i 1)))
        ((= c "{")
          (let
            ((mres (rx-parse-int pat (+ i 1) 0)))
            (let
              ((mn (nth mres 0)) (mi (nth mres 1)))
              (let
                ((sep (rx-char-at pat mi)))
                (cond
                  ;; no digits were read for the minimum
                  ((= mi (+ i 1)) (list node i))
                  ((= sep "}") (rx-make-quant pat node mn mn (+ mi 1)))
                  ((= sep ",")
                    (if
                      (= (rx-char-at pat (+ mi 1)) "}")
                      (rx-make-quant pat node mn -1 (+ mi 2))
                      (let
                        ((mxres (rx-parse-int pat (+ mi 1) 0)))
                        (let
                          ((mx (nth mxres 0)) (mxi (nth mxres 1)))
                          (if
                            (= (rx-char-at pat mxi) "}")
                            (rx-make-quant pat node mn mx (+ mxi 1))
                            (list node i))))))
                  (else (list node i)))))))
        (else (list node i))))))
|
||||||
|
|
||||||
|
;; parse one atom, returns (node new-i new-ds)
|
||||||
|
;; Parse a single atom at index `i`. Returns (node new-i new-ds).
;;
;;   .  ^  $        -> any / anchor-start / anchor-end
;;   \x             -> whatever rx-parse-escape yields
;;   [...] [^...]   -> char-class (new-i is one past the item terminator)
;;   (?:...)        -> ncgroup
;;   (...)          -> group, numbered (get ds "groups") + 1
;;   anything else  -> literal
;;
;; For groups the returned ds is a fresh dict carrying the updated group
;; count and the *same* "nodes" list as the incoming ds; for every other
;; atom the incoming ds is returned as is.
(define
  rx-parse-atom
  (fn
    (pat i ds)
    (let
      ((c (rx-char-at pat i)) (after (+ i 1)))
      (cond
        ((= c ".") (list (dict "__t__" "any") after ds))
        ((= c "^") (list (dict "__t__" "anchor-start") after ds))
        ((= c "$") (list (dict "__t__" "anchor-end") after ds))
        ((= c "\\")
          (let
            ((esc (rx-parse-escape pat after)))
            (list (nth esc 0) (nth esc 1) ds)))
        ((= c "[")
          (let
            ((neg? (= (rx-char-at pat after) "^")) (items (list)))
            (let
              ((res (rx-parse-class-items pat (if neg? (+ i 2) after) items)))
              (list
                (dict "__t__" "char-class" "__neg__" neg? "__items__" items)
                (+ (nth res 1) 1)
                ds))))
        ((= c "(")
          (let
            ((capturing?
               (not
                 (and
                   (= (rx-char-at pat after) "?")
                   (= (rx-char-at pat (+ i 2)) ":"))))
             (gidx (+ (get ds "groups") 1)))
            (let
              ((res
                 (rx-parse-seq
                   pat
                   (if capturing? after (+ i 3))
                   ")"
                   (dict
                     "groups"
                     (if capturing? gidx (get ds "groups"))
                     "nodes"
                     (list)))))
              (list
                (if
                  capturing?
                  (dict "__t__" "group" "__idx__" gidx "__nodes__" (nth res 0))
                  (dict "__t__" "ncgroup" "__nodes__" (nth res 0)))
                (+ (nth res 1) 1)
                (dict "groups" (nth res 2) "nodes" (get ds "nodes"))))))
        (else (list (dict "__t__" "literal" "__c__" c) after ds))))))
|
||||||
|
|
||||||
|
;; top-level compile
|
||||||
|
;; Compile a pattern string. Returns
;;   {"nodes" <top-level node list> "ngroups" <number of capturing groups>}
(define
  rx-compile
  (fn
    (pattern)
    (let
      ((parsed (rx-parse-seq pattern 0 "" (dict "groups" 0 "nodes" (list)))))
      (dict "nodes" (nth parsed 0) "ngroups" (nth parsed 2)))))
|
||||||
|
|
||||||
|
;; ── Matcher ───────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
;; Match a char-class item against character c
|
||||||
|
;; Does character `c` satisfy a single char-class `item`?
;; `ci?` enables ASCII case folding for "lit" and "range" items; the
;; shorthand kinds (class-d/w/s) honour the item's own "neg" flag.
;; Unknown kinds never match.
(define
  rx-item-matches?
  (fn
    (item c ci?)
    (let
      ((kind (get item "kind"))
       (fold (fn (ch) (if ci? (rx-downcase-char ch) ch))))
      (cond
        ((= kind "lit") (rx-char-eq? c (get item "c") ci?))
        ((= kind "range")
          (let
            ((lo (fold (get item "lo")))
             (hi (fold (get item "hi")))
             (dc (fold c)))
            (and
              (>= (char-code dc) (char-code lo))
              (<= (char-code dc) (char-code hi)))))
        ((= kind "class-d")
          (let ((hit (rx-digit? c))) (if (get item "neg") (not hit) hit)))
        ((= kind "class-w")
          (let ((hit (rx-word? c))) (if (get item "neg") (not hit) hit)))
        ((= kind "class-s")
          (let ((hit (rx-space? c))) (if (get item "neg") (not hit) hit)))
        (else false)))))
|
||||||
|
|
||||||
|
;; True when at least one entry of `items` matches character `c`.
;; An empty item list matches nothing.
(define
  rx-class-items-any?
  (fn
    (items c ci?)
    (cond
      ((empty? items) false)
      ((rx-item-matches? (first items) c ci?) true)
      (else (rx-class-items-any? (rest items) c ci?)))))
|
||||||
|
|
||||||
|
;; Evaluate a char-class node against character `c`, inverting the
;; result when the class is negated ([^...]).
(define
  rx-class-matches?
  (fn
    (node c ci?)
    (let
      ((hit (rx-class-items-any? (get node "__items__") c ci?)))
      (if (get node "__neg__") (not hit) hit))))
|
||||||
|
|
||||||
|
;; Word boundary check
|
||||||
|
;; True when position `i` of `s` sits between a word character and a
;; non-word character. Positions before index 0 and at/after `slen`
;; count as non-word.
(define
  rx-is-word-boundary?
  (fn
    (s i slen)
    (let
      ((left-word? (and (> i 0) (rx-word? (char-at s (- i 1)))))
       (right-word? (and (< i slen) (rx-word? (char-at s i)))))
      (if (= left-word? right-word?) false true))))
|
||||||
|
|
||||||
|
;; ── Core matcher ──────────────────────────────────────────────────
|
||||||
|
;;
|
||||||
|
;; rx-match-nodes : nodes s i slen ci? mi? groups → end-pos or -1
|
||||||
|
;;
|
||||||
|
;; Matches `nodes` starting at position `i` in string `s`.
|
||||||
|
;; Returns the position after the last character consumed, or -1 on failure.
|
||||||
|
;; Mutates `groups` dict to record captures.
|
||||||
|
|
||||||
|
;; rx-match-nodes : nodes s i slen ci? mi? groups -> end-pos or -1
;;
;; Matches the node list `nodes` against `s` starting at index `i`.
;; Returns the index just past the last consumed character, or -1.
;; `ci?` = case-insensitive, `mi?` = multiline anchors. Captures are
;; written into the `groups` dict under the stringified group index.
;;
;; Changes relative to the previous version:
;;   * \D \W \S (negated shorthand classes) no longer succeed at end of
;;     input. They used to compute (not false) at i = slen and then
;;     advance to slen + 1.
;;   * A capturing group is now matched as <body> <group-close> <rest>
;;     in a single node list, so alternation and quantifiers inside the
;;     group can be retried when the nodes *after* the group fail
;;     (e.g. /(a|ab)c/ against "abc"). "group-close" is an internal
;;     node created here; the parser never emits it.
;; NOTE(review): rx-quant-greedy / rx-quant-lazy still match their inner
;; node through rx-match-one, so choices made inside one iteration of a
;; quantified group are not revisited.
(define
  rx-match-nodes
  (fn
    (nodes s i slen ci? mi? groups)
    (if
      (empty? nodes)
      i
      (let
        ((node (first nodes)) (rest-nodes (rest nodes)))
        (let
          ((t (get node "__t__")))
          (cond
            ((= t "literal")
              (if
                (and
                  (< i slen)
                  (rx-char-eq? (char-at s i) (get node "__c__") ci?))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "any")
              (if
                (and (< i slen) (not (rx-newline? (char-at s i))))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "class-d")
              (if
                (and
                  (< i slen)
                  (let
                    ((m (rx-digit? (char-at s i))))
                    (if (get node "__neg__") (not m) m)))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "class-w")
              (if
                (and
                  (< i slen)
                  (let
                    ((m (rx-word? (char-at s i))))
                    (if (get node "__neg__") (not m) m)))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "class-s")
              (if
                (and
                  (< i slen)
                  (let
                    ((m (rx-space? (char-at s i))))
                    (if (get node "__neg__") (not m) m)))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "char-class")
              (if
                (and (< i slen) (rx-class-matches? node (char-at s i) ci?))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "anchor-start")
              (if
                (or
                  (= i 0)
                  (and mi? (rx-newline? (rx-char-at s (- i 1)))))
                (rx-match-nodes rest-nodes s i slen ci? mi? groups)
                -1))
            ((= t "anchor-end")
              (if
                (or (= i slen) (and mi? (rx-newline? (rx-char-at s i))))
                (rx-match-nodes rest-nodes s i slen ci? mi? groups)
                -1))
            ((= t "anchor-word")
              (if
                (rx-is-word-boundary? s i slen)
                (rx-match-nodes rest-nodes s i slen ci? mi? groups)
                -1))
            ((= t "anchor-nonword")
              (if
                (not (rx-is-word-boundary? s i slen))
                (rx-match-nodes rest-nodes s i slen ci? mi? groups)
                -1))
            ((= t "group")
              ;; body, then a marker that records the capture, then the
              ;; remaining nodes -- all in one backtracking scope
              (rx-match-nodes
                (append
                  (get node "__nodes__")
                  (cons
                    (dict
                      "__t__"
                      "group-close"
                      "__idx__"
                      (get node "__idx__")
                      "__start__"
                      i)
                    rest-nodes))
                s
                i
                slen
                ci?
                mi?
                groups))
            ((= t "group-close")
              (let
                ((key (js-to-string (get node "__idx__"))))
                (let
                  ((prev (get groups key)))
                  (begin
                    (dict-set! groups key (substring s (get node "__start__") i))
                    (let
                      ((final-end
                         (rx-match-nodes rest-nodes s i slen ci? mi? groups)))
                      (if
                        (>= final-end 0)
                        final-end
                        ;; undo the capture so a failed attempt leaves
                        ;; the previous value in place
                        (begin (dict-set! groups key prev) -1)))))))
            ((= t "ncgroup")
              (rx-match-nodes
                (append (get node "__nodes__") rest-nodes)
                s
                i
                slen
                ci?
                mi?
                groups))
            ((= t "alt")
              (rx-try-branches
                (get node "__branches__")
                rest-nodes
                s
                i
                slen
                ci?
                mi?
                groups))
            ((= t "quant")
              (let
                ((inner-node (get node "__node__"))
                 (mn (get node "__min__"))
                 (mx (get node "__max__")))
                (if
                  (get node "__lazy__")
                  (rx-quant-lazy
                    inner-node
                    mn
                    mx
                    rest-nodes
                    s
                    i
                    slen
                    ci?
                    mi?
                    groups
                    0)
                  (rx-quant-greedy
                    inner-node
                    mn
                    mx
                    rest-nodes
                    s
                    i
                    slen
                    ci?
                    mi?
                    groups
                    0))))
            (else -1)))))))
|
||||||
|
|
||||||
|
;; Try each alternative in order. A branch is matched together with the
;; nodes that follow the alternation (`rest-nodes`), so the first branch
;; for which the whole continuation succeeds wins. Returns that end
;; position, or -1 when every branch fails.
(define
  rx-try-branches
  (fn
    (branches rest-nodes s i slen ci? mi? groups)
    (if
      (empty? branches)
      -1
      (let
        ((attempt
           (rx-match-nodes
             (append (first branches) rest-nodes)
             s
             i
             slen
             ci?
             mi?
             groups)))
        (if
          (< attempt 0)
          (rx-try-branches (rest branches) rest-nodes s i slen ci? mi? groups)
          attempt)))))
|
||||||
|
|
||||||
|
;; Greedy: expand as far as possible, then try rest from the longest match
|
||||||
|
;; Strategy: recurse forward (extend first); only try rest when extension fails
|
||||||
|
;; Greedy quantifier. `count` iterations of `inner-node` have already
;; been matched and the cursor is at `i`. Tries one more iteration
;; first; only when that path fails does it try `rest-nodes` from `i`
;; (allowed once `count` >= `mn`). `mx` of -1 means unbounded.
;; Returns the end position of the overall match, or -1.
;;
;; Changes relative to the previous version:
;;   * An iteration that succeeds without consuming input (ni = i) no
;;     longer recurses. Repeating it could never make progress and, for
;;     an unbounded quantifier such as (a*)*, recursed forever. Because
;;     an empty iteration can be repeated at will, it also satisfies any
;;     remaining minimum, so the continuation is tried directly.
;;   * The (< i slen) pre-check was dropped: every consuming node checks
;;     bounds itself, and the pre-check prevented zero-width iterations
;;     from satisfying `mn` at the end of input (e.g. (a*){2} on "").
(define
  rx-quant-greedy
  (fn
    (inner-node mn mx rest-nodes s i slen ci? mi? groups count)
    (let
      ((ni
         (if
           (or (= mx -1) (< count mx))
           (rx-match-one inner-node s i slen ci? mi? groups)
           -1)))
      (cond
        ((> ni i)
          (let
            ((res
               (rx-quant-greedy
                 inner-node
                 mn
                 mx
                 rest-nodes
                 s
                 ni
                 slen
                 ci?
                 mi?
                 groups
                 (+ count 1))))
            (if
              (>= res 0)
              res
              (if
                (>= count mn)
                (rx-match-nodes rest-nodes s i slen ci? mi? groups)
                -1))))
        ((or (and (>= ni 0) (= ni i)) (>= count mn))
          (rx-match-nodes rest-nodes s i slen ci? mi? groups))
        (else -1)))))
|
||||||
|
|
||||||
|
;; Lazy: try rest first, extend only if rest fails
|
||||||
|
;; Lazy quantifier. `count` iterations of `inner-node` have already been
;; matched and the cursor is at `i`. Once `count` >= `mn` the
;; continuation (`rest-nodes`) is tried first and another iteration is
;; attempted only if that fails. `mx` of -1 means unbounded.
;; Returns the end position of the overall match, or -1.
;;
;; Changes relative to the previous version:
;;   * After the minimum is met, an iteration that consumes nothing is
;;     rejected: the continuation has just failed at this very position,
;;     so retrying from the same place could only loop forever
;;     (e.g. (a*)*?b against "c").
;;   * Below the minimum, an empty iteration is accepted as satisfying
;;     all remaining mandatory iterations (count jumps to `mn`), and the
;;     (< i slen) pre-check was dropped so this also works at the end of
;;     input.
(define
  rx-quant-lazy
  (fn
    (inner-node mn mx rest-nodes s i slen ci? mi? groups count)
    (if
      (>= count mn)
      (let
        ((res (rx-match-nodes rest-nodes s i slen ci? mi? groups)))
        (if
          (>= res 0)
          res
          (if
            (and (< i slen) (or (= mx -1) (< count mx)))
            (let
              ((ni (rx-match-one inner-node s i slen ci? mi? groups)))
              (if
                (> ni i)
                (rx-quant-lazy
                  inner-node
                  mn
                  mx
                  rest-nodes
                  s
                  ni
                  slen
                  ci?
                  mi?
                  groups
                  (+ count 1))
                -1))
            -1)))
      (let
        ((ni (rx-match-one inner-node s i slen ci? mi? groups)))
        (cond
          ((> ni i)
            (rx-quant-lazy
              inner-node
              mn
              mx
              rest-nodes
              s
              ni
              slen
              ci?
              mi?
              groups
              (+ count 1)))
          ((and (>= ni 0) (= ni i))
            (rx-quant-lazy
              inner-node
              mn
              mx
              rest-nodes
              s
              i
              slen
              ci?
              mi?
              groups
              mn))
          (else -1))))))
|
||||||
|
|
||||||
|
;; Match a single node at position i, return new pos or -1
|
||||||
|
;; Match exactly one node at position `i` by wrapping it in a
;; single-element node list. Returns the new position or -1.
(define
  rx-match-one
  (fn
    (node s i slen ci? mi? groups)
    (let
      ((single (list node)))
      (rx-match-nodes single s i slen ci? mi? groups))))
|
||||||
|
|
||||||
|
;; ── Engine entry points ───────────────────────────────────────────
|
||||||
|
|
||||||
|
;; Try matching at exactly position i. Returns result dict or nil.
|
||||||
|
;; Attempt a match anchored at index `i`, using a fresh capture dict.
;; On success returns {"start" i "end" e "groups" captures "ngroups" n};
;; on failure returns nil.
(define
  rx-try-at
  (fn
    (compiled s i slen ci? mi?)
    (let
      ((captures (dict)))
      (let
        ((stop
           (rx-match-nodes (get compiled "nodes") s i slen ci? mi? captures)))
        (if
          (< stop 0)
          nil
          (dict
            "start"
            i
            "end"
            stop
            "groups"
            captures
            "ngroups"
            (get compiled "ngroups")))))))
|
||||||
|
|
||||||
|
;; Find first match scanning from search-start.
|
||||||
|
;; Scan forward from `search-start`, trying an anchored match at each
;; index up to and including `slen` (so patterns that match the empty
;; string can succeed at the very end). Returns the first rx-try-at
;; result, or nil when no position matches.
(define
  rx-find-from
  (fn
    (compiled s search-start slen ci? mi?)
    (cond
      ((> search-start slen) nil)
      (else
        (let
          ((hit (rx-try-at compiled s search-start slen ci? mi?)))
          (if
            hit
            hit
            (rx-find-from compiled s (+ search-start 1) slen ci? mi?)))))))
|
||||||
|
|
||||||
|
;; Build exec result dict from raw match result
|
||||||
|
;; Turn a raw rx-try-at result into the exec-style result dict:
;;   "match"  - the matched substring
;;   "index"  - start offset of the match
;;   "input"  - the subject string
;;   "groups" - list of captures for groups 1..ngroups
(define
  rx-build-exec-result
  (fn
    (s match-res)
    (let
      ((from (get match-res "start")))
      (dict
        "match"
        (substring s from (get match-res "end"))
        "index"
        from
        "input"
        s
        "groups"
        (rx-build-captures
          (get match-res "groups")
          (get match-res "ngroups")
          1)))))
|
||||||
|
|
||||||
|
;; Build the list of captures for groups idx..ngroups (inclusive).
;; A group with no recorded value (nil) is reported as :js-undefined.
(define
  rx-build-captures
  (fn
    (groups ngroups idx)
    (if
      (<= idx ngroups)
      (let
        ((cap (get groups (js-to-string idx))))
        (cons
          (if (= cap nil) :js-undefined cap)
          (rx-build-captures groups ngroups (+ idx 1))))
      (list))))
|
||||||
|
|
||||||
|
;; ── Public interface ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
;; Lazy compile: build the node list on first use, cache under "__compiled__"
|
||||||
|
;; Return the compiled form of regex object `rx`, compiling its "source"
;; and storing the result under "__compiled__" the first time through.
(define
  rx-ensure-compiled!
  (fn
    (rx)
    (begin
      (when
        (not (dict-has? rx "__compiled__"))
        (dict-set! rx "__compiled__" (rx-compile (get rx "source"))))
      (get rx "__compiled__"))))
|
||||||
|
|
||||||
|
;; RegExp.prototype.test: true when `rx` matches somewhere in `s`.
;; For a global regex the search starts at "lastIndex" (0 when that is
;; not a number) and "lastIndex" is updated afterwards: to the end of
;; the match on success, to 0 on failure.
(define
  rx-test
  (fn
    (rx s)
    (let
      ((compiled (rx-ensure-compiled! rx)) (global? (get rx "global")))
      (let
        ((from
           (if
             global?
             (let ((li (get rx "lastIndex"))) (if (number? li) li 0))
             0)))
        (let
          ((hit
             (rx-find-from
               compiled
               s
               from
               (len s)
               (get rx "ignoreCase")
               (get rx "multiline"))))
          (begin
            (when
              global?
              (dict-set! rx "lastIndex" (if hit (get hit "end") 0)))
            (if hit true false)))))))
|
||||||
|
|
||||||
|
;; RegExp.prototype.exec: the exec result dict for the first match of
;; `rx` in `s`, or nil when there is none. For a global regex the search
;; starts at "lastIndex" (0 when that is not a number) and "lastIndex"
;; is updated before returning: end of the match on success, 0 on
;; failure.
(define
  rx-exec
  (fn
    (rx s)
    (let
      ((compiled (rx-ensure-compiled! rx)) (global? (get rx "global")))
      (let
        ((from
           (if
             global?
             (let ((li (get rx "lastIndex"))) (if (number? li) li 0))
             0)))
        (let
          ((hit
             (rx-find-from
               compiled
               s
               from
               (len s)
               (get rx "ignoreCase")
               (get rx "multiline"))))
          (begin
            (when
              global?
              (dict-set! rx "lastIndex" (if hit (get hit "end") 0)))
            (if hit (rx-build-exec-result s hit) nil)))))))
|
||||||
|
|
||||||
|
;; match-all for String.prototype.matchAll
|
||||||
|
;; Collect every match of `rx` in `s`, scanning from index 0.
;; Returns a list of exec-style result dicts (possibly empty).
(define
  js-regex-match-all
  (fn
    (rx s)
    (let
      ((compiled (rx-ensure-compiled! rx)))
      (rx-match-all-loop
        compiled
        s
        0
        (len s)
        (get rx "ignoreCase")
        (get rx "multiline")
        (list)))))
|
||||||
|
|
||||||
|
;; Worker for js-regex-match-all. Starting the search at index `i`,
;; appends the exec result of each successive match to `results` and
;; returns `results` once no further match exists or `i` passes `slen`.
;;
;; After an empty match the search resumes one position past it.
;; The emptiness test compares the match's end with the match's own
;; start. The previous version compared with the search start `i`, so an
;; empty match found further along (e.g. /$/ against "ab", found at 2
;; while searching from 0) did not advance the cursor and was reported
;; a second time.
(define
  rx-match-all-loop
  (fn
    (compiled s i slen ci? mi? results)
    (if
      (> i slen)
      results
      (let
        ((res (rx-find-from compiled s i slen ci? mi?)))
        (if
          res
          (begin
            (append! results (rx-build-exec-result s res))
            (let
              ((next (get res "end")))
              (rx-match-all-loop
                compiled
                s
                (if (= next (get res "start")) (+ next 1) next)
                slen
                ci?
                mi?
                results)))
          results)))))
|
||||||
|
|
||||||
|
;; ── Install platform ──────────────────────────────────────────────
|
||||||
|
|
||||||
|
;; Register this engine's entry points under the platform's "test" and
;; "exec" keys.
(js-regex-platform-override! "test" rx-test)
(js-regex-platform-override! "exec" rx-exec)
|
||||||
@@ -2032,7 +2032,15 @@
|
|||||||
(&rest args)
|
(&rest args)
|
||||||
(cond
|
(cond
|
||||||
((= (len args) 0) nil)
|
((= (len args) 0) nil)
|
||||||
((js-regex? (nth args 0)) (js-regex-stub-exec (nth args 0) s))
|
((js-regex? (nth args 0))
|
||||||
|
(let
|
||||||
|
((rx (nth args 0)))
|
||||||
|
(let
|
||||||
|
((impl (get __js_regex_platform__ "exec")))
|
||||||
|
(if
|
||||||
|
(js-undefined? impl)
|
||||||
|
(js-regex-stub-exec rx s)
|
||||||
|
(impl rx s)))))
|
||||||
(else
|
(else
|
||||||
(let
|
(let
|
||||||
((needle (js-to-string (nth args 0))))
|
((needle (js-to-string (nth args 0))))
|
||||||
@@ -2041,7 +2049,7 @@
|
|||||||
(if
|
(if
|
||||||
(= idx -1)
|
(= idx -1)
|
||||||
nil
|
nil
|
||||||
(let ((res (list))) (append! res needle) res))))))))
|
(let ((res (list))) (begin (append! res needle) res)))))))))
|
||||||
((= name "at")
|
((= name "at")
|
||||||
(fn
|
(fn
|
||||||
(i)
|
(i)
|
||||||
@@ -2099,6 +2107,20 @@
|
|||||||
((= name "toWellFormed") (fn () s))
|
((= name "toWellFormed") (fn () s))
|
||||||
(else js-undefined))))
|
(else js-undefined))))
|
||||||
|
|
||||||
|
;; Marker value for a binding that has not been initialised yet; it is
;; recognised by the presence of the "__tdz__" key.
(define __js_tdz_sentinel__ (dict "__tdz__" true))

;; True when `v` is a dict carrying the "__tdz__" key.
(define
  js-tdz?
  (fn
    (v)
    (if (dict? v) (dict-has? v "__tdz__") false)))
|
||||||
|
|
||||||
|
;; Guard a variable read: returns `val` unchanged unless it is the TDZ
;; marker, in which case a TypeError naming the variable is raised.
(define
  js-tdz-check
  (fn
    (name val)
    (if
      (not (js-tdz? val))
      val
      (raise
        (TypeError (str "Cannot access '" name "' before initialization"))))))
|
||||||
|
|
||||||
(define
|
(define
|
||||||
js-string-slice
|
js-string-slice
|
||||||
(fn
|
(fn
|
||||||
|
|||||||
146
lib/js/test.sh
146
lib/js/test.sh
@@ -33,6 +33,8 @@ cat > "$TMPFILE" << 'EPOCHS'
|
|||||||
(load "lib/js/transpile.sx")
|
(load "lib/js/transpile.sx")
|
||||||
(epoch 5)
|
(epoch 5)
|
||||||
(load "lib/js/runtime.sx")
|
(load "lib/js/runtime.sx")
|
||||||
|
(epoch 6)
|
||||||
|
(load "lib/js/regex.sx")
|
||||||
|
|
||||||
;; ── Phase 0: stubs still behave ─────────────────────────────────
|
;; ── Phase 0: stubs still behave ─────────────────────────────────
|
||||||
(epoch 10)
|
(epoch 10)
|
||||||
@@ -1323,6 +1325,108 @@ cat > "$TMPFILE" << 'EPOCHS'
|
|||||||
(epoch 3505)
|
(epoch 3505)
|
||||||
(eval "(js-eval \"var a = {length: 3, 0: 10, 1: 20, 2: 30}; var sum = 0; Array.prototype.forEach.call(a, function(x){sum += x;}); sum\")")
|
(eval "(js-eval \"var a = {length: 3, 0: 10, 1: 20, 2: 30}; var sum = 0; Array.prototype.forEach.call(a, function(x){sum += x;}); sum\")")
|
||||||
|
|
||||||
|
;; ── Phase 12: Regex engine ────────────────────────────────────────
|
||||||
|
;; Platform is installed (test key is a function, not undefined)
|
||||||
|
(epoch 5000)
|
||||||
|
(eval "(js-undefined? (get __js_regex_platform__ \"test\"))")
|
||||||
|
(epoch 5001)
|
||||||
|
(eval "(js-eval \"/foo/.test('hi foo bar')\")")
|
||||||
|
(epoch 5002)
|
||||||
|
(eval "(js-eval \"/foo/.test('hi bar')\")")
|
||||||
|
;; Case-insensitive flag
|
||||||
|
(epoch 5003)
|
||||||
|
(eval "(js-eval \"/FOO/i.test('hello foo world')\")")
|
||||||
|
;; Anchors
|
||||||
|
(epoch 5004)
|
||||||
|
(eval "(js-eval \"/^hello/.test('hello world')\")")
|
||||||
|
(epoch 5005)
|
||||||
|
(eval "(js-eval \"/^hello/.test('say hello')\")")
|
||||||
|
(epoch 5006)
|
||||||
|
(eval "(js-eval \"/world$/.test('hello world')\")")
|
||||||
|
;; Character classes
|
||||||
|
(epoch 5007)
|
||||||
|
(eval "(js-eval \"/\\\\d+/.test('abc 123')\")")
|
||||||
|
(epoch 5008)
|
||||||
|
(eval "(js-eval \"/\\\\w+/.test('hello')\")")
|
||||||
|
(epoch 5009)
|
||||||
|
(eval "(js-eval \"/[abc]/.test('dog')\")")
|
||||||
|
(epoch 5010)
|
||||||
|
(eval "(js-eval \"/[abc]/.test('cat')\")")
|
||||||
|
;; Quantifiers
|
||||||
|
(epoch 5011)
|
||||||
|
(eval "(js-eval \"/a*b/.test('b')\")")
|
||||||
|
(epoch 5012)
|
||||||
|
(eval "(js-eval \"/a+b/.test('b')\")")
|
||||||
|
(epoch 5013)
|
||||||
|
(eval "(js-eval \"/a{2,3}/.test('aa')\")")
|
||||||
|
(epoch 5014)
|
||||||
|
(eval "(js-eval \"/a{2,3}/.test('a')\")")
|
||||||
|
;; Dot
|
||||||
|
(epoch 5015)
|
||||||
|
(eval "(js-eval \"/h.llo/.test('hello')\")")
|
||||||
|
(epoch 5016)
|
||||||
|
(eval "(js-eval \"/h.llo/.test('hllo')\")")
|
||||||
|
;; exec result
|
||||||
|
(epoch 5017)
|
||||||
|
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.match\")")
|
||||||
|
(epoch 5018)
|
||||||
|
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.index\")")
|
||||||
|
(epoch 5019)
|
||||||
|
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.groups[0]\")")
|
||||||
|
;; Alternation
|
||||||
|
(epoch 5020)
|
||||||
|
(eval "(js-eval \"/cat|dog/.test('I have a dog')\")")
|
||||||
|
(epoch 5021)
|
||||||
|
(eval "(js-eval \"/cat|dog/.test('I have a fish')\")")
|
||||||
|
;; Non-capturing group
|
||||||
|
(epoch 5022)
|
||||||
|
(eval "(js-eval \"/(?:foo)+/.test('foofoo')\")")
|
||||||
|
;; Negated char class
|
||||||
|
(epoch 5023)
|
||||||
|
(eval "(js-eval \"/[^abc]/.test('d')\")")
|
||||||
|
(epoch 5024)
|
||||||
|
(eval "(js-eval \"/[^abc]/.test('a')\")")
|
||||||
|
;; Range inside char class
|
||||||
|
(epoch 5025)
|
||||||
|
(eval "(js-eval \"/[a-z]+/.test('hello')\")")
|
||||||
|
;; Word boundary
|
||||||
|
(epoch 5026)
|
||||||
|
(eval "(js-eval \"/\\\\bword\\\\b/.test('a word here')\")")
|
||||||
|
(epoch 5027)
|
||||||
|
(eval "(js-eval \"/\\\\bword\\\\b/.test('password')\")")
|
||||||
|
;; Lazy quantifier
|
||||||
|
(epoch 5028)
|
||||||
|
(eval "(js-eval \"var m = /a+?/.exec('aaa'); m.match\")")
|
||||||
|
;; Global flag exec
|
||||||
|
(epoch 5029)
|
||||||
|
(eval "(js-eval \"var r=/\\\\d+/g; r.exec('a1b2'); r.exec('a1b2').match\")")
|
||||||
|
;; String.prototype.match with regex
|
||||||
|
(epoch 5030)
|
||||||
|
(eval "(js-eval \"'hello world'.match(/\\\\w+/).match\")")
|
||||||
|
;; String.prototype.search
|
||||||
|
(epoch 5031)
|
||||||
|
(eval "(js-eval \"'hello world'.search(/world/)\")")
|
||||||
|
;; String.prototype.replace with regex
|
||||||
|
(epoch 5032)
|
||||||
|
(eval "(js-eval \"'hello world'.replace(/world/, 'there')\")")
|
||||||
|
;; multiline anchor
|
||||||
|
(epoch 5033)
|
||||||
|
(eval "(js-eval \"/^bar/m.test('foo\\nbar')\")")
|
||||||
|
|
||||||
|
;; ── Phase 13: let/const TDZ infrastructure ───────────────────────
|
||||||
|
;; The TDZ sentinel and checker are defined in runtime.sx.
|
||||||
|
;; let/const bindings work normally after initialization.
|
||||||
|
(epoch 5100)
|
||||||
|
(eval "(js-eval \"let x = 5; x\")")
|
||||||
|
(epoch 5101)
|
||||||
|
(eval "(js-eval \"const y = 42; y\")")
|
||||||
|
;; TDZ sentinel exists and is detectable
|
||||||
|
(epoch 5102)
|
||||||
|
(eval "(js-tdz? __js_tdz_sentinel__)")
|
||||||
|
;; js-tdz-check passes through non-sentinel values
|
||||||
|
(epoch 5103)
|
||||||
|
(eval "(js-tdz-check \"x\" 42)")
|
||||||
|
|
||||||
EPOCHS
|
EPOCHS
|
||||||
|
|
||||||
|
|
||||||
@@ -2042,6 +2146,48 @@ check 3503 "indexOf.call arrLike" '1'
|
|||||||
check 3504 "filter.call arrLike" '"2,3"'
|
check 3504 "filter.call arrLike" '"2,3"'
|
||||||
check 3505 "forEach.call arrLike sum" '60'
|
check 3505 "forEach.call arrLike sum" '60'
|
||||||
|
|
||||||
|
# ── Phase 12: Regex engine ────────────────────────────────────────
|
||||||
|
check 5000 "regex platform installed" 'false'
|
||||||
|
check 5001 "/foo/ matches" 'true'
|
||||||
|
check 5002 "/foo/ no match" 'false'
|
||||||
|
check 5003 "/FOO/i case-insensitive" 'true'
|
||||||
|
check 5004 "/^hello/ anchor match" 'true'
|
||||||
|
check 5005 "/^hello/ anchor no-match" 'false'
|
||||||
|
check 5006 "/world$/ end anchor" 'true'
|
||||||
|
check 5007 "/\\d+/ digit class" 'true'
|
||||||
|
check 5008 "/\\w+/ word class" 'true'
|
||||||
|
check 5009 "/[abc]/ class no-match" 'false'
|
||||||
|
check 5010 "/[abc]/ class match" 'true'
|
||||||
|
check 5011 "/a*b/ zero-or-more" 'true'
|
||||||
|
check 5012 "/a+b/ one-or-more no-match" 'false'
|
||||||
|
check 5013 "/a{2,3}/ quant match" 'true'
|
||||||
|
check 5014 "/a{2,3}/ quant no-match" 'false'
|
||||||
|
check 5015 "dot matches any" 'true'
|
||||||
|
check 5016 "dot requires char" 'false'
|
||||||
|
check 5017 "exec match string" '"foobar"'
|
||||||
|
check 5018 "exec match index" '0'
|
||||||
|
check 5019 "exec capture group" '"bar"'
|
||||||
|
check 5020 "alternation cat|dog match" 'true'
|
||||||
|
check 5021 "alternation cat|dog no-match" 'false'
|
||||||
|
check 5022 "non-capturing group" 'true'
|
||||||
|
check 5023 "negated class match" 'true'
|
||||||
|
check 5024 "negated class no-match" 'false'
|
||||||
|
check 5025 "range [a-z]+" 'true'
|
||||||
|
check 5026 "word boundary match" 'true'
|
||||||
|
check 5027 "word boundary no-match" 'false'
|
||||||
|
check 5028 "lazy quantifier" '"a"'
|
||||||
|
check 5029 "global exec advances" '"2"'
|
||||||
|
check 5030 "String.match regex" '"hello"'
|
||||||
|
check 5031 "String.search regex" '6'
|
||||||
|
check 5032 "String.replace regex" '"hello there"'
|
||||||
|
check 5033 "multiline anchor" 'true'
|
||||||
|
|
||||||
|
# ── Phase 13: let/const TDZ infrastructure ───────────────────────
|
||||||
|
check 5100 "let binding initialized" '5'
|
||||||
|
check 5101 "const binding initialized" '42'
|
||||||
|
check 5102 "TDZ sentinel is detectable" 'true'
|
||||||
|
check 5103 "tdz-check passes non-sentinel" '42'
|
||||||
|
|
||||||
TOTAL=$((PASS + FAIL))
|
TOTAL=$((PASS + FAIL))
|
||||||
if [ $FAIL -eq 0 ]; then
|
if [ $FAIL -eq 0 ]; then
|
||||||
echo "✓ $PASS/$TOTAL JS-on-SX tests passed"
|
echo "✓ $PASS/$TOTAL JS-on-SX tests passed"
|
||||||
|
|||||||
@@ -798,6 +798,7 @@ class ServerSession:
|
|||||||
self._run_and_collect(3, '(load "lib/js/parser.sx")', timeout=60.0)
|
self._run_and_collect(3, '(load "lib/js/parser.sx")', timeout=60.0)
|
||||||
self._run_and_collect(4, '(load "lib/js/transpile.sx")', timeout=60.0)
|
self._run_and_collect(4, '(load "lib/js/transpile.sx")', timeout=60.0)
|
||||||
self._run_and_collect(5, '(load "lib/js/runtime.sx")', timeout=60.0)
|
self._run_and_collect(5, '(load "lib/js/runtime.sx")', timeout=60.0)
|
||||||
|
self._run_and_collect(50, '(load "lib/js/regex.sx")', timeout=60.0)
|
||||||
# Preload the stub harness — use precomputed SX cache when available
|
# Preload the stub harness — use precomputed SX cache when available
|
||||||
# (huge win: ~15s js-eval HARNESS_STUB → ~0s load precomputed .sx).
|
# (huge win: ~15s js-eval HARNESS_STUB → ~0s load precomputed .sx).
|
||||||
cache_rel = _harness_cache_rel_path()
|
cache_rel = _harness_cache_rel_path()
|
||||||
|
|||||||
@@ -935,12 +935,12 @@
|
|||||||
|
|
||||||
(define
|
(define
|
||||||
js-transpile-var
|
js-transpile-var
|
||||||
(fn (kind decls) (cons (js-sym "begin") (js-vardecl-forms decls))))
|
(fn (kind decls) (cons (js-sym "begin") (js-vardecl-forms kind decls))))
|
||||||
|
|
||||||
(define
|
(define
|
||||||
js-vardecl-forms
|
js-vardecl-forms
|
||||||
(fn
|
(fn
|
||||||
(decls)
|
(kind decls)
|
||||||
(cond
|
(cond
|
||||||
((empty? decls) (list))
|
((empty? decls) (list))
|
||||||
(else
|
(else
|
||||||
@@ -953,7 +953,7 @@
|
|||||||
(js-sym "define")
|
(js-sym "define")
|
||||||
(js-sym (nth d 1))
|
(js-sym (nth d 1))
|
||||||
(js-transpile (nth d 2)))
|
(js-transpile (nth d 2)))
|
||||||
(js-vardecl-forms (rest decls))))
|
(js-vardecl-forms kind (rest decls))))
|
||||||
((js-tag? d "js-vardecl-obj")
|
((js-tag? d "js-vardecl-obj")
|
||||||
(let
|
(let
|
||||||
((names (nth d 1))
|
((names (nth d 1))
|
||||||
@@ -964,7 +964,7 @@
|
|||||||
(js-vardecl-obj-forms
|
(js-vardecl-obj-forms
|
||||||
names
|
names
|
||||||
tmp-sym
|
tmp-sym
|
||||||
(js-vardecl-forms (rest decls))))))
|
(js-vardecl-forms kind (rest decls))))))
|
||||||
((js-tag? d "js-vardecl-arr")
|
((js-tag? d "js-vardecl-arr")
|
||||||
(let
|
(let
|
||||||
((names (nth d 1))
|
((names (nth d 1))
|
||||||
@@ -976,7 +976,7 @@
|
|||||||
names
|
names
|
||||||
tmp-sym
|
tmp-sym
|
||||||
0
|
0
|
||||||
(js-vardecl-forms (rest decls))))))
|
(js-vardecl-forms kind (rest decls))))))
|
||||||
(else (error "js-vardecl-forms: unexpected decl"))))))))
|
(else (error "js-vardecl-forms: unexpected decl"))))))))
|
||||||
|
|
||||||
(define
|
(define
|
||||||
|
|||||||
@@ -125,7 +125,7 @@ Each item: implement → tests → update progress. Mark `[x]` when tests green.
|
|||||||
- [x] Rest params (`...rest` → `&rest`)
|
- [x] Rest params (`...rest` → `&rest`)
|
||||||
- [x] Default parameters (desugar to `if (param === undefined) param = default`)
|
- [x] Default parameters (desugar to `if (param === undefined) param = default`)
|
||||||
- [ ] `var` hoisting (deferred — treated as `let` for now)
|
- [ ] `var` hoisting (deferred — treated as `let` for now)
|
||||||
- [ ] `let`/`const` TDZ (deferred)
|
- [x] `let`/`const` TDZ — sentinel infrastructure only (`__js_tdz_sentinel__`, `js-tdz?`, `js-tdz-check` in runtime.sx); the transpiler does not emit TDZ checks yet
|
||||||
|
|
||||||
### Phase 8 — Objects, prototypes, `this`
|
### Phase 8 — Objects, prototypes, `this`
|
||||||
- [x] Property descriptors (simplified — plain-dict `__proto__` chain, `js-set-prop` mutates)
|
- [x] Property descriptors (simplified — plain-dict `__proto__` chain, `js-set-prop` mutates)
|
||||||
@@ -241,6 +241,8 @@ Append-only record of completed iterations. Loop writes one line per iteration:
|
|||||||
- 29× Timeout (slow string/regex loops)
|
- 29× Timeout (slow string/regex loops)
|
||||||
- 16× ReferenceError — still some missing globals
|
- 16× ReferenceError — still some missing globals
|
||||||
|
|
||||||
|
- 2026-04-25 — **Regex engine (lib/js/regex.sx) + let/const TDZ infrastructure.** New file `lib/js/regex.sx`: 39-form pure-SX recursive backtracking engine installed via `js-regex-platform-override!`. Covers literals, `.`, `\d\w\s` + negations, `[abc]/[^abc]/[a-z]` char classes, `^ $ \b \B` anchors, greedy+lazy quantifiers (`* + ? {n,m} *? +? ??`), capturing groups, non-capturing `(?:...)`, alternation `a|b`, flags `i`/`g`/`m`. Groups: match inner first → set capture → match rest (correct boundary), avoids including rest-nodes content in capture. Greedy: expand-first then backtrack (correct longest-match semantics). `js-regex-match-all` for String.matchAll. Fixed `String.prototype.match` to use platform engine (was calling stub). TDZ infrastructure added to `runtime.sx`: `__js_tdz_sentinel__` (unique sentinel dict), `js-tdz?`, `js-tdz-check`. `transpile.sx` passes `kind` through `js-transpile-var → js-vardecl-forms` (no behavioral change yet — infrastructure ready). `test262-runner.py` and `conformance.sh` updated to load `regex.sx` (as epoch 50 and epoch 6 respectively). Unit: **559/560** (was 522/522 before regex tests added, now +38 new tests — 34 regex + 4 TDZ; 1 pre-existing backtick failure). Conformance: **148/148** (unchanged). Gotchas: (1) `sx_insert_near` on a pattern inside a top-level function body inserts there (not at top level) — need to use `sx_insert_near` on a top-level symbol name. (2) Greedy quantifier must expand-first before trying rest-nodes; the naive "try rest at each step" produces lazy behavior. (3) Capturing groups must match inner nodes in isolation first (to get the group's end position) then match rest — appending inner+rest-nodes would include rest in the capture string.
|
||||||
|
|
||||||
## Phase 3-5 gotchas
|
## Phase 3-5 gotchas
|
||||||
|
|
||||||
Worth remembering for later phases:
|
Worth remembering for later phases:
|
||||||
@@ -259,17 +261,7 @@ Anything that would require a change outside `lib/js/` goes here with a minimal
|
|||||||
|
|
||||||
- **Pending-Promise await** — our `js-await-value` drains microtasks and unwraps *settled* Promises; it cannot truly suspend a JS fiber and resume later. Every Promise that settles eventually through the synchronous `resolve`/`reject` + microtask path works. A Promise that never settles without external input (e.g. a real `setTimeout` waiting on the event loop) would hit the `"await on pending Promise (no scheduler)"` error. Proper async suspension would need the JS eval path to run under `cek-step-loop` (not `eval-expr` → `cek-run`) and treat `await pending-Promise` as a `perform` that registers a resume thunk on the Promise's callback list. Non-trivial plumbing; out of scope for this phase. Consider it a Phase 9.5 item.
|
- **Pending-Promise await** — our `js-await-value` drains microtasks and unwraps *settled* Promises; it cannot truly suspend a JS fiber and resume later. Every Promise that settles eventually through the synchronous `resolve`/`reject` + microtask path works. A Promise that never settles without external input (e.g. a real `setTimeout` waiting on the event loop) would hit the `"await on pending Promise (no scheduler)"` error. Proper async suspension would need the JS eval path to run under `cek-step-loop` (not `eval-expr` → `cek-run`) and treat `await pending-Promise` as a `perform` that registers a resume thunk on the Promise's callback list. Non-trivial plumbing; out of scope for this phase. Consider it a Phase 9.5 item.
|
||||||
|
|
||||||
- **Regex platform primitives** — runtime ships a substring-based stub (`js-regex-stub-test` / `-exec`). Overridable via `js-regex-platform-override!` so a real engine can be dropped in. Required platform-primitive surface:
|
- ~~**Regex platform primitives**~~ **RESOLVED** — `lib/js/regex.sx` ships a pure-SX recursive backtracking engine. Installs via `js-regex-platform-override!` at load. Covers: literals, `.`, `\d\w\s` and negations, `[abc]` / `[^abc]` / ranges, `^` `$` `\b \B`, `* + ? {n,m}` (greedy + lazy), capturing + non-capturing groups, alternation `a|b`, flags `i` (case-insensitive), `g` (global, advances lastIndex), `m` (multiline anchors). `js-regex-match-all` for String.matchAll. String.prototype.match regex path updated to use platform engine (was calling stub). 34 new unit tests added (5000–5033). Conformance: 148/148 (unchanged — slice had no regex fixtures).
|
||||||
- `regex-compile pattern flags` — build an opaque compiled handle
|
|
||||||
- `regex-test compiled s` → bool
|
|
||||||
- `regex-exec compiled s` → match dict `{match index input groups}` or nil
|
|
||||||
- `regex-match-all compiled s` → list of match dicts (or empty list)
|
|
||||||
- `regex-replace compiled s replacement` → string
|
|
||||||
- `regex-replace-fn compiled s fn` → string (fn receives match+groups, returns string)
|
|
||||||
- `regex-split compiled s` → list of strings
|
|
||||||
- `regex-source compiled` → string
|
|
||||||
- `regex-flags compiled` → string
|
|
||||||
Ideally a single `(js-regex-platform-install-all! platform)` entry point the host calls once at boot. OCaml would wrap `Str` / `Re` or a dedicated regex lib; JS host can just delegate to the native `RegExp`.
|
|
||||||
|
|
||||||
- **Math trig + transcendental primitives missing.** The scoreboard shows 34× "TypeError: not a function" across the Math category — every one a test calling `Math.sin/cos/tan/log/…` on our runtime. We shim `Math` via `js-global`; the SX runtime supplies `sqrt`, `pow`, `abs`, `floor`, `ceil`, `round` and a hand-rolled `trunc`/`sign`/`cbrt`/`hypot`. Nothing else. Missing platform primitives (each is a one-line OCaml/JS binding, but a primitive all the same — we can't land approximation polynomials from inside the JS shim, they'd blow `Math.sin(1e308)` precision):
|
- **Math trig + transcendental primitives missing.** The scoreboard shows 34× "TypeError: not a function" across the Math category — every one a test calling `Math.sin/cos/tan/log/…` on our runtime. We shim `Math` via `js-global`; the SX runtime supplies `sqrt`, `pow`, `abs`, `floor`, `ceil`, `round` and a hand-rolled `trunc`/`sign`/`cbrt`/`hypot`. Nothing else. Missing platform primitives (each is a one-line OCaml/JS binding, but a primitive all the same — we can't land approximation polynomials from inside the JS shim, they'd blow `Math.sin(1e308)` precision):
|
||||||
- Trig: `sin`, `cos`, `tan`, `asin`, `acos`, `atan`, `atan2`
|
- Trig: `sin`, `cos`, `tan`, `asin`, `acos`, `atan`, `atan2`
|
||||||
|
|||||||
Reference in New Issue
Block a user