Compare commits
4 Commits
architectu
...
loops/tcl
| Author | SHA1 | Date | |
|---|---|---|---|
| 35aa998fcc | |||
| 6ee052593c | |||
| 1a17d8d232 | |||
| 666e29d5f0 |
@@ -49,8 +49,6 @@ trap "rm -f $TMPFILE" EXIT
|
|||||||
echo '(load "lib/js/transpile.sx")'
|
echo '(load "lib/js/transpile.sx")'
|
||||||
echo '(epoch 5)'
|
echo '(epoch 5)'
|
||||||
echo '(load "lib/js/runtime.sx")'
|
echo '(load "lib/js/runtime.sx")'
|
||||||
echo '(epoch 6)'
|
|
||||||
echo '(load "lib/js/regex.sx")'
|
|
||||||
|
|
||||||
epoch=100
|
epoch=100
|
||||||
for f in "${FIXTURES[@]}"; do
|
for f in "${FIXTURES[@]}"; do
|
||||||
|
|||||||
943
lib/js/regex.sx
943
lib/js/regex.sx
@@ -1,943 +0,0 @@
|
|||||||
;; lib/js/regex.sx — pure-SX recursive backtracking regex engine
|
|
||||||
;;
|
|
||||||
;; Installed via (js-regex-platform-override! ...) at load time.
|
|
||||||
;; Covers: character classes (\d\w\s . [abc] [^abc] [a-z]),
|
|
||||||
;; anchors (^ $ \b \B), quantifiers (* + ? {n,m} lazy variants),
|
|
||||||
;; groups (capturing + non-capturing), alternation (a|b),
|
|
||||||
;; flags: i (case-insensitive), g (global), m (multiline).
|
|
||||||
;;
|
|
||||||
;; Architecture:
|
|
||||||
;; 1. rx-parse-pattern — pattern string → compiled node list
|
|
||||||
;; 2. rx-match-nodes — recursive backtracker
|
|
||||||
;; 3. rx-exec / rx-test — public interface
|
|
||||||
;; 4. Install as {:test rx-test :exec rx-exec}
|
|
||||||
|
|
||||||
;; ── Utilities ─────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
;; Bounds-checked character access: the character of `s` at index `i`,
;; or "" when `i` falls outside the string.
(define
  rx-char-at
  (fn
    (s i)
    (cond
      ((< i 0) "")
      ((>= i (len s)) "")
      (else (char-at s i)))))
|
|
||||||
|
|
||||||
;; True when `c` is an ASCII decimal digit ("0".."9").
;; The empty string (the "no character" value) is never a digit.
(define
  rx-digit?
  (fn
    (c)
    (if
      (= c "")
      false
      (let
        ((code (char-code c)))
        (and (>= code 48) (<= code 57))))))
|
|
||||||
|
|
||||||
;; True when `c` is a regex word character: A-Z, a-z, 0-9 or "_".
;; The empty string is never a word character.
(define
  rx-word?
  (fn
    (c)
    (cond
      ((= c "") false)
      (else
        (let
          ((code (char-code c)))
          (or
            (and (>= code 65) (<= code 90))
            (and (>= code 97) (<= code 122))
            (and (>= code 48) (<= code 57))
            (= c "_")))))))
|
|
||||||
|
|
||||||
;; True when `c` is a whitespace character for `\s`:
;; space, tab, line feed, carriage return, vertical tab or form feed.
;;
;; Vertical tab (code 11) and form feed (code 12) are built with
;; char-from-code: the string "\\f" is a backslash followed by "f" (two
;; characters) and can never equal a single character, and "" is the
;; "no character" value, which must not count as whitespace.
(define
  rx-space?
  (fn
    (c)
    (or
      (= c " ")
      (= c "\t")
      (= c "\n")
      (= c "\r")
      (= c (char-from-code 11))
      (= c (char-from-code 12)))))
|
|
||||||
|
|
||||||
;; True when `c` is a line terminator recognised by this engine (LF or CR).
(define
  rx-newline?
  (fn
    (c)
    (if (= c "\n") true (= c "\r"))))
|
|
||||||
|
|
||||||
;; ASCII-only lower-casing of a single character: "A".."Z" map to
;; "a".."z"; every other character is returned unchanged.
(define
  rx-downcase-char
  (fn
    (c)
    (let
      ((code (char-code c)))
      (cond
        ((< code 65) c)
        ((> code 90) c)
        (else (char-from-code (+ code 32)))))))
|
|
||||||
|
|
||||||
;; Compare two characters. When `ci?` is truthy both sides are ASCII
;; lower-cased first; otherwise the comparison is exact.
(define
  rx-char-eq?
  (fn
    (a b ci?)
    (cond
      (ci? (= (rx-downcase-char a) (rx-downcase-char b)))
      (else (= a b)))))
|
|
||||||
|
|
||||||
;; Read a run of decimal digits from `pat` starting at index `i`,
;; accumulating onto `acc`. Returns (value index-after-last-digit).
;; With no digit at `i` the result is (acc i).
(define
  rx-parse-int
  (fn
    (pat i acc)
    (let
      ((ch (rx-char-at pat i)))
      (cond
        ((not (rx-digit? ch)) (list acc i))
        (else
          (let
            ((digit (- (char-code ch) 48)))
            (rx-parse-int pat (+ i 1) (+ (* acc 10) digit))))))))
|
|
||||||
|
|
||||||
;; Numeric value (0-15) of the hexadecimal digit `c`, or -1 when `c`
;; is not a hex digit.
;;
;; `c` comes from rx-char-at and is "" when the pattern ends in the
;; middle of a \x / \u escape. That case is answered with -1 up front
;; instead of passing "" to char-code, mirroring the explicit
;; empty-string guards in rx-digit? and rx-word?.
(define
  rx-hex-digit-val
  (fn
    (c)
    (if
      (= c "")
      -1
      (let
        ((code (char-code c)))
        (cond
          ((and (>= code 48) (<= code 57)) (- code 48))
          ((and (>= code 65) (<= code 70)) (+ 10 (- code 65)))
          ((and (>= code 97) (<= code 102)) (+ 10 (- code 97)))
          (else -1))))))
|
|
||||||
|
|
||||||
;; Consume up to `n` hex digits of `pat` starting at index `i`, folding
;; them into `acc`. Stops early at the first non-hex character.
;; Returns (char index) where char is (char-from-code acc) for whatever
;; was accumulated and index is the first unconsumed position.
(define
  rx-parse-hex-n
  (fn
    (pat i n acc)
    (let
      ((v (if (= n 0) -1 (rx-hex-digit-val (rx-char-at pat i)))))
      (if
        (< v 0)
        (list (char-from-code acc) i)
        (rx-parse-hex-n pat (+ i 1) (- n 1) (+ (* acc 16) v))))))
|
|
||||||
|
|
||||||
;; ── Pattern compiler ──────────────────────────────────────────────
|
|
||||||
|
|
||||||
;; Node types (stored in dicts with "__t__" key):
|
|
||||||
;; literal : {:__t__ "literal" :__c__ char}
|
|
||||||
;; any : {:__t__ "any"}
|
|
||||||
;; class-d : {:__t__ "class-d" :__neg__ bool}
|
|
||||||
;; class-w : {:__t__ "class-w" :__neg__ bool}
|
|
||||||
;; class-s : {:__t__ "class-s" :__neg__ bool}
|
|
||||||
;; char-class: {:__t__ "char-class" :__neg__ bool :__items__ list}
|
|
||||||
;; anchor-start / anchor-end / anchor-word / anchor-nonword
|
|
||||||
;; quant : {:__t__ "quant" :__node__ n :__min__ m :__max__ mx :__lazy__ bool}
|
|
||||||
;; group : {:__t__ "group" :__idx__ i :__nodes__ list}
|
|
||||||
;; ncgroup : {:__t__ "ncgroup" :__nodes__ list}
|
|
||||||
;; alt : {:__t__ "alt" :__branches__ list-of-node-lists}
|
|
||||||
|
|
||||||
;; Helper for \xHH and \uHHHH. `i` is the index of the "x"/"u" selector
;; and `n` the number of hex digits required. When exactly `n` digits
;; follow, returns a literal node for that code point; otherwise the
;; escape is incomplete and the selector letter itself is returned as a
;; literal (an incomplete \x or \u is an identity escape in a JS regex
;; without the `u` flag).
(define
  rx-parse-hex-escape
  (fn
    (pat i n)
    (let
      ((res (rx-parse-hex-n pat (+ i 1) n 0)))
      (if
        (= (nth res 1) (+ (+ i 1) n))
        (list (dict "__t__" "literal" "__c__" (nth res 0)) (nth res 1))
        (list (dict "__t__" "literal" "__c__" (rx-char-at pat i)) (+ i 1))))))

;; Parse one escape sequence. `i` is the index of the character right
;; after the backslash. Returns (node new-i).
;;
;;   \d \D \w \W \s \S  -> class-d / class-w / class-s (negated for upper case)
;;   \b \B              -> anchor-word / anchor-nonword
;;   \n \r \t \f \v     -> literal control characters
;;   \uHHHH \xHH        -> literal code point (see rx-parse-hex-escape)
;;   anything else      -> that character as a literal
;;
;; \f and \v are built with char-from-code (12 and 11). The string "\\f"
;; is two characters and "" is empty, so a literal node holding either
;; could never match a single input character.
(define
  rx-parse-escape
  (fn
    (pat i)
    (let
      ((c (rx-char-at pat i)) (after (+ i 1)))
      (cond
        ((= c "d") (list (dict "__t__" "class-d" "__neg__" false) after))
        ((= c "D") (list (dict "__t__" "class-d" "__neg__" true) after))
        ((= c "w") (list (dict "__t__" "class-w" "__neg__" false) after))
        ((= c "W") (list (dict "__t__" "class-w" "__neg__" true) after))
        ((= c "s") (list (dict "__t__" "class-s" "__neg__" false) after))
        ((= c "S") (list (dict "__t__" "class-s" "__neg__" true) after))
        ((= c "b") (list (dict "__t__" "anchor-word") after))
        ((= c "B") (list (dict "__t__" "anchor-nonword") after))
        ((= c "n") (list (dict "__t__" "literal" "__c__" "\n") after))
        ((= c "r") (list (dict "__t__" "literal" "__c__" "\r") after))
        ((= c "t") (list (dict "__t__" "literal" "__c__" "\t") after))
        ((= c "f")
          (list (dict "__t__" "literal" "__c__" (char-from-code 12)) after))
        ((= c "v")
          (list (dict "__t__" "literal" "__c__" (char-from-code 11)) after))
        ((= c "u") (rx-parse-hex-escape pat i 4))
        ((= c "x") (rx-parse-hex-escape pat i 2))
        (else (list (dict "__t__" "literal" "__c__" c) after))))))
|
|
||||||
|
|
||||||
;; Helper: `lo` is a character already read inside [...] and `ni` the
;; index right after it. If the text at `ni` is "-" followed by a usable
;; upper bound, returns a range item; otherwise a single-character item.
;; Returns (item new-i).
;;
;; A "-" directly before "]" or at the end of the pattern is not a range
;; operator. An escaped upper bound (e.g. [a-\x7A]) is parsed through
;; rx-parse-escape; if that escape is not a single character (\d, \w ...)
;; no range is formed and `lo` stays a plain literal.
(define
  rx-class-range-or-lit
  (fn
    (pat lo ni)
    (let
      ((dash (rx-char-at pat ni)) (hi-c (rx-char-at pat (+ ni 1))))
      (if
        (and (= dash "-") (not (= hi-c "]")) (not (= hi-c "")))
        (if
          (= hi-c "\\")
          (let
            ((esc (rx-parse-escape pat (+ ni 2))))
            (let
              ((hi (get (nth esc 0) "__c__")))
              (if
                (= hi nil)
                (list (dict "kind" "lit" "c" lo) ni)
                (list (dict "kind" "range" "lo" lo "hi" hi) (nth esc 1)))))
          (list (dict "kind" "range" "lo" lo "hi" hi-c) (+ ni 2)))
        (list (dict "kind" "lit" "c" lo) ni)))))

;; Parse one item inside a character class [...], starting at index `i`.
;; Returns (item new-i) where item is one of
;;   {"kind" "lit" "c" ch}
;;   {"kind" "range" "lo" ch "hi" ch}
;;   {"kind" "class-d"|"class-w"|"class-s" "neg" bool}
;;
;; Inside a class \b means backspace (code 8) and \B is the letter "B";
;; rx-parse-escape returns anchor nodes for them, which carry no "__c__",
;; so they are translated here instead of producing an item with a nil
;; character.
(define
  rx-parse-class-item
  (fn
    (pat i)
    (let
      ((c (rx-char-at pat i)))
      (if
        (= c "\\")
        (let
          ((esc (rx-parse-escape pat (+ i 1))))
          (let
            ((node (nth esc 0)) (ni (nth esc 1)))
            (let
              ((t (get node "__t__")))
              (cond
                ((or (= t "class-d") (= t "class-w") (= t "class-s"))
                  (list (dict "kind" t "neg" (get node "__neg__")) ni))
                ((= t "anchor-word")
                  (rx-class-range-or-lit pat (char-from-code 8) ni))
                ((= t "anchor-nonword") (rx-class-range-or-lit pat "B" ni))
                (else (rx-class-range-or-lit pat (get node "__c__") ni))))))
        (rx-class-range-or-lit pat c (+ i 1))))))
|
|
||||||
|
|
||||||
;; Collect character-class items from index `i` until "]" or the end of
;; the pattern. Items are appended to `items` in place.
;; Returns (items index-of-terminator).
(define
  rx-parse-class-items
  (fn
    (pat i items)
    (let
      ((ch (rx-char-at pat i)))
      (cond
        ((= ch "]") (list items i))
        ((= ch "") (list items i))
        (else
          (let
            ((parsed (rx-parse-class-item pat i)))
            (begin
              (append! items (nth parsed 0))
              (rx-parse-class-items pat (nth parsed 1) items))))))))
|
|
||||||
|
|
||||||
;; Parse a single alternation branch: atoms (each with an optional
;; quantifier) starting at index `i`, stopping at the end of the pattern,
;; at `stop-ch`, or at "|". Nodes are appended to the "nodes" list of
;; `ds`. Returns (nodes new-i groups-count).
(define
  rx-parse-branch
  (fn
    (pat i stop-ch ds)
    (let
      ((c (rx-char-at pat i)))
      (if
        (or (= c "") (= c stop-ch) (= c "|"))
        (list (get ds "nodes") i (get ds "groups"))
        (let
          ((res (rx-parse-atom pat i ds)))
          (let
            ((node (nth res 0)) (ni (nth res 1)) (ds2 (nth res 2)))
            (let
              ((qres (rx-parse-quant pat ni node)))
              (begin
                (append! (get ds2 "nodes") (nth qres 0))
                (rx-parse-branch pat (nth qres 1) stop-ch ds2)))))))))

;; Collect the remaining alternatives of an alternation. `i` is the index
;; of a "|"; `branches` already holds the branches parsed so far and
;; `n-groups` the number of capture groups seen so far. Each branch ends
;; at "|", at `stop-ch`, or at the end of the pattern, so an alternation
;; inside "( ... )" ends at the closing parenthesis.
;; Returns ((alt-node) new-i groups-count).
(define
  rx-parse-alt-until
  (fn
    (pat i stop-ch n-groups branches)
    (let
      ((ds2 (dict "groups" n-groups "nodes" (list))))
      (let
        ((res (rx-parse-branch pat (+ i 1) stop-ch ds2)))
        (begin
          (append! branches (nth res 0))
          (let
            ((ni2 (nth res 1)) (g2 (nth res 2)))
            (if
              (= (rx-char-at pat ni2) "|")
              (rx-parse-alt-until pat ni2 stop-ch g2 branches)
              (list
                (list (dict "__t__" "alt" "__branches__" branches))
                ni2
                g2))))))))

;; Parse a sequence until `stop-ch` or the end of the pattern.
;; Returns (nodes new-i groups-count).
;;
;; When a "|" is met, everything parsed so far becomes the first branch
;; of an "alt" node and the remaining branches are collected with the
;; same `stop-ch`. Previously the remaining branches were parsed with
;; "|" as their only stop character, so in a pattern such as (a|b)c the
;; ")" was read as a literal and the group swallowed the rest of the
;; pattern.
(define
  rx-parse-seq
  (fn
    (pat i stop-ch ds)
    (let
      ((res (rx-parse-branch pat i stop-ch ds)))
      (let
        ((ni (nth res 1)))
        (if
          (and (= (rx-char-at pat ni) "|") (not (= stop-ch "|")))
          (let
            ((branches (list)))
            (begin
              (append! branches (nth res 0))
              (rx-parse-alt-until pat ni stop-ch (nth res 2) branches)))
          res)))))

;; Kept for existing callers: collect alternatives starting at the "|"
;; at index `i`, using the nodes already in `ds` as the first branch.
;; Branches run to the next "|" or the end of the pattern.
(define
  rx-parse-alt-rest
  (fn
    (pat i ds)
    (let
      ((branches (list)))
      (begin
        (append! branches (get ds "nodes"))
        (rx-parse-alt-until pat i "" (get ds "groups") branches)))))

;; Kept for existing callers: same as rx-parse-alt-until with no
;; enclosing stop character.
(define
  rx-parse-alt-branches
  (fn
    (pat i n-groups branches)
    (rx-parse-alt-until pat i "" n-groups branches)))
|
|
||||||
|
|
||||||
;; Build a quantifier node around `node`. `after` is the index just past
;; the quantifier token; a "?" at that position marks the quantifier as
;; lazy and is consumed. `mx` of -1 means "no upper bound".
;; Returns (quant-node new-i).
(define
  rx-make-quant
  (fn
    (pat node mn mx after)
    (let
      ((lazy? (= (rx-char-at pat after) "?")))
      (list
        (dict
          "__t__"
          "quant"
          "__node__"
          node
          "__min__"
          mn
          "__max__"
          mx
          "__lazy__"
          lazy?)
        (if lazy? (+ after 1) after)))))

;; Parse a brace quantifier whose "{" sits at index `i`:
;;   {n}  {n,}  {n,m}   (each optionally followed by "?" for lazy)
;; Anything else — no leading digits ("{}", "{,3}"), a missing upper
;; bound that is not "{n,}", or a missing closing "}" — is not a
;; quantifier: (node i) is returned unchanged so the caller parses "{"
;; as an ordinary character.
(define
  rx-parse-brace-quant
  (fn
    (pat i node)
    (let
      ((mres (rx-parse-int pat (+ i 1) 0)))
      (let
        ((mn (nth mres 0)) (mi (nth mres 1)))
        (let
          ((sep (rx-char-at pat mi)))
          (cond
            ((= mi (+ i 1)) (list node i))
            ((= sep "}") (rx-make-quant pat node mn mn (+ mi 1)))
            ((= sep ",")
              (if
                (= (rx-char-at pat (+ mi 1)) "}")
                (rx-make-quant pat node mn -1 (+ mi 2))
                (let
                  ((mxres (rx-parse-int pat (+ mi 1) 0)))
                  (let
                    ((mx (nth mxres 0)) (mxi (nth mxres 1)))
                    (if
                      (and (> mxi (+ mi 1)) (= (rx-char-at pat mxi) "}"))
                      (rx-make-quant pat node mn mx (+ mxi 1))
                      (list node i))))))
            (else (list node i))))))))

;; Parse an optional quantifier suffix at index `i` and wrap `node` in
;; it. Returns (node-or-quant new-i); when no quantifier is present the
;; result is (node i).
;;   *  -> {0,unbounded}   +  -> {1,unbounded}   ?  -> {0,1}
;;   {…} -> see rx-parse-brace-quant
(define
  rx-parse-quant
  (fn
    (pat i node)
    (let
      ((c (rx-char-at pat i)))
      (cond
        ((= c "*") (rx-make-quant pat node 0 -1 (+ i 1)))
        ((= c "+") (rx-make-quant pat node 1 -1 (+ i 1)))
        ((= c "?") (rx-make-quant pat node 0 1 (+ i 1)))
        ((= c "{") (rx-parse-brace-quant pat i node))
        (else (list node i))))))
|
|
||||||
|
|
||||||
;; Parse a single atom at index `i`. Returns (node new-i new-ds), where
;; new-ds carries the (possibly increased) capture-group count.
;;   .  ^  $        -> any / anchor-start / anchor-end
;;   \…             -> delegated to rx-parse-escape
;;   […] / [^…]     -> char-class node
;;   (?:…)          -> ncgroup node
;;   (…)            -> group node with the next capture index
;;   anything else  -> literal node for that character
(define
  rx-parse-atom
  (fn
    (pat i ds)
    (let
      ((c (rx-char-at pat i)) (nxt (+ i 1)))
      (cond
        ((= c ".") (list (dict "__t__" "any") nxt ds))
        ((= c "^") (list (dict "__t__" "anchor-start") nxt ds))
        ((= c "$") (list (dict "__t__" "anchor-end") nxt ds))
        ((= c "\\")
          (let
            ((esc (rx-parse-escape pat nxt)))
            (list (nth esc 0) (nth esc 1) ds)))
        ((= c "[")
          (let
            ((neg? (= (rx-char-at pat nxt) "^")) (items (list)))
            (let
              ((res
                 (rx-parse-class-items pat (if neg? (+ i 2) nxt) items)))
              ;; (nth res 1) is the index of the closing "]"; step past it.
              (list
                (dict
                  "__t__"
                  "char-class"
                  "__neg__"
                  neg?
                  "__items__"
                  items)
                (+ (nth res 1) 1)
                ds))))
        ((= c "(")
          (if
            (and (= (rx-char-at pat nxt) "?") (= (rx-char-at pat (+ i 2)) ":"))
            ;; Non-capturing group: the group counter is passed through.
            (let
              ((inner-ds (dict "groups" (get ds "groups") "nodes" (list))))
              (let
                ((res (rx-parse-seq pat (+ i 3) ")" inner-ds)))
                (list
                  (dict "__t__" "ncgroup" "__nodes__" (nth res 0))
                  (+ (nth res 1) 1)
                  (dict "groups" (nth res 2) "nodes" (get ds "nodes")))))
            ;; Capturing group: takes the next index before parsing its
            ;; body, so nested groups are numbered after it.
            (let
              ((gidx (+ (get ds "groups") 1)))
              (let
                ((res
                   (rx-parse-seq
                     pat
                     nxt
                     ")"
                     (dict "groups" gidx "nodes" (list)))))
                (list
                  (dict
                    "__t__"
                    "group"
                    "__idx__"
                    gidx
                    "__nodes__"
                    (nth res 0))
                  (+ (nth res 1) 1)
                  (dict "groups" (nth res 2) "nodes" (get ds "nodes")))))))
        (else (list (dict "__t__" "literal" "__c__" c) nxt ds))))))
|
|
||||||
|
|
||||||
;; Compile a pattern string. Returns a dict with
;;   "nodes"   — the parsed node list
;;   "ngroups" — the number of capturing groups
(define
  rx-compile
  (fn
    (pattern)
    (let
      ((parsed
         (rx-parse-seq pattern 0 "" (dict "groups" 0 "nodes" (list)))))
      (dict "nodes" (nth parsed 0) "ngroups" (nth parsed 2)))))
|
|
||||||
|
|
||||||
;; ── Matcher ───────────────────────────────────────────────────────
|
|
||||||
|
|
||||||
;; Test one character-class item against character `c`.
;; `ci?` enables ASCII case-insensitive comparison for "lit" and "range"
;; items; shorthand items (class-d / class-w / class-s) honour their
;; "neg" flag. Unknown item kinds never match.
(define
  rx-item-matches?
  (fn
    (item c ci?)
    (let
      ((kind (get item "kind")))
      (cond
        ((= kind "lit") (rx-char-eq? c (get item "c") ci?))
        ((= kind "range")
          (let
            ((lo-c
               (if ci? (rx-downcase-char (get item "lo")) (get item "lo")))
              (hi-c
                (if ci? (rx-downcase-char (get item "hi")) (get item "hi")))
              (probe (if ci? (rx-downcase-char c) c)))
            (and
              (>= (char-code probe) (char-code lo-c))
              (<= (char-code probe) (char-code hi-c)))))
        ((= kind "class-d")
          (if (get item "neg") (not (rx-digit? c)) (rx-digit? c)))
        ((= kind "class-w")
          (if (get item "neg") (not (rx-word? c)) (rx-word? c)))
        ((= kind "class-s")
          (if (get item "neg") (not (rx-space? c)) (rx-space? c)))
        (else false)))))
|
|
||||||
|
|
||||||
;; True when at least one item of `items` matches character `c`.
;; Walks the list front to back and stops at the first hit.
(define
  rx-class-items-any?
  (fn
    (items c ci?)
    (cond
      ((empty? items) false)
      ((rx-item-matches? (first items) c ci?) true)
      (else (rx-class-items-any? (rest items) c ci?)))))
|
|
||||||
|
|
||||||
;; Test a char-class node against character `c`: true when any item
;; matches, inverted when the node's "__neg__" flag is set.
(define
  rx-class-matches?
  (fn
    (node c ci?)
    (let
      ((found (rx-class-items-any? (get node "__items__") c ci?)))
      (if (get node "__neg__") (not found) found))))
|
|
||||||
|
|
||||||
;; True when position `i` of `s` is a word boundary: exactly one of the
;; characters on either side of `i` is a word character. Positions
;; outside the string count as non-word.
(define
  rx-is-word-boundary?
  (fn
    (s i slen)
    (let
      ((left-word? (and (> i 0) (rx-word? (char-at s (- i 1)))))
        (right-word? (and (< i slen) (rx-word? (char-at s i)))))
      (not (= left-word? right-word?)))))
|
|
||||||
|
|
||||||
;; ── Core matcher ──────────────────────────────────────────────────
|
|
||||||
;;
|
|
||||||
;; rx-match-nodes : nodes s i slen ci? mi? groups → end-pos or -1
|
|
||||||
;;
|
|
||||||
;; Matches `nodes` starting at position `i` in string `s`.
|
|
||||||
;; Returns the position after the last character consumed, or -1 on failure.
|
|
||||||
;; Mutates `groups` dict to record captures.
|
|
||||||
|
|
||||||
;; Recursive backtracking matcher.
;;
;; Matches the node list `nodes` against `s` starting at position `i`.
;;   slen   — length of `s`
;;   ci?    — case-insensitive flag
;;   mi?    — multiline flag (affects ^ and $)
;;   groups — dict mutated in place; capture N is stored under the key
;;            (js-to-string N)
;; Returns the position just after the match, or -1 on failure.
;;
;; Two behaviours differ from the straightforward version:
;;  * \d \w \s and their negations all require a character at `i`.
;;    Without that check a negated class "matched" at the end of input
;;    and advanced past it (so a\D matched the one-character string "a").
;;  * A capturing group is matched by splicing its body in front of the
;;    remaining nodes, followed by an internal "group-close" marker that
;;    records the capture. The rest of the pattern therefore runs as the
;;    group's continuation, and a failure there backtracks into the group
;;    (e.g. (a*)ab against "aab"). The marker node is created only here
;;    and is never produced by the parser.
(define
  rx-match-nodes
  (fn
    (nodes s i slen ci? mi? groups)
    (if
      (empty? nodes)
      i
      (let
        ((node (first nodes)) (rest-nodes (rest nodes)))
        (let
          ((t (get node "__t__")))
          (cond
            ((= t "literal")
              (if
                (and
                  (< i slen)
                  (rx-char-eq? (char-at s i) (get node "__c__") ci?))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "any")
              (if
                (and (< i slen) (not (rx-newline? (char-at s i))))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "class-d")
              (if
                (and
                  (< i slen)
                  (let
                    ((m (rx-digit? (char-at s i))))
                    (if (get node "__neg__") (not m) m)))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "class-w")
              (if
                (and
                  (< i slen)
                  (let
                    ((m (rx-word? (char-at s i))))
                    (if (get node "__neg__") (not m) m)))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "class-s")
              (if
                (and
                  (< i slen)
                  (let
                    ((m (rx-space? (char-at s i))))
                    (if (get node "__neg__") (not m) m)))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "char-class")
              (if
                (and (< i slen) (rx-class-matches? node (char-at s i) ci?))
                (rx-match-nodes rest-nodes s (+ i 1) slen ci? mi? groups)
                -1))
            ((= t "anchor-start")
              (if
                (or
                  (= i 0)
                  (and mi? (rx-newline? (rx-char-at s (- i 1)))))
                (rx-match-nodes rest-nodes s i slen ci? mi? groups)
                -1))
            ((= t "anchor-end")
              (if
                (or (= i slen) (and mi? (rx-newline? (rx-char-at s i))))
                (rx-match-nodes rest-nodes s i slen ci? mi? groups)
                -1))
            ((= t "anchor-word")
              (if
                (rx-is-word-boundary? s i slen)
                (rx-match-nodes rest-nodes s i slen ci? mi? groups)
                -1))
            ((= t "anchor-nonword")
              (if
                (not (rx-is-word-boundary? s i slen))
                (rx-match-nodes rest-nodes s i slen ci? mi? groups)
                -1))
            ((= t "group")
              ;; body + close marker + continuation, matched as one list
              (let
                ((closer
                   (dict
                     "__t__"
                     "group-close"
                     "__idx__"
                     (get node "__idx__")
                     "__start__"
                     i)))
                (rx-match-nodes
                  (append (get node "__nodes__") (cons closer rest-nodes))
                  s
                  i
                  slen
                  ci?
                  mi?
                  groups)))
            ((= t "group-close")
              ;; Record the capture, then continue. If the continuation
              ;; fails, put the previous value back before reporting
              ;; failure so a later alternative does not see a stale capture.
              (let
                ((key (js-to-string (get node "__idx__"))))
                (let
                  ((prev (get groups key)))
                  (begin
                    (dict-set!
                      groups
                      key
                      (substring s (get node "__start__") i))
                    (let
                      ((final-end
                         (rx-match-nodes rest-nodes s i slen ci? mi? groups)))
                      (if
                        (>= final-end 0)
                        final-end
                        (begin (dict-set! groups key prev) -1)))))))
            ((= t "ncgroup")
              (rx-match-nodes
                (append (get node "__nodes__") rest-nodes)
                s
                i
                slen
                ci?
                mi?
                groups))
            ((= t "alt")
              (rx-try-branches
                (get node "__branches__")
                rest-nodes
                s
                i
                slen
                ci?
                mi?
                groups))
            ((= t "quant")
              (let
                ((inner-node (get node "__node__"))
                  (mn (get node "__min__"))
                  (mx (get node "__max__")))
                (if
                  (get node "__lazy__")
                  (rx-quant-lazy
                    inner-node
                    mn
                    mx
                    rest-nodes
                    s
                    i
                    slen
                    ci?
                    mi?
                    groups
                    0)
                  (rx-quant-greedy
                    inner-node
                    mn
                    mx
                    rest-nodes
                    s
                    i
                    slen
                    ci?
                    mi?
                    groups
                    0))))
            (else -1)))))))
|
|
||||||
|
|
||||||
;; Try each alternation branch in order. A branch is matched together
;; with `rest-nodes` (the continuation), so a branch only counts when
;; the rest of the pattern also matches. Returns the end position of
;; the first success, or -1 when every branch fails.
(define
  rx-try-branches
  (fn
    (branches rest-nodes s i slen ci? mi? groups)
    (cond
      ((empty? branches) -1)
      (else
        (let
          ((attempt
             (rx-match-nodes
               (append (first branches) rest-nodes)
               s
               i
               slen
               ci?
               mi?
               groups)))
          (if
            (< attempt 0)
            (rx-try-branches (rest branches) rest-nodes s i slen ci? mi? groups)
            attempt))))))
|
|
||||||
|
|
||||||
;; Greedy quantifier: take as many iterations of `inner-node` as
;; possible, then match `rest-nodes`; on failure give back one iteration
;; at a time.
;;   mn / mx — minimum / maximum iteration count (mx = -1: unbounded)
;;   count   — iterations already taken
;; Returns the end position of the overall match, or -1.
;;
;; An iteration that consumes no input (ni = i) is not followed by a
;; further iteration. Every later iteration would be empty as well, so
;; the minimum is treated as satisfied and `rest-nodes` is tried
;; directly. Without this guard a pattern such as (a*)* recursed without
;; bound.
(define
  rx-quant-greedy
  (fn
    (inner-node mn mx rest-nodes s i slen ci? mi? groups count)
    (let
      ((can-extend (and (< i slen) (or (= mx -1) (< count mx)))))
      (let
        ((ni
           (if
             can-extend
             (rx-match-one inner-node s i slen ci? mi? groups)
             -1)))
        (let
          ((deeper
             (if
               (> ni i)
               (rx-quant-greedy
                 inner-node
                 mn
                 mx
                 rest-nodes
                 s
                 ni
                 slen
                 ci?
                 mi?
                 groups
                 (+ count 1))
               -1)))
          (cond
            ((>= deeper 0) deeper)
            ((or (>= count mn) (= ni i))
              (rx-match-nodes rest-nodes s i slen ci? mi? groups))
            (else -1)))))))
|
|
||||||
|
|
||||||
;; Lazy quantifier: once the minimum count is reached, try `rest-nodes`
;; first and take another iteration of `inner-node` only when that fails.
;;   mn / mx — minimum / maximum iteration count (mx = -1: unbounded)
;;   count   — iterations already taken
;; Returns the end position of the overall match, or -1.
;;
;; An iteration that consumes no input (ni = i) never leads to another
;; iteration: below the minimum the remaining iterations are taken as
;; empty and `rest-nodes` is tried; at or above the minimum `rest-nodes`
;; has already failed at this position, so the result is -1. Without
;; this guard an empty-matching inner node recursed without bound.
(define
  rx-quant-lazy
  (fn
    (inner-node mn mx rest-nodes s i slen ci? mi? groups count)
    (let
      ((res
         (if
           (>= count mn)
           (rx-match-nodes rest-nodes s i slen ci? mi? groups)
           -1)))
      (if
        (>= res 0)
        res
        (let
          ((may-extend
             (and (< i slen) (or (< count mn) (= mx -1) (< count mx)))))
          (let
            ((ni
               (if
                 may-extend
                 (rx-match-one inner-node s i slen ci? mi? groups)
                 -1)))
            (cond
              ((> ni i)
                (rx-quant-lazy
                  inner-node
                  mn
                  mx
                  rest-nodes
                  s
                  ni
                  slen
                  ci?
                  mi?
                  groups
                  (+ count 1)))
              ((and (= ni i) (< count mn))
                (rx-match-nodes rest-nodes s i slen ci? mi? groups))
              (else -1))))))))
|
|
||||||
|
|
||||||
;; Match exactly one node at position `i` by running the matcher on a
;; one-element node list. Returns the new position, or -1 on failure.
(define
  rx-match-one
  (fn
    (node s i slen ci? mi? groups)
    (let
      ((single (list node)))
      (rx-match-nodes single s i slen ci? mi? groups))))
|
|
||||||
|
|
||||||
;; ── Engine entry points ───────────────────────────────────────────
|
|
||||||
|
|
||||||
;; Attempt a match anchored at position `i` only (no scanning).
;; Returns nil on failure, otherwise a dict with "start", "end",
;; "groups" (a fresh capture dict) and "ngroups".
(define
  rx-try-at
  (fn
    (compiled s i slen ci? mi?)
    (let
      ((captures (dict)))
      (let
        ((stop
           (rx-match-nodes (get compiled "nodes") s i slen ci? mi? captures)))
        (if
          (< stop 0)
          nil
          (dict
            "start"
            i
            "end"
            stop
            "groups"
            captures
            "ngroups"
            (get compiled "ngroups")))))))
|
|
||||||
|
|
||||||
;; Scan forward from `search-start`, trying each position up to and
;; including `slen` (so an empty match at the end of input is possible).
;; Returns the first rx-try-at result, or nil when none is found.
(define
  rx-find-from
  (fn
    (compiled s search-start slen ci? mi?)
    (cond
      ((> search-start slen) nil)
      (else
        (let
          ((hit (rx-try-at compiled s search-start slen ci? mi?)))
          (if
            hit
            hit
            (rx-find-from compiled s (+ search-start 1) slen ci? mi?)))))))
|
|
||||||
|
|
||||||
;; Convert a raw match result (as returned by rx-try-at) into the
;; exec-style result dict:
;;   "match"  — matched substring
;;   "index"  — start position
;;   "input"  — the subject string
;;   "groups" — list of captures 1..ngroups
(define
  rx-build-exec-result
  (fn
    (s match-res)
    (let
      ((from (get match-res "start")) (to (get match-res "end")))
      (let
        ((text (substring s from to))
          (captures
            (rx-build-captures
              (get match-res "groups")
              (get match-res "ngroups")
              1)))
        (dict "match" text "index" from "input" s "groups" captures)))))
|
|
||||||
|
|
||||||
;; Build the list of captures numbered `idx`..`ngroups` from the
;; `groups` dict (keyed by the capture index as a string). A capture
;; with no recorded value is reported as :js-undefined.
(define
  rx-build-captures
  (fn
    (groups ngroups idx)
    (cond
      ((> idx ngroups) (list))
      (else
        (let
          ((value (get groups (js-to-string idx))))
          (cons
            (if (= value nil) :js-undefined value)
            (rx-build-captures groups ngroups (+ idx 1))))))))
|
|
||||||
|
|
||||||
;; ── Public interface ──────────────────────────────────────────────
|
|
||||||
|
|
||||||
;; Lazy compile: on first use, compile the regex's "source" with
;; rx-compile and cache the result on the regex dict under
;; "__compiled__"; later calls return the cached value.
(define
  rx-ensure-compiled!
  (fn
    (rx)
    (cond
      ((dict-has? rx "__compiled__") (get rx "__compiled__"))
      (else
        (let
          ((fresh (rx-compile (get rx "source"))))
          (begin (dict-set! rx "__compiled__" fresh) fresh))))))
|
|
||||||
|
|
||||||
;; RegExp test: true when `rx` matches somewhere in `s`.
;; For a global regex the search starts at "lastIndex" (0 when it is
;; not a number) and "lastIndex" is updated afterwards: to the end of
;; the match on success, to 0 on failure.
(define
  rx-test
  (fn
    (rx s)
    (let
      ((compiled (rx-ensure-compiled! rx))
        (ci? (get rx "ignoreCase"))
        (mi? (get rx "multiline"))
        (slen (len s))
        (global? (get rx "global")))
      (let
        ((start
           (if
             global?
             (let ((li (get rx "lastIndex"))) (if (number? li) li 0))
             0)))
        (let
          ((res (rx-find-from compiled s start slen ci? mi?)))
          (begin
            (when
              global?
              (dict-set! rx "lastIndex" (if res (get res "end") 0)))
            (if res true false)))))))
|
|
||||||
|
|
||||||
;; RegExp exec: returns the exec-style result dict for the first match
;; of `rx` in `s`, or nil when there is none.
;; For a global regex the search starts at "lastIndex" (0 when it is
;; not a number) and "lastIndex" is updated before returning: to the
;; end of the match on success, to 0 on failure.
(define
  rx-exec
  (fn
    (rx s)
    (let
      ((compiled (rx-ensure-compiled! rx))
        (ci? (get rx "ignoreCase"))
        (mi? (get rx "multiline"))
        (slen (len s))
        (global? (get rx "global")))
      (let
        ((start
           (if
             global?
             (let ((li (get rx "lastIndex"))) (if (number? li) li 0))
             0)))
        (let
          ((res (rx-find-from compiled s start slen ci? mi?)))
          (begin
            (when
              global?
              (dict-set! rx "lastIndex" (if res (get res "end") 0)))
            (if res (rx-build-exec-result s res) nil)))))))
|
|
||||||
|
|
||||||
;; Match-all entry point (for String.prototype.matchAll): returns the
;; list of exec-style result dicts for every match of `rx` in `s`,
;; scanning from position 0. "lastIndex" is neither read nor written.
(define
  js-regex-match-all
  (fn
    (rx s)
    (let
      ((compiled (rx-ensure-compiled! rx)))
      (rx-match-all-loop
        compiled
        s
        0
        (len s)
        (get rx "ignoreCase")
        (get rx "multiline")
        (list)))))
|
|
||||||
|
|
||||||
;; Collect every match of `compiled` in `s` from position `i` onward,
;; appending exec-style result dicts to `results` (mutated in place and
;; also returned).
;;
;; After an empty match the search resumes one position past it. The
;; test compares the match's end with its own start, not with the
;; search start `i`: a match found by scanning forward can be empty
;; without starting at `i`, and the old test then resumed at the same
;; position and reported that empty match a second time.
(define
  rx-match-all-loop
  (fn
    (compiled s i slen ci? mi? results)
    (if
      (> i slen)
      results
      (let
        ((res (rx-find-from compiled s i slen ci? mi?)))
        (if
          res
          (let
            ((start (get res "start")) (end (get res "end")))
            (begin
              (append! results (rx-build-exec-result s res))
              (rx-match-all-loop
                compiled
                s
                (if (= end start) (+ end 1) end)
                slen
                ci?
                mi?
                results)))
          results)))))
|
|
||||||
|
|
||||||
;; ── Install platform ──────────────────────────────────────────────
|
|
||||||
|
|
||||||
;; Register this engine's entry points with the regex platform hooks:
;; rx-test under the "test" key and rx-exec under the "exec" key.
(js-regex-platform-override! "test" rx-test)
|
|
||||||
(js-regex-platform-override! "exec" rx-exec)
|
|
||||||
@@ -2032,15 +2032,7 @@
|
|||||||
(&rest args)
|
(&rest args)
|
||||||
(cond
|
(cond
|
||||||
((= (len args) 0) nil)
|
((= (len args) 0) nil)
|
||||||
((js-regex? (nth args 0))
|
((js-regex? (nth args 0)) (js-regex-stub-exec (nth args 0) s))
|
||||||
(let
|
|
||||||
((rx (nth args 0)))
|
|
||||||
(let
|
|
||||||
((impl (get __js_regex_platform__ "exec")))
|
|
||||||
(if
|
|
||||||
(js-undefined? impl)
|
|
||||||
(js-regex-stub-exec rx s)
|
|
||||||
(impl rx s)))))
|
|
||||||
(else
|
(else
|
||||||
(let
|
(let
|
||||||
((needle (js-to-string (nth args 0))))
|
((needle (js-to-string (nth args 0))))
|
||||||
@@ -2049,7 +2041,7 @@
|
|||||||
(if
|
(if
|
||||||
(= idx -1)
|
(= idx -1)
|
||||||
nil
|
nil
|
||||||
(let ((res (list))) (begin (append! res needle) res)))))))))
|
(let ((res (list))) (append! res needle) res))))))))
|
||||||
((= name "at")
|
((= name "at")
|
||||||
(fn
|
(fn
|
||||||
(i)
|
(i)
|
||||||
@@ -2107,20 +2099,6 @@
|
|||||||
((= name "toWellFormed") (fn () s))
|
((= name "toWellFormed") (fn () s))
|
||||||
(else js-undefined))))
|
(else js-undefined))))
|
||||||
|
|
||||||
(define __js_tdz_sentinel__ (dict "__tdz__" true))
|
|
||||||
|
|
||||||
;; True when `v` is a dict carrying the "__tdz__" marker key.
(define
  js-tdz?
  (fn
    (v)
    (if (dict? v) (dict-has? v "__tdz__") false)))
|
|
||||||
|
|
||||||
;; Guard a binding read: returns `val` unchanged unless it is the TDZ
;; marker, in which case a TypeError naming `name` is raised.
(define
  js-tdz-check
  (fn
    (name val)
    (cond
      ((js-tdz? val)
        (raise
          (TypeError (str "Cannot access '" name "' before initialization"))))
      (else val))))
|
|
||||||
|
|
||||||
(define
|
(define
|
||||||
js-string-slice
|
js-string-slice
|
||||||
(fn
|
(fn
|
||||||
|
|||||||
146
lib/js/test.sh
146
lib/js/test.sh
@@ -33,8 +33,6 @@ cat > "$TMPFILE" << 'EPOCHS'
|
|||||||
(load "lib/js/transpile.sx")
|
(load "lib/js/transpile.sx")
|
||||||
(epoch 5)
|
(epoch 5)
|
||||||
(load "lib/js/runtime.sx")
|
(load "lib/js/runtime.sx")
|
||||||
(epoch 6)
|
|
||||||
(load "lib/js/regex.sx")
|
|
||||||
|
|
||||||
;; ── Phase 0: stubs still behave ─────────────────────────────────
|
;; ── Phase 0: stubs still behave ─────────────────────────────────
|
||||||
(epoch 10)
|
(epoch 10)
|
||||||
@@ -1325,108 +1323,6 @@ cat > "$TMPFILE" << 'EPOCHS'
|
|||||||
(epoch 3505)
|
(epoch 3505)
|
||||||
(eval "(js-eval \"var a = {length: 3, 0: 10, 1: 20, 2: 30}; var sum = 0; Array.prototype.forEach.call(a, function(x){sum += x;}); sum\")")
|
(eval "(js-eval \"var a = {length: 3, 0: 10, 1: 20, 2: 30}; var sum = 0; Array.prototype.forEach.call(a, function(x){sum += x;}); sum\")")
|
||||||
|
|
||||||
;; ── Phase 12: Regex engine ────────────────────────────────────────
|
|
||||||
;; Platform is installed (test key is a function, not undefined)
|
|
||||||
(epoch 5000)
|
|
||||||
(eval "(js-undefined? (get __js_regex_platform__ \"test\"))")
|
|
||||||
(epoch 5001)
|
|
||||||
(eval "(js-eval \"/foo/.test('hi foo bar')\")")
|
|
||||||
(epoch 5002)
|
|
||||||
(eval "(js-eval \"/foo/.test('hi bar')\")")
|
|
||||||
;; Case-insensitive flag
|
|
||||||
(epoch 5003)
|
|
||||||
(eval "(js-eval \"/FOO/i.test('hello foo world')\")")
|
|
||||||
;; Anchors
|
|
||||||
(epoch 5004)
|
|
||||||
(eval "(js-eval \"/^hello/.test('hello world')\")")
|
|
||||||
(epoch 5005)
|
|
||||||
(eval "(js-eval \"/^hello/.test('say hello')\")")
|
|
||||||
(epoch 5006)
|
|
||||||
(eval "(js-eval \"/world$/.test('hello world')\")")
|
|
||||||
;; Character classes
|
|
||||||
(epoch 5007)
|
|
||||||
(eval "(js-eval \"/\\\\d+/.test('abc 123')\")")
|
|
||||||
(epoch 5008)
|
|
||||||
(eval "(js-eval \"/\\\\w+/.test('hello')\")")
|
|
||||||
(epoch 5009)
|
|
||||||
(eval "(js-eval \"/[abc]/.test('dog')\")")
|
|
||||||
(epoch 5010)
|
|
||||||
(eval "(js-eval \"/[abc]/.test('cat')\")")
|
|
||||||
;; Quantifiers
|
|
||||||
(epoch 5011)
|
|
||||||
(eval "(js-eval \"/a*b/.test('b')\")")
|
|
||||||
(epoch 5012)
|
|
||||||
(eval "(js-eval \"/a+b/.test('b')\")")
|
|
||||||
(epoch 5013)
|
|
||||||
(eval "(js-eval \"/a{2,3}/.test('aa')\")")
|
|
||||||
(epoch 5014)
|
|
||||||
(eval "(js-eval \"/a{2,3}/.test('a')\")")
|
|
||||||
;; Dot
|
|
||||||
(epoch 5015)
|
|
||||||
(eval "(js-eval \"/h.llo/.test('hello')\")")
|
|
||||||
(epoch 5016)
|
|
||||||
(eval "(js-eval \"/h.llo/.test('hllo')\")")
|
|
||||||
;; exec result
|
|
||||||
(epoch 5017)
|
|
||||||
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.match\")")
|
|
||||||
(epoch 5018)
|
|
||||||
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.index\")")
|
|
||||||
(epoch 5019)
|
|
||||||
(eval "(js-eval \"var m = /foo(\\\\w+)/.exec('foobar'); m.groups[0]\")")
|
|
||||||
;; Alternation
|
|
||||||
(epoch 5020)
|
|
||||||
(eval "(js-eval \"/cat|dog/.test('I have a dog')\")")
|
|
||||||
(epoch 5021)
|
|
||||||
(eval "(js-eval \"/cat|dog/.test('I have a fish')\")")
|
|
||||||
;; Non-capturing group
|
|
||||||
(epoch 5022)
|
|
||||||
(eval "(js-eval \"/(?:foo)+/.test('foofoo')\")")
|
|
||||||
;; Negated char class
|
|
||||||
(epoch 5023)
|
|
||||||
(eval "(js-eval \"/[^abc]/.test('d')\")")
|
|
||||||
(epoch 5024)
|
|
||||||
(eval "(js-eval \"/[^abc]/.test('a')\")")
|
|
||||||
;; Range inside char class
|
|
||||||
(epoch 5025)
|
|
||||||
(eval "(js-eval \"/[a-z]+/.test('hello')\")")
|
|
||||||
;; Word boundary
|
|
||||||
(epoch 5026)
|
|
||||||
(eval "(js-eval \"/\\\\bword\\\\b/.test('a word here')\")")
|
|
||||||
(epoch 5027)
|
|
||||||
(eval "(js-eval \"/\\\\bword\\\\b/.test('password')\")")
|
|
||||||
;; Lazy quantifier
|
|
||||||
(epoch 5028)
|
|
||||||
(eval "(js-eval \"var m = /a+?/.exec('aaa'); m.match\")")
|
|
||||||
;; Global flag exec
|
|
||||||
(epoch 5029)
|
|
||||||
(eval "(js-eval \"var r=/\\\\d+/g; r.exec('a1b2'); r.exec('a1b2').match\")")
|
|
||||||
;; String.prototype.match with regex
|
|
||||||
(epoch 5030)
|
|
||||||
(eval "(js-eval \"'hello world'.match(/\\\\w+/).match\")")
|
|
||||||
;; String.prototype.search
|
|
||||||
(epoch 5031)
|
|
||||||
(eval "(js-eval \"'hello world'.search(/world/)\")")
|
|
||||||
;; String.prototype.replace with regex
|
|
||||||
(epoch 5032)
|
|
||||||
(eval "(js-eval \"'hello world'.replace(/world/, 'there')\")")
|
|
||||||
;; multiline anchor
|
|
||||||
(epoch 5033)
|
|
||||||
(eval "(js-eval \"/^bar/m.test('foo\\nbar')\")")
|
|
||||||
|
|
||||||
;; ── Phase 13: let/const TDZ infrastructure ───────────────────────
|
|
||||||
;; The TDZ sentinel and checker are defined in runtime.sx.
|
|
||||||
;; let/const bindings work normally after initialization.
|
|
||||||
(epoch 5100)
|
|
||||||
(eval "(js-eval \"let x = 5; x\")")
|
|
||||||
(epoch 5101)
|
|
||||||
(eval "(js-eval \"const y = 42; y\")")
|
|
||||||
;; TDZ sentinel exists and is detectable
|
|
||||||
(epoch 5102)
|
|
||||||
(eval "(js-tdz? __js_tdz_sentinel__)")
|
|
||||||
;; js-tdz-check passes through non-sentinel values
|
|
||||||
(epoch 5103)
|
|
||||||
(eval "(js-tdz-check \"x\" 42)")
|
|
||||||
|
|
||||||
EPOCHS
|
EPOCHS
|
||||||
|
|
||||||
|
|
||||||
@@ -2146,48 +2042,6 @@ check 3503 "indexOf.call arrLike" '1'
|
|||||||
check 3504 "filter.call arrLike" '"2,3"'
|
check 3504 "filter.call arrLike" '"2,3"'
|
||||||
check 3505 "forEach.call arrLike sum" '60'
|
check 3505 "forEach.call arrLike sum" '60'
|
||||||
|
|
||||||
# ── Phase 12: Regex engine ────────────────────────────────────────
|
|
||||||
check 5000 "regex platform installed" 'false'
|
|
||||||
check 5001 "/foo/ matches" 'true'
|
|
||||||
check 5002 "/foo/ no match" 'false'
|
|
||||||
check 5003 "/FOO/i case-insensitive" 'true'
|
|
||||||
check 5004 "/^hello/ anchor match" 'true'
|
|
||||||
check 5005 "/^hello/ anchor no-match" 'false'
|
|
||||||
check 5006 "/world$/ end anchor" 'true'
|
|
||||||
check 5007 "/\\d+/ digit class" 'true'
|
|
||||||
check 5008 "/\\w+/ word class" 'true'
|
|
||||||
check 5009 "/[abc]/ class no-match" 'false'
|
|
||||||
check 5010 "/[abc]/ class match" 'true'
|
|
||||||
check 5011 "/a*b/ zero-or-more" 'true'
|
|
||||||
check 5012 "/a+b/ one-or-more no-match" 'false'
|
|
||||||
check 5013 "/a{2,3}/ quant match" 'true'
|
|
||||||
check 5014 "/a{2,3}/ quant no-match" 'false'
|
|
||||||
check 5015 "dot matches any" 'true'
|
|
||||||
check 5016 "dot requires char" 'false'
|
|
||||||
check 5017 "exec match string" '"foobar"'
|
|
||||||
check 5018 "exec match index" '0'
|
|
||||||
check 5019 "exec capture group" '"bar"'
|
|
||||||
check 5020 "alternation cat|dog match" 'true'
|
|
||||||
check 5021 "alternation cat|dog no-match" 'false'
|
|
||||||
check 5022 "non-capturing group" 'true'
|
|
||||||
check 5023 "negated class match" 'true'
|
|
||||||
check 5024 "negated class no-match" 'false'
|
|
||||||
check 5025 "range [a-z]+" 'true'
|
|
||||||
check 5026 "word boundary match" 'true'
|
|
||||||
check 5027 "word boundary no-match" 'false'
|
|
||||||
check 5028 "lazy quantifier" '"a"'
|
|
||||||
check 5029 "global exec advances" '"2"'
|
|
||||||
check 5030 "String.match regex" '"hello"'
|
|
||||||
check 5031 "String.search regex" '6'
|
|
||||||
check 5032 "String.replace regex" '"hello there"'
|
|
||||||
check 5033 "multiline anchor" 'true'
|
|
||||||
|
|
||||||
# ── Phase 13: let/const TDZ infrastructure ───────────────────────
|
|
||||||
check 5100 "let binding initialized" '5'
|
|
||||||
check 5101 "const binding initialized" '42'
|
|
||||||
check 5102 "TDZ sentinel is detectable" 'true'
|
|
||||||
check 5103 "tdz-check passes non-sentinel" '42'
|
|
||||||
|
|
||||||
TOTAL=$((PASS + FAIL))
|
TOTAL=$((PASS + FAIL))
|
||||||
if [ $FAIL -eq 0 ]; then
|
if [ $FAIL -eq 0 ]; then
|
||||||
echo "✓ $PASS/$TOTAL JS-on-SX tests passed"
|
echo "✓ $PASS/$TOTAL JS-on-SX tests passed"
|
||||||
|
|||||||
@@ -798,7 +798,6 @@ class ServerSession:
|
|||||||
self._run_and_collect(3, '(load "lib/js/parser.sx")', timeout=60.0)
|
self._run_and_collect(3, '(load "lib/js/parser.sx")', timeout=60.0)
|
||||||
self._run_and_collect(4, '(load "lib/js/transpile.sx")', timeout=60.0)
|
self._run_and_collect(4, '(load "lib/js/transpile.sx")', timeout=60.0)
|
||||||
self._run_and_collect(5, '(load "lib/js/runtime.sx")', timeout=60.0)
|
self._run_and_collect(5, '(load "lib/js/runtime.sx")', timeout=60.0)
|
||||||
self._run_and_collect(50, '(load "lib/js/regex.sx")', timeout=60.0)
|
|
||||||
# Preload the stub harness — use precomputed SX cache when available
|
# Preload the stub harness — use precomputed SX cache when available
|
||||||
# (huge win: ~15s js-eval HARNESS_STUB → ~0s load precomputed .sx).
|
# (huge win: ~15s js-eval HARNESS_STUB → ~0s load precomputed .sx).
|
||||||
cache_rel = _harness_cache_rel_path()
|
cache_rel = _harness_cache_rel_path()
|
||||||
|
|||||||
@@ -935,12 +935,12 @@
|
|||||||
|
|
||||||
(define
|
(define
|
||||||
js-transpile-var
|
js-transpile-var
|
||||||
(fn (kind decls) (cons (js-sym "begin") (js-vardecl-forms kind decls))))
|
(fn (kind decls) (cons (js-sym "begin") (js-vardecl-forms decls))))
|
||||||
|
|
||||||
(define
|
(define
|
||||||
js-vardecl-forms
|
js-vardecl-forms
|
||||||
(fn
|
(fn
|
||||||
(kind decls)
|
(decls)
|
||||||
(cond
|
(cond
|
||||||
((empty? decls) (list))
|
((empty? decls) (list))
|
||||||
(else
|
(else
|
||||||
@@ -953,7 +953,7 @@
|
|||||||
(js-sym "define")
|
(js-sym "define")
|
||||||
(js-sym (nth d 1))
|
(js-sym (nth d 1))
|
||||||
(js-transpile (nth d 2)))
|
(js-transpile (nth d 2)))
|
||||||
(js-vardecl-forms kind (rest decls))))
|
(js-vardecl-forms (rest decls))))
|
||||||
((js-tag? d "js-vardecl-obj")
|
((js-tag? d "js-vardecl-obj")
|
||||||
(let
|
(let
|
||||||
((names (nth d 1))
|
((names (nth d 1))
|
||||||
@@ -964,7 +964,7 @@
|
|||||||
(js-vardecl-obj-forms
|
(js-vardecl-obj-forms
|
||||||
names
|
names
|
||||||
tmp-sym
|
tmp-sym
|
||||||
(js-vardecl-forms kind (rest decls))))))
|
(js-vardecl-forms (rest decls))))))
|
||||||
((js-tag? d "js-vardecl-arr")
|
((js-tag? d "js-vardecl-arr")
|
||||||
(let
|
(let
|
||||||
((names (nth d 1))
|
((names (nth d 1))
|
||||||
@@ -976,7 +976,7 @@
|
|||||||
names
|
names
|
||||||
tmp-sym
|
tmp-sym
|
||||||
0
|
0
|
||||||
(js-vardecl-forms kind (rest decls))))))
|
(js-vardecl-forms (rest decls))))))
|
||||||
(else (error "js-vardecl-forms: unexpected decl"))))))))
|
(else (error "js-vardecl-forms: unexpected decl"))))))))
|
||||||
|
|
||||||
(define
|
(define
|
||||||
|
|||||||
41
lib/tcl/parser.sx
Normal file
41
lib/tcl/parser.sx
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
; Tcl parser — thin layer over tcl-tokenize
|
||||||
|
; Adds tcl-parse entry point and word utility fns
|
||||||
|
|
||||||
|
; Entry point: parse Tcl source to a list of commands.
|
||||||
|
; Returns same structure as tcl-tokenize.
|
||||||
|
(define tcl-parse (fn (src) (tcl-tokenize src)))
|
||||||
|
|
||||||
|
; True if word has no substitutions — value can be read statically.
|
||||||
|
; braced words are always simple. compound words are simple when all
|
||||||
|
; parts are plain text with no var/cmd parts.
|
||||||
|
(define tcl-word-simple?
|
||||||
|
(fn (word)
|
||||||
|
(cond
|
||||||
|
((= (get word :type) "braced") true)
|
||||||
|
((= (get word :type) "compound")
|
||||||
|
(let ((parts (get word :parts)))
|
||||||
|
(every? (fn (p) (= (get p :type) "text")) parts)))
|
||||||
|
(else false))))
|
||||||
|
|
||||||
|
; Concatenate text parts of a simple word into a single string.
|
||||||
|
; For braced words returns :value directly.
|
||||||
|
; For compound words with only text parts, joins them.
|
||||||
|
; Returns nil for words with substitutions.
|
||||||
|
(define tcl-word-literal
|
||||||
|
(fn (word)
|
||||||
|
(cond
|
||||||
|
((= (get word :type) "braced") (get word :value))
|
||||||
|
((= (get word :type) "compound")
|
||||||
|
(if (tcl-word-simple? word)
|
||||||
|
(join "" (map (fn (p) (get p :value)) (get word :parts)))
|
||||||
|
nil))
|
||||||
|
(else nil))))
|
||||||
|
|
||||||
|
; Number of words in a parsed command.
|
||||||
|
(define tcl-cmd-len
|
||||||
|
(fn (cmd) (len (get cmd :words))))
|
||||||
|
|
||||||
|
; Nth word literal from a command (index 0 = command name).
|
||||||
|
; Returns nil if word has substitutions.
|
||||||
|
(define tcl-nth-literal
|
||||||
|
(fn (cmd n) (tcl-word-literal (nth (get cmd :words) n))))
|
||||||
53
lib/tcl/test.sh
Executable file
53
lib/tcl/test.sh
Executable file
@@ -0,0 +1,53 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
# Tcl-on-SX test runner — epoch protocol to sx_server.exe
|
||||||
|
set -uo pipefail
|
||||||
|
cd "$(git rev-parse --show-toplevel)"
|
||||||
|
|
||||||
|
SX_SERVER="${SX_SERVER:-hosts/ocaml/_build/default/bin/sx_server.exe}"
|
||||||
|
if [ ! -x "$SX_SERVER" ]; then
|
||||||
|
SX_SERVER="/root/rose-ash/hosts/ocaml/_build/default/bin/sx_server.exe"
|
||||||
|
fi
|
||||||
|
if [ ! -x "$SX_SERVER" ]; then echo "ERROR: sx_server.exe not found"; exit 1; fi
|
||||||
|
|
||||||
|
VERBOSE="${1:-}"
|
||||||
|
TMPFILE=$(mktemp)
|
||||||
|
# Remove the temp file on exit. Single-quoted so "$TMPFILE" is expanded
# (and quoted) when the trap fires, not word-split when the trap is set.
trap 'rm -f "$TMPFILE"' EXIT
|
||||||
|
|
||||||
|
cat > "$TMPFILE" << 'EPOCHS'
|
||||||
|
(epoch 1)
|
||||||
|
(load "lib/tcl/tokenizer.sx")
|
||||||
|
(epoch 2)
|
||||||
|
(load "lib/tcl/parser.sx")
|
||||||
|
(epoch 3)
|
||||||
|
(load "lib/tcl/tests/parse.sx")
|
||||||
|
(epoch 4)
|
||||||
|
(eval "(tcl-run-parse-tests)")
|
||||||
|
EPOCHS
|
||||||
|
|
||||||
|
OUTPUT=$(timeout 30 "$SX_SERVER" < "$TMPFILE" 2>&1)
|
||||||
|
[ "$VERBOSE" = "-v" ] && echo "$OUTPUT"
|
||||||
|
|
||||||
|
# Result follows an (ok-len 4 N) line
|
||||||
|
RESULT=$(echo "$OUTPUT" | grep -A1 "^(ok-len 4 " | tail -1)
|
||||||
|
if [ -z "$RESULT" ]; then
|
||||||
|
# Fallback: single-line "(ok 4 VALUE)" reply; strip the "(ok 4 " prefix
# (must match the epoch grepped for) and the closing paren.
RESULT=$(echo "$OUTPUT" | grep "^(ok 4 " | sed 's/^(ok 4 //' | sed 's/)$//')
|
||||||
|
fi
|
||||||
|
if [ -z "$RESULT" ]; then
|
||||||
|
echo "ERROR: no result from epoch 4"
|
||||||
|
echo "$OUTPUT" | tail -10
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
PASSED=$(echo "$RESULT" | grep -o ':passed [0-9]*' | grep -o '[0-9]*$')
|
||||||
|
FAILED=$(echo "$RESULT" | grep -o ':failed [0-9]*' | grep -o '[0-9]*$')
|
||||||
|
PASSED=${PASSED:-0}; FAILED=${FAILED:-1}
|
||||||
|
TOTAL=$((PASSED + FAILED))
|
||||||
|
|
||||||
|
if [ "$FAILED" = "0" ]; then
|
||||||
|
echo "ok $PASSED/$TOTAL tcl-tokenize tests passed"
|
||||||
|
exit 0
|
||||||
|
else
|
||||||
|
echo "FAIL $PASSED/$TOTAL passed, $FAILED failed"
|
||||||
|
echo "$RESULT"
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
186
lib/tcl/tests/parse.sx
Normal file
186
lib/tcl/tests/parse.sx
Normal file
@@ -0,0 +1,186 @@
|
|||||||
|
(define tcl-parse-pass 0)
|
||||||
|
(define tcl-parse-fail 0)
|
||||||
|
(define tcl-parse-failures (list))
|
||||||
|
|
||||||
|
(define tcl-assert
|
||||||
|
(fn (label expected actual)
|
||||||
|
(if (= expected actual)
|
||||||
|
(set! tcl-parse-pass (+ tcl-parse-pass 1))
|
||||||
|
(begin
|
||||||
|
(set! tcl-parse-fail (+ tcl-parse-fail 1))
|
||||||
|
(append! tcl-parse-failures
|
||||||
|
(str label ": expected=" (str expected) " got=" (str actual)))))))
|
||||||
|
|
||||||
|
(define tcl-first-cmd
|
||||||
|
(fn (src) (nth (tcl-tokenize src) 0)))
|
||||||
|
|
||||||
|
(define tcl-cmd-words
|
||||||
|
(fn (src) (get (tcl-first-cmd src) :words)))
|
||||||
|
|
||||||
|
(define tcl-word
|
||||||
|
(fn (src wi) (nth (tcl-cmd-words src) wi)))
|
||||||
|
|
||||||
|
(define tcl-parts
|
||||||
|
(fn (src wi) (get (tcl-word src wi) :parts)))
|
||||||
|
|
||||||
|
(define tcl-part
|
||||||
|
(fn (src wi pi) (nth (tcl-parts src wi) pi)))
|
||||||
|
|
||||||
|
(define tcl-run-parse-tests
|
||||||
|
(fn ()
|
||||||
|
(set! tcl-parse-pass 0)
|
||||||
|
(set! tcl-parse-fail 0)
|
||||||
|
(set! tcl-parse-failures (list))
|
||||||
|
|
||||||
|
; empty / whitespace-only
|
||||||
|
(tcl-assert "empty" 0 (len (tcl-tokenize "")))
|
||||||
|
(tcl-assert "ws-only" 0 (len (tcl-tokenize " ")))
|
||||||
|
(tcl-assert "nl-only" 0 (len (tcl-tokenize "\n\n")))
|
||||||
|
|
||||||
|
; single command word count
|
||||||
|
(tcl-assert "1word" 1 (len (tcl-cmd-words "set")))
|
||||||
|
(tcl-assert "3words" 3 (len (tcl-cmd-words "set x 1")))
|
||||||
|
(tcl-assert "4words" 4 (len (tcl-cmd-words "set a b c")))
|
||||||
|
|
||||||
|
; word type — bare word is compound
|
||||||
|
(tcl-assert "bare-type" "compound" (get (tcl-word "set x 1" 0) :type))
|
||||||
|
(tcl-assert "bare-quoted" false (get (tcl-word "set x 1" 0) :quoted))
|
||||||
|
(tcl-assert "bare-part-type" "text" (get (tcl-part "set x 1" 0 0) :type))
|
||||||
|
(tcl-assert "bare-part-val" "set" (get (tcl-part "set x 1" 0 0) :value))
|
||||||
|
(tcl-assert "bare-part2-val" "x" (get (tcl-part "set x 1" 1 0) :value))
|
||||||
|
(tcl-assert "bare-part3-val" "1" (get (tcl-part "set x 1" 2 0) :value))
|
||||||
|
|
||||||
|
; multiple commands
|
||||||
|
(tcl-assert "semi-sep" 2 (len (tcl-tokenize "set x 1; set y 2")))
|
||||||
|
(tcl-assert "nl-sep" 2 (len (tcl-tokenize "set x 1\nset y 2")))
|
||||||
|
(tcl-assert "multi-nl" 3 (len (tcl-tokenize "a\nb\nc")))
|
||||||
|
|
||||||
|
; comments
|
||||||
|
(tcl-assert "comment-only" 0 (len (tcl-tokenize "# comment")))
|
||||||
|
(tcl-assert "comment-nl" 0 (len (tcl-tokenize "# comment\n")))
|
||||||
|
(tcl-assert "comment-then-cmd" 1 (len (tcl-tokenize "# comment\nset x 1")))
|
||||||
|
(tcl-assert "semi-then-comment" 1 (len (tcl-tokenize "set x 1; # comment")))
|
||||||
|
|
||||||
|
; brace-quoted words
|
||||||
|
(tcl-assert "brace-type" "braced" (get (tcl-word "{hello}" 0) :type))
|
||||||
|
(tcl-assert "brace-value" "hello" (get (tcl-word "{hello}" 0) :value))
|
||||||
|
(tcl-assert "brace-spaces" "hello world" (get (tcl-word "{hello world}" 0) :value))
|
||||||
|
(tcl-assert "brace-nested" "a {b} c" (get (tcl-word "{a {b} c}" 0) :value))
|
||||||
|
(tcl-assert "brace-no-var-sub" "hello $x" (get (tcl-word "{hello $x}" 0) :value))
|
||||||
|
(tcl-assert "brace-no-cmd-sub" "[expr 1]" (get (tcl-word "{[expr 1]}" 0) :value))
|
||||||
|
|
||||||
|
; double-quoted words
|
||||||
|
(tcl-assert "dq-type" "compound" (get (tcl-word "\"hello\"" 0) :type))
|
||||||
|
(tcl-assert "dq-quoted" true (get (tcl-word "\"hello\"" 0) :quoted))
|
||||||
|
(tcl-assert "dq-literal" "hello" (get (tcl-part "\"hello\"" 0 0) :value))
|
||||||
|
|
||||||
|
; variable substitution in bare word
|
||||||
|
(tcl-assert "var-type" "var" (get (tcl-part "$x" 0 0) :type))
|
||||||
|
(tcl-assert "var-name" "x" (get (tcl-part "$x" 0 0) :name))
|
||||||
|
(tcl-assert "var-long" "long_name" (get (tcl-part "$long_name" 0 0) :name))
|
||||||
|
|
||||||
|
; ${name} form
|
||||||
|
(tcl-assert "var-brace-type" "var" (get (tcl-part "${x}" 0 0) :type))
|
||||||
|
(tcl-assert "var-brace-name" "x" (get (tcl-part "${x}" 0 0) :name))
|
||||||
|
|
||||||
|
; array variable substitution
|
||||||
|
(tcl-assert "arr-type" "var-arr" (get (tcl-part "$arr(key)" 0 0) :type))
|
||||||
|
(tcl-assert "arr-name" "arr" (get (tcl-part "$arr(key)" 0 0) :name))
|
||||||
|
(tcl-assert "arr-key-len" 1 (len (get (tcl-part "$arr(key)" 0 0) :key)))
|
||||||
|
(tcl-assert "arr-key-text" "key"
|
||||||
|
(get (nth (get (tcl-part "$arr(key)" 0 0) :key) 0) :value))
|
||||||
|
|
||||||
|
; command substitution
|
||||||
|
(tcl-assert "cmd-type" "cmd" (get (tcl-part "[expr 1+1]" 0 0) :type))
|
||||||
|
(tcl-assert "cmd-src" "expr 1+1" (get (tcl-part "[expr 1+1]" 0 0) :src))
|
||||||
|
|
||||||
|
; nested command substitution
|
||||||
|
(tcl-assert "cmd-nested-src" "expr [string length x]"
|
||||||
|
(get (tcl-part "[expr [string length x]]" 0 0) :src))
|
||||||
|
|
||||||
|
; backslash substitution in double-quoted word
|
||||||
|
(let ((ps (tcl-parts "\"a\\nb\"" 0)))
|
||||||
|
(begin
|
||||||
|
(tcl-assert "bs-n-part0" "a" (get (nth ps 0) :value))
|
||||||
|
(tcl-assert "bs-n-part1" "\n" (get (nth ps 1) :value))
|
||||||
|
(tcl-assert "bs-n-part2" "b" (get (nth ps 2) :value))))
|
||||||
|
|
||||||
|
(let ((ps (tcl-parts "\"a\\tb\"" 0)))
|
||||||
|
(tcl-assert "bs-t-part1" "\t" (get (nth ps 1) :value)))
|
||||||
|
|
||||||
|
(let ((ps (tcl-parts "\"a\\\\b\"" 0)))
|
||||||
|
(tcl-assert "bs-bs-part1" "\\" (get (nth ps 1) :value)))
|
||||||
|
|
||||||
|
; mixed word: text + var + text in double-quoted
|
||||||
|
(let ((ps (tcl-parts "\"hello $name!\"" 0)))
|
||||||
|
(begin
|
||||||
|
(tcl-assert "mixed-text0" "hello " (get (nth ps 0) :value))
|
||||||
|
(tcl-assert "mixed-var1-type" "var" (get (nth ps 1) :type))
|
||||||
|
(tcl-assert "mixed-var1-name" "name" (get (nth ps 1) :name))
|
||||||
|
(tcl-assert "mixed-text2" "!" (get (nth ps 2) :value))))
|
||||||
|
|
||||||
|
; {*} expansion
|
||||||
|
(tcl-assert "expand-type" "expand" (get (tcl-word "{*}$list" 0) :type))
|
||||||
|
|
||||||
|
; line continuation between words
|
||||||
|
(tcl-assert "cont-words" 3 (len (tcl-cmd-words "set x \\\n 1")))
|
||||||
|
|
||||||
|
; continuation — third command word is correct
|
||||||
|
(tcl-assert "cont-word2-val" "1"
|
||||||
|
(get (tcl-part "set x \\\n 1" 2 0) :value))
|
||||||
|
|
||||||
|
|
||||||
|
; --- parser helpers ---
|
||||||
|
; tcl-parse is an alias for tcl-tokenize
|
||||||
|
(tcl-assert "parse-cmd-count" 1 (len (tcl-parse "set x 1")))
|
||||||
|
(tcl-assert "parse-2cmds" 2 (len (tcl-parse "set x 1; set y 2")))
|
||||||
|
|
||||||
|
; tcl-cmd-len
|
||||||
|
(tcl-assert "cmd-len-3" 3 (tcl-cmd-len (nth (tcl-parse "set x 1") 0)))
|
||||||
|
(tcl-assert "cmd-len-1" 1 (tcl-cmd-len (nth (tcl-parse "puts") 0)))
|
||||||
|
|
||||||
|
; tcl-word-simple? on braced word
|
||||||
|
(tcl-assert "simple-braced" true
|
||||||
|
(tcl-word-simple? (nth (get (nth (tcl-parse "{hello}") 0) :words) 0)))
|
||||||
|
|
||||||
|
; tcl-word-simple? on bare word with no subs
|
||||||
|
(tcl-assert "simple-bare" true
|
||||||
|
(tcl-word-simple? (nth (get (nth (tcl-parse "hello") 0) :words) 0)))
|
||||||
|
|
||||||
|
; tcl-word-simple? on word containing a var sub — false
|
||||||
|
(tcl-assert "simple-var-false" false
|
||||||
|
(tcl-word-simple? (nth (get (nth (tcl-parse "$x") 0) :words) 0)))
|
||||||
|
|
||||||
|
; tcl-word-simple? on word containing a cmd sub — false
|
||||||
|
(tcl-assert "simple-cmd-false" false
|
||||||
|
(tcl-word-simple? (nth (get (nth (tcl-parse "[expr 1]") 0) :words) 0)))
|
||||||
|
|
||||||
|
; tcl-word-literal on braced word
|
||||||
|
(tcl-assert "lit-braced" "hello world"
|
||||||
|
(tcl-word-literal (nth (get (nth (tcl-parse "{hello world}") 0) :words) 0)))
|
||||||
|
|
||||||
|
; tcl-word-literal on bare word
|
||||||
|
(tcl-assert "lit-bare" "hello"
|
||||||
|
(tcl-word-literal (nth (get (nth (tcl-parse "hello") 0) :words) 0)))
|
||||||
|
|
||||||
|
; tcl-word-literal on word with var sub returns nil
|
||||||
|
(tcl-assert "lit-var-nil" nil
|
||||||
|
(tcl-word-literal (nth (get (nth (tcl-parse "$x") 0) :words) 0)))
|
||||||
|
|
||||||
|
; tcl-nth-literal
|
||||||
|
(tcl-assert "nth-lit-0" "set"
|
||||||
|
(tcl-nth-literal (nth (tcl-parse "set x 1") 0) 0))
|
||||||
|
(tcl-assert "nth-lit-1" "x"
|
||||||
|
(tcl-nth-literal (nth (tcl-parse "set x 1") 0) 1))
|
||||||
|
(tcl-assert "nth-lit-2" "1"
|
||||||
|
(tcl-nth-literal (nth (tcl-parse "set x 1") 0) 2))
|
||||||
|
|
||||||
|
; tcl-nth-literal returns nil when word has subs
|
||||||
|
(tcl-assert "nth-lit-nil" nil
|
||||||
|
(tcl-nth-literal (nth (tcl-parse "set x $y") 0) 2))
|
||||||
|
|
||||||
|
|
||||||
|
(dict
|
||||||
|
"passed" tcl-parse-pass
|
||||||
|
"failed" tcl-parse-fail
|
||||||
|
"failures" tcl-parse-failures)))
|
||||||
308
lib/tcl/tokenizer.sx
Normal file
308
lib/tcl/tokenizer.sx
Normal file
@@ -0,0 +1,308 @@
|
|||||||
|
; True when c is intra-line whitespace: space, tab, or carriage return.
; Newline is not included here; callers test for "\n" separately.
(define tcl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\r"))))
|
||||||
|
|
||||||
|
(define tcl-alpha?
|
||||||
|
(fn (c)
|
||||||
|
(and
|
||||||
|
(not (= c nil))
|
||||||
|
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))))
|
||||||
|
|
||||||
|
(define tcl-digit?
|
||||||
|
(fn (c) (and (not (= c nil)) (>= c "0") (<= c "9"))))
|
||||||
|
|
||||||
|
(define tcl-ident-start?
|
||||||
|
(fn (c) (or (tcl-alpha? c) (= c "_"))))
|
||||||
|
|
||||||
|
(define tcl-ident-char?
|
||||||
|
(fn (c) (or (tcl-ident-start? c) (tcl-digit? c))))
|
||||||
|
|
||||||
|
(define tcl-tokenize
|
||||||
|
(fn (src)
|
||||||
|
(let ((pos 0) (src-len (len src)) (commands (list)))
|
||||||
|
|
||||||
|
(define char-at
|
||||||
|
(fn (off)
|
||||||
|
(if (< (+ pos off) src-len) (nth src (+ pos off)) nil)))
|
||||||
|
|
||||||
|
(define cur (fn () (char-at 0)))
|
||||||
|
|
||||||
|
(define advance! (fn (n) (set! pos (+ pos n))))
|
||||||
|
|
||||||
|
(define skip-ws!
|
||||||
|
(fn ()
|
||||||
|
(when (tcl-ws? (cur))
|
||||||
|
(begin (advance! 1) (skip-ws!)))))
|
||||||
|
|
||||||
|
(define skip-to-eol!
|
||||||
|
(fn ()
|
||||||
|
(when (and (< pos src-len) (not (= (cur) "\n")))
|
||||||
|
(begin (advance! 1) (skip-to-eol!)))))
|
||||||
|
|
||||||
|
(define skip-brace-content!
|
||||||
|
(fn (d)
|
||||||
|
(when (and (< pos src-len) (> d 0))
|
||||||
|
(cond
|
||||||
|
((= (cur) "{") (begin (advance! 1) (skip-brace-content! (+ d 1))))
|
||||||
|
((= (cur) "}") (begin (advance! 1) (skip-brace-content! (- d 1))))
|
||||||
|
(else (begin (advance! 1) (skip-brace-content! d)))))))
|
||||||
|
|
||||||
|
(define skip-dquote-content!
|
||||||
|
(fn ()
|
||||||
|
(when (and (< pos src-len) (not (= (cur) "\"")))
|
||||||
|
(begin
|
||||||
|
(when (= (cur) "\\") (advance! 1))
|
||||||
|
(when (< pos src-len) (advance! 1))
|
||||||
|
(skip-dquote-content!)))))
|
||||||
|
|
||||||
|
(define parse-bs
|
||||||
|
(fn ()
|
||||||
|
(advance! 1)
|
||||||
|
(let ((c (cur)))
|
||||||
|
(cond
|
||||||
|
((= c nil) "\\")
|
||||||
|
((= c "n") (begin (advance! 1) "\n"))
|
||||||
|
((= c "t") (begin (advance! 1) "\t"))
|
||||||
|
((= c "r") (begin (advance! 1) "\r"))
|
||||||
|
((= c "\\") (begin (advance! 1) "\\"))
|
||||||
|
((= c "[") (begin (advance! 1) "["))
|
||||||
|
((= c "]") (begin (advance! 1) "]"))
|
||||||
|
((= c "{") (begin (advance! 1) "{"))
|
||||||
|
((= c "}") (begin (advance! 1) "}"))
|
||||||
|
((= c "$") (begin (advance! 1) "$"))
|
||||||
|
((= c ";") (begin (advance! 1) ";"))
|
||||||
|
((= c "\"") (begin (advance! 1) "\""))
|
||||||
|
((= c "'") (begin (advance! 1) "'"))
|
||||||
|
((= c " ") (begin (advance! 1) " "))
|
||||||
|
((= c "\n")
|
||||||
|
(begin
|
||||||
|
(advance! 1)
|
||||||
|
(skip-ws!)
|
||||||
|
" "))
|
||||||
|
(else (begin (advance! 1) (str "\\" c)))))))
|
||||||
|
|
||||||
|
(define parse-cmd-sub
|
||||||
|
(fn ()
|
||||||
|
(advance! 1)
|
||||||
|
(let ((start pos) (depth 1))
|
||||||
|
(define scan!
|
||||||
|
(fn ()
|
||||||
|
(when (and (< pos src-len) (> depth 0))
|
||||||
|
(cond
|
||||||
|
((= (cur) "[")
|
||||||
|
(begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
|
||||||
|
((= (cur) "]")
|
||||||
|
(begin
|
||||||
|
(set! depth (- depth 1))
|
||||||
|
(when (> depth 0) (advance! 1))
|
||||||
|
(scan!)))
|
||||||
|
((= (cur) "{")
|
||||||
|
(begin (advance! 1) (skip-brace-content! 1) (scan!)))
|
||||||
|
((= (cur) "\"")
|
||||||
|
(begin
|
||||||
|
(advance! 1)
|
||||||
|
(skip-dquote-content!)
|
||||||
|
(when (= (cur) "\"") (advance! 1))
|
||||||
|
(scan!)))
|
||||||
|
((= (cur) "\\")
|
||||||
|
(begin (advance! 1) (when (< pos src-len) (advance! 1)) (scan!)))
|
||||||
|
(else (begin (advance! 1) (scan!)))))))
|
||||||
|
(scan!)
|
||||||
|
(let ((src-text (slice src start pos)))
|
||||||
|
(begin
|
||||||
|
(when (= (cur) "]") (advance! 1))
|
||||||
|
{:type "cmd" :src src-text})))))
|
||||||
|
|
||||||
|
(define scan-name!
|
||||||
|
(fn ()
|
||||||
|
(when (and (< pos src-len) (not (= (cur) "}")))
|
||||||
|
(begin (advance! 1) (scan-name!)))))
|
||||||
|
|
||||||
|
(define scan-ns-name!
|
||||||
|
(fn ()
|
||||||
|
(cond
|
||||||
|
((tcl-ident-char? (cur))
|
||||||
|
(begin (advance! 1) (scan-ns-name!)))
|
||||||
|
((and (= (cur) ":") (= (char-at 1) ":"))
|
||||||
|
(begin (advance! 2) (scan-ns-name!)))
|
||||||
|
(else nil))))
|
||||||
|
|
||||||
|
(define scan-klit!
|
||||||
|
(fn ()
|
||||||
|
(when (and (< pos src-len)
|
||||||
|
(not (= (cur) ")"))
|
||||||
|
(not (= (cur) "$"))
|
||||||
|
(not (= (cur) "["))
|
||||||
|
(not (= (cur) "\\")))
|
||||||
|
(begin (advance! 1) (scan-klit!)))))
|
||||||
|
|
||||||
|
(define scan-key!
|
||||||
|
(fn (kp)
|
||||||
|
(when (and (< pos src-len) (not (= (cur) ")")))
|
||||||
|
(cond
|
||||||
|
((= (cur) "$")
|
||||||
|
(begin (append! kp (parse-var-sub)) (scan-key! kp)))
|
||||||
|
((= (cur) "[")
|
||||||
|
(begin (append! kp (parse-cmd-sub)) (scan-key! kp)))
|
||||||
|
((= (cur) "\\")
|
||||||
|
(begin
|
||||||
|
(append! kp {:type "text" :value (parse-bs)})
|
||||||
|
(scan-key! kp)))
|
||||||
|
(else
|
||||||
|
(let ((kstart pos))
|
||||||
|
(begin
|
||||||
|
(scan-klit!)
|
||||||
|
(append! kp {:type "text" :value (slice src kstart pos)})
|
||||||
|
(scan-key! kp))))))))
|
||||||
|
|
||||||
|
(define parse-var-sub
|
||||||
|
(fn ()
|
||||||
|
(advance! 1)
|
||||||
|
(cond
|
||||||
|
((= (cur) "{")
|
||||||
|
(begin
|
||||||
|
(advance! 1)
|
||||||
|
(let ((start pos))
|
||||||
|
(begin
|
||||||
|
(scan-name!)
|
||||||
|
(let ((name (slice src start pos)))
|
||||||
|
(begin
|
||||||
|
(when (= (cur) "}") (advance! 1))
|
||||||
|
{:type "var" :name name}))))))
|
||||||
|
((tcl-ident-start? (cur))
|
||||||
|
(let ((start pos))
|
||||||
|
(begin
|
||||||
|
(scan-ns-name!)
|
||||||
|
(let ((name (slice src start pos)))
|
||||||
|
(if (= (cur) "(")
|
||||||
|
(begin
|
||||||
|
(advance! 1)
|
||||||
|
(let ((key-parts (list)))
|
||||||
|
(begin
|
||||||
|
(scan-key! key-parts)
|
||||||
|
(when (= (cur) ")") (advance! 1))
|
||||||
|
{:type "var-arr" :name name :key key-parts})))
|
||||||
|
{:type "var" :name name})))))
|
||||||
|
(else {:type "text" :value "$"}))))
|
||||||
|
|
||||||
|
(define scan-lit!
|
||||||
|
(fn (stop?)
|
||||||
|
(when (and (< pos src-len)
|
||||||
|
(not (stop? (cur)))
|
||||||
|
(not (= (cur) "$"))
|
||||||
|
(not (= (cur) "["))
|
||||||
|
(not (= (cur) "\\")))
|
||||||
|
(begin (advance! 1) (scan-lit! stop?)))))
|
||||||
|
|
||||||
|
(define parse-word-parts!
|
||||||
|
(fn (parts stop?)
|
||||||
|
(when (and (< pos src-len) (not (stop? (cur))))
|
||||||
|
(cond
|
||||||
|
((= (cur) "$")
|
||||||
|
(begin (append! parts (parse-var-sub)) (parse-word-parts! parts stop?)))
|
||||||
|
((= (cur) "[")
|
||||||
|
(begin (append! parts (parse-cmd-sub)) (parse-word-parts! parts stop?)))
|
||||||
|
((= (cur) "\\")
|
||||||
|
(begin
|
||||||
|
(append! parts {:type "text" :value (parse-bs)})
|
||||||
|
(parse-word-parts! parts stop?)))
|
||||||
|
(else
|
||||||
|
(let ((start pos))
|
||||||
|
(begin
|
||||||
|
(scan-lit! stop?)
|
||||||
|
(when (> pos start)
|
||||||
|
(append! parts {:type "text" :value (slice src start pos)}))
|
||||||
|
(parse-word-parts! parts stop?))))))))
|
||||||
|
|
||||||
|
(define parse-brace-word
|
||||||
|
(fn ()
|
||||||
|
(advance! 1)
|
||||||
|
(let ((depth 1) (start pos))
|
||||||
|
(define scan!
|
||||||
|
(fn ()
|
||||||
|
(when (and (< pos src-len) (> depth 0))
|
||||||
|
(cond
|
||||||
|
((= (cur) "{")
|
||||||
|
(begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
|
||||||
|
((= (cur) "}")
|
||||||
|
(begin (set! depth (- depth 1)) (when (> depth 0) (advance! 1)) (scan!)))
|
||||||
|
(else (begin (advance! 1) (scan!)))))))
|
||||||
|
(scan!)
|
||||||
|
(let ((value (slice src start pos)))
|
||||||
|
(begin
|
||||||
|
(when (= (cur) "}") (advance! 1))
|
||||||
|
{:type "braced" :value value})))))
|
||||||
|
|
||||||
|
(define parse-dquote-word
|
||||||
|
(fn ()
|
||||||
|
(advance! 1)
|
||||||
|
(let ((parts (list)))
|
||||||
|
(begin
|
||||||
|
(parse-word-parts! parts (fn (c) (or (= c "\"") (= c nil))))
|
||||||
|
(when (= (cur) "\"") (advance! 1))
|
||||||
|
{:type "compound" :parts parts :quoted true}))))
|
||||||
|
|
||||||
|
(define parse-bare-word
|
||||||
|
(fn ()
|
||||||
|
(let ((parts (list)))
|
||||||
|
(begin
|
||||||
|
(parse-word-parts!
|
||||||
|
parts
|
||||||
|
(fn (c) (or (tcl-ws? c) (= c "\n") (= c ";") (= c nil))))
|
||||||
|
{:type "compound" :parts parts :quoted false}))))
|
||||||
|
|
||||||
|
(define parse-word-no-expand
|
||||||
|
(fn ()
|
||||||
|
(cond
|
||||||
|
((= (cur) "{") (parse-brace-word))
|
||||||
|
((= (cur) "\"") (parse-dquote-word))
|
||||||
|
(else (parse-bare-word)))))
|
||||||
|
|
||||||
|
(define parse-word
|
||||||
|
(fn ()
|
||||||
|
(cond
|
||||||
|
((and (= (cur) "{") (= (char-at 1) "*") (= (char-at 2) "}"))
|
||||||
|
(begin
|
||||||
|
(advance! 3)
|
||||||
|
{:type "expand" :word (parse-word-no-expand)}))
|
||||||
|
((= (cur) "{") (parse-brace-word))
|
||||||
|
((= (cur) "\"") (parse-dquote-word))
|
||||||
|
(else (parse-bare-word)))))
|
||||||
|
|
||||||
|
(define parse-words!
|
||||||
|
(fn (words)
|
||||||
|
(skip-ws!)
|
||||||
|
(cond
|
||||||
|
((or (= (cur) nil) (= (cur) "\n") (= (cur) ";")) nil)
|
||||||
|
((and (= (cur) "\\") (= (char-at 1) "\n"))
|
||||||
|
(begin (advance! 2) (skip-ws!) (parse-words! words)))
|
||||||
|
(else
|
||||||
|
(begin
|
||||||
|
(append! words (parse-word))
|
||||||
|
(parse-words! words))))))
|
||||||
|
|
||||||
|
(define skip-seps!
|
||||||
|
(fn ()
|
||||||
|
(when (< pos src-len)
|
||||||
|
(cond
|
||||||
|
((or (tcl-ws? (cur)) (= (cur) "\n") (= (cur) ";"))
|
||||||
|
(begin (advance! 1) (skip-seps!)))
|
||||||
|
((and (= (cur) "\\") (= (char-at 1) "\n"))
|
||||||
|
(begin (advance! 2) (skip-seps!)))
|
||||||
|
(else nil)))))
|
||||||
|
|
||||||
|
(define parse-all!
|
||||||
|
(fn ()
|
||||||
|
(skip-seps!)
|
||||||
|
(when (< pos src-len)
|
||||||
|
(cond
|
||||||
|
((= (cur) "#")
|
||||||
|
(begin (skip-to-eol!) (parse-all!)))
|
||||||
|
(else
|
||||||
|
(let ((words (list)))
|
||||||
|
(begin
|
||||||
|
(parse-words! words)
|
||||||
|
(when (> (len words) 0)
|
||||||
|
(append! commands {:type "command" :words words}))
|
||||||
|
(parse-all!))))))))
|
||||||
|
|
||||||
|
(parse-all!)
|
||||||
|
commands)))
|
||||||
@@ -125,7 +125,7 @@ Each item: implement → tests → update progress. Mark `[x]` when tests green.
|
|||||||
- [x] Rest params (`...rest` → `&rest`)
|
- [x] Rest params (`...rest` → `&rest`)
|
||||||
- [x] Default parameters (desugar to `if (param === undefined) param = default`)
|
- [x] Default parameters (desugar to `if (param === undefined) param = default`)
|
||||||
- [ ] `var` hoisting (deferred — treated as `let` for now)
|
- [ ] `var` hoisting (deferred — treated as `let` for now)
|
||||||
- [x] `let`/`const` TDZ — sentinel infrastructure (`__js_tdz_sentinel__`, `js-tdz?`, `js-tdz-check` in runtime.sx)
|
- [ ] `let`/`const` TDZ (deferred)
|
||||||
|
|
||||||
### Phase 8 — Objects, prototypes, `this`
|
### Phase 8 — Objects, prototypes, `this`
|
||||||
- [x] Property descriptors (simplified — plain-dict `__proto__` chain, `js-set-prop` mutates)
|
- [x] Property descriptors (simplified — plain-dict `__proto__` chain, `js-set-prop` mutates)
|
||||||
@@ -241,8 +241,6 @@ Append-only record of completed iterations. Loop writes one line per iteration:
|
|||||||
- 29× Timeout (slow string/regex loops)
|
- 29× Timeout (slow string/regex loops)
|
||||||
- 16× ReferenceError — still some missing globals
|
- 16× ReferenceError — still some missing globals
|
||||||
|
|
||||||
- 2026-04-25 — **Regex engine (lib/js/regex.sx) + let/const TDZ infrastructure.** New file `lib/js/regex.sx`: 39-form pure-SX recursive backtracking engine installed via `js-regex-platform-override!`. Covers literals, `.`, `\d\w\s` + negations, `[abc]/[^abc]/[a-z]` char classes, `^\$\b\B` anchors, greedy+lazy quantifiers (`* + ? {n,m} *? +? ??`), capturing groups, non-capturing `(?:...)`, alternation `a|b`, flags `i`/`g`/`m`. Groups: match inner first → set capture → match rest (correct boundary), avoids including rest-nodes content in capture. Greedy: expand-first then backtrack (correct longest-match semantics). `js-regex-match-all` for String.matchAll. Fixed `String.prototype.match` to use platform engine (was calling stub). TDZ infrastructure added to `runtime.sx`: `__js_tdz_sentinel__` (unique sentinel dict), `js-tdz?`, `js-tdz-check`. `transpile.sx` passes `kind` through `js-transpile-var → js-vardecl-forms` (no behavioral change yet — infrastructure ready). `test262-runner.py` and `conformance.sh` updated to load `regex.sx` as epoch 6/50. Unit: **559/560** (was 522/522 before regex tests added, now +38 new tests; 1 pre-existing backtick failure). Conformance: **148/148** (unchanged). Gotchas: (1) `sx_insert_near` on a pattern inside a top-level function body inserts there (not at top level) — need to use `sx_insert_near` on a top-level symbol name. (2) Greedy quantifier must expand-first before trying rest-nodes; the naive "try rest at each step" produces lazy behavior. (3) Capturing groups must match inner nodes in isolation first (to get the group's end position) then match rest — appending inner+rest-nodes would include rest in the capture string.
|
|
||||||
|
|
||||||
## Phase 3-5 gotchas
|
## Phase 3-5 gotchas
|
||||||
|
|
||||||
Worth remembering for later phases:
|
Worth remembering for later phases:
|
||||||
@@ -261,7 +259,17 @@ Anything that would require a change outside `lib/js/` goes here with a minimal
|
|||||||
|
|
||||||
- **Pending-Promise await** — our `js-await-value` drains microtasks and unwraps *settled* Promises; it cannot truly suspend a JS fiber and resume later. Every Promise that settles eventually through the synchronous `resolve`/`reject` + microtask path works. A Promise that never settles without external input (e.g. a real `setTimeout` waiting on the event loop) would hit the `"await on pending Promise (no scheduler)"` error. Proper async suspension would need the JS eval path to run under `cek-step-loop` (not `eval-expr` → `cek-run`) and treat `await pending-Promise` as a `perform` that registers a resume thunk on the Promise's callback list. Non-trivial plumbing; out of scope for this phase. Consider it a Phase 9.5 item.
|
- **Pending-Promise await** — our `js-await-value` drains microtasks and unwraps *settled* Promises; it cannot truly suspend a JS fiber and resume later. Every Promise that settles eventually through the synchronous `resolve`/`reject` + microtask path works. A Promise that never settles without external input (e.g. a real `setTimeout` waiting on the event loop) would hit the `"await on pending Promise (no scheduler)"` error. Proper async suspension would need the JS eval path to run under `cek-step-loop` (not `eval-expr` → `cek-run`) and treat `await pending-Promise` as a `perform` that registers a resume thunk on the Promise's callback list. Non-trivial plumbing; out of scope for this phase. Consider it a Phase 9.5 item.
|
||||||
|
|
||||||
- ~~**Regex platform primitives**~~ **RESOLVED** — `lib/js/regex.sx` ships a pure-SX recursive backtracking engine. Installs via `js-regex-platform-override!` at load. Covers: literals, `.`, `\d\w\s` and negations, `[abc]` / `[^abc]` / ranges, `^` `$` `\b \B`, `* + ? {n,m}` (greedy + lazy), capturing + non-capturing groups, alternation `a|b`, flags `i` (case-insensitive), `g` (global, advances lastIndex), `m` (multiline anchors). `js-regex-match-all` for String.matchAll. String.prototype.match regex path updated to use platform engine (was calling stub). 34 new unit tests added (5000–5033). Conformance: 148/148 (unchanged — slice had no regex fixtures).
|
- **Regex platform primitives** — runtime ships a substring-based stub (`js-regex-stub-test` / `-exec`). Overridable via `js-regex-platform-override!` so a real engine can be dropped in. Required platform-primitive surface:
|
||||||
|
- `regex-compile pattern flags` — build an opaque compiled handle
|
||||||
|
- `regex-test compiled s` → bool
|
||||||
|
- `regex-exec compiled s` → match dict `{match index input groups}` or nil
|
||||||
|
- `regex-match-all compiled s` → list of match dicts (or empty list)
|
||||||
|
- `regex-replace compiled s replacement` → string
|
||||||
|
- `regex-replace-fn compiled s fn` → string (fn receives match+groups, returns string)
|
||||||
|
- `regex-split compiled s` → list of strings
|
||||||
|
- `regex-source compiled` → string
|
||||||
|
- `regex-flags compiled` → string
|
||||||
|
Ideally a single `(js-regex-platform-install-all! platform)` entry point the host calls once at boot. OCaml would wrap `Str` / `Re` or a dedicated regex lib; JS host can just delegate to the native `RegExp`.
|
||||||
|
|
||||||
- **Math trig + transcendental primitives missing.** The scoreboard shows 34× "TypeError: not a function" across the Math category — every one a test calling `Math.sin/cos/tan/log/…` on our runtime. We shim `Math` via `js-global`; the SX runtime supplies `sqrt`, `pow`, `abs`, `floor`, `ceil`, `round` and a hand-rolled `trunc`/`sign`/`cbrt`/`hypot`. Nothing else. Missing platform primitives (each is a one-line OCaml/JS binding, but a primitive all the same — we can't land approximation polynomials from inside the JS shim, they'd blow `Math.sin(1e308)` precision):
|
- **Math trig + transcendental primitives missing.** The scoreboard shows 34× "TypeError: not a function" across the Math category — every one a test calling `Math.sin/cos/tan/log/…` on our runtime. We shim `Math` via `js-global`; the SX runtime supplies `sqrt`, `pow`, `abs`, `floor`, `ceil`, `round` and a hand-rolled `trunc`/`sign`/`cbrt`/`hypot`. Nothing else. Missing platform primitives (each is a one-line OCaml/JS binding, but a primitive all the same — we can't land approximation polynomials from inside the JS shim, they'd blow `Math.sin(1e308)` precision):
|
||||||
- Trig: `sin`, `cos`, `tan`, `asin`, `acos`, `atan`, `atan2`
|
- Trig: `sin`, `cos`, `tan`, `asin`, `acos`, `atan`, `atan2`
|
||||||
|
|||||||
@@ -50,7 +50,7 @@ Core mapping:
|
|||||||
## Roadmap
|
## Roadmap
|
||||||
|
|
||||||
### Phase 1 — tokenizer + parser (the Dodekalogue)
|
### Phase 1 — tokenizer + parser (the Dodekalogue)
|
||||||
- [ ] Tokenizer applying the 12 rules:
|
- [x] Tokenizer applying the 12 rules:
|
||||||
1. Commands separated by `;` or newlines
|
1. Commands separated by `;` or newlines
|
||||||
2. Words separated by whitespace within a command
|
2. Words separated by whitespace within a command
|
||||||
3. Double-quoted words: `\` escapes + `[…]` + `${…}` + `$var` substitution
|
3. Double-quoted words: `\` escapes + `[…]` + `${…}` + `$var` substitution
|
||||||
@@ -63,8 +63,8 @@ Core mapping:
|
|||||||
10. Order of substitution is left-to-right, single-pass
|
10. Order of substitution is left-to-right, single-pass
|
||||||
11. Substitutions don't recurse — substituted text is not re-parsed
|
11. Substitutions don't recurse — substituted text is not re-parsed
|
||||||
12. The result of any substitution is the value, not a new script
|
12. The result of any substitution is the value, not a new script
|
||||||
- [ ] Parser: script = list of commands; command = list of words; word = literal string + list of substitutions
|
- [x] Parser: script = list of commands; command = list of words; word = literal string + list of substitutions
|
||||||
- [ ] Unit tests in `lib/tcl/tests/parse.sx`
|
- [x] Unit tests in `lib/tcl/tests/parse.sx`
|
||||||
|
|
||||||
### Phase 2 — sequential eval + core commands
|
### Phase 2 — sequential eval + core commands
|
||||||
- [ ] `tcl-eval-script`: walk command list, dispatch each first-word into command table
|
- [ ] `tcl-eval-script`: walk command list, dispatch each first-word into command table
|
||||||
@@ -120,7 +120,8 @@ Core mapping:
|
|||||||
|
|
||||||
_Newest first._
|
_Newest first._
|
||||||
|
|
||||||
- _(none yet)_
|
- 2026-04-25: Phase 1 parser — `lib/tcl/parser.sx`, word-simple?/word-literal helpers, 67 tests green, commit 6ee05259
|
||||||
|
- 2026-04-25: Phase 1 tokenizer (Dodekalogue) — `lib/tcl/tokenizer.sx`, 52 tests green, commit 666e29d5
|
||||||
|
|
||||||
## Blockers
|
## Blockers
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user