Files
rose-ash/lib/js/lexer.sx
giles ce46420c2e js-on-sx: regex literal lex+parse+transpile+runtime stub
Lexer: js-regex-context? disambiguates / based on prior token;
read-regex handles [...] classes and \ escapes. Emits
{:type "regex" :value {:pattern :flags}}.

Parser: new primary branch → (js-regex pat flags).

Transpile: (js-regex-new pat flags).

Runtime: js-regex? predicate, js-regex-new builds tagged dict with
source/flags/global/ignoreCase/multiline/sticky/unicode/dotAll/
hasIndices/lastIndex. js-regex-invoke-method dispatches .test/.exec/
.toString. js-invoke-method detects regex receivers. Stub engine
uses js-string-index-of; __js_regex_platform__ + override! let a
real engine plug in later.

Runner: repeatable --filter flags (OR'd).

308/310 unit (+30 regex tests), 148/148 slice unchanged.
2026-04-23 20:27:19 +00:00

610 lines
22 KiB
Plaintext

;; lib/js/lexer.sx — JavaScript source → token stream
;;
;; Tokens: {:type T :value V :pos P}
;; Types:
;; "number" — numeric literals (decoded into value as number)
;; "string" — string literals (decoded, escape sequences processed)
;; "template"— template literal: "" when empty, a plain string when it has
;; no ${...} part, otherwise a list of ("str" text) / ("expr" source) parts
;; "ident" — identifier (not a reserved word)
;; "keyword" — reserved word
;; "punct" — ( ) [ ] { } , ; : . ...
;; "op" — all operator tokens (incl. = == === !== < > etc.)
;; "regex" — regex literal; value is {:pattern P :flags F}
;; "eof" — end of input
;;
;; NOTE: `cond` clauses take exactly ONE body expression — multi-body
;; clauses must wrap their body in `(do ...)`.
;; ── Token constructor ─────────────────────────────────────────────
;; Build a token dict from its three fields. The lexer passes the source
;; offset at which the token starts as `pos`.
(define
  js-make-token
  (fn
    (type value pos)
    {:pos pos
     :value value
     :type type}))
;; ── Character predicates ──────────────────────────────────────────
;; True when the one-character string `c` lies in "0".."9" (inclusive),
;; using string ordering.
(define
  js-digit?
  (fn
    (c)
    (and (<= "0" c) (>= "9" c))))
;; True when `c` is a hexadecimal digit: 0-9, a-f or A-F.
(define
  js-hex-digit?
  (fn
    (c)
    (cond
      ((js-digit? c) true)
      ((and (<= "a" c) (>= "f" c)) true)
      (else (and (<= "A" c) (>= "F" c))))))
;; True when `c` is an ASCII letter (a-z or A-Z).
(define
  js-letter?
  (fn
    (c)
    (if
      (and (<= "a" c) (>= "z" c))
      true
      (and (<= "A" c) (>= "Z" c)))))
;; True when `c` may begin an identifier: an ASCII letter, "_" or "$".
(define
  js-ident-start?
  (fn
    (c)
    (if
      (js-letter? c)
      true
      (or (= c "_") (= c "$")))))
;; True when `c` may appear inside an identifier: anything that can start
;; one, or a decimal digit.
(define
  js-ident-char?
  (fn
    (c)
    (if
      (js-ident-start? c)
      true
      (js-digit? c))))
;; True when `c` is one of the four whitespace characters the lexer skips:
;; space, tab, newline, carriage return.
(define
  js-ws?
  (fn
    (c)
    (contains? (list " " "\t" "\n" "\r") c)))
;; ── Reserved words ────────────────────────────────────────────────
;; Words the lexer emits as "keyword" tokens instead of "ident".
;; Order is preserved from the original list.
(define
  js-keywords
  (list
    "break" "case" "catch" "class" "const" "continue"
    "debugger" "default" "delete" "do" "else" "export"
    "extends" "false" "finally" "for" "function" "if"
    "import" "in" "instanceof" "new" "null" "return"
    "super" "switch" "this" "throw" "true" "try"
    "typeof" "undefined" "var" "void" "while" "with"
    "yield" "let" "static" "async" "await" "of"))
;; True when `word` appears in `js-keywords`.
(define
  js-keyword?
  (fn
    (word)
    (contains? js-keywords word)))
;; ── Main tokenizer ────────────────────────────────────────────────
;; Tokenize JavaScript source text.
;;
;; `src` is the source string. Returns the list of tokens built by
;; `js-make-token`, always terminated by an "eof" token whose pos is the
;; offset at which scanning stopped. All helpers below close over the
;; mutable cursor `pos` and the `tokens` accumulator.
;;
;; Unterminated strings, templates, regexes and block comments are not
;; reported: each reader simply stops at end of input.
(define
  js-tokenize
  (fn
    (src)
    (let
      ((tokens (list)) (pos 0) (src-len (len src)))
      ;; ── Cursor primitives ───────────────────────────────────────
      ;; Character at pos+offset, or nil when that is past the end.
      (define
        js-peek
        (fn
          (offset)
          (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
      (define cur (fn () (js-peek 0)))
      (define advance! (fn (n) (set! pos (+ pos n))))
      ;; True when the source at the cursor starts with the string `s`.
      (define
        at?
        (fn
          (s)
          (let
            ((sl (len s)))
            (and (<= (+ pos sl) src-len) (= (slice src pos (+ pos sl)) s)))))
      ;; Append a token; `start` is the offset where the token began.
      (define
        js-emit!
        (fn
          (type value start)
          (append! tokens (js-make-token type value start))))
      ;; ── Whitespace and comments ─────────────────────────────────
      ;; Advance up to (not past) the next "\n" or end of input.
      (define
        skip-line-comment!
        (fn
          ()
          (when
            (and (< pos src-len) (not (= (cur) "\n")))
            (do (advance! 1) (skip-line-comment!)))))
      ;; Advance past the closing "*/", or to end of input if there is none.
      (define
        skip-block-comment!
        (fn
          ()
          (cond
            ((>= pos src-len) nil)
            ((and (= (cur) "*") (< (+ pos 1) src-len) (= (js-peek 1) "/"))
              (advance! 2))
            (else (do (advance! 1) (skip-block-comment!))))))
      ;; Skip any run of whitespace, // comments and /* */ comments.
      (define
        skip-ws!
        (fn
          ()
          (cond
            ((>= pos src-len) nil)
            ((js-ws? (cur)) (do (advance! 1) (skip-ws!)))
            ((and (= (cur) "/") (< (+ pos 1) src-len) (= (js-peek 1) "/"))
              (do (advance! 2) (skip-line-comment!) (skip-ws!)))
            ((and (= (cur) "/") (< (+ pos 1) src-len) (= (js-peek 1) "*"))
              (do (advance! 2) (skip-block-comment!) (skip-ws!)))
            (else nil))))
      ;; ── Identifiers ─────────────────────────────────────────────
      ;; Advance over identifier characters (tail-recursive scan only).
      (define
        skip-ident-chars!
        (fn
          ()
          (when
            (and (< pos src-len) (js-ident-char? (cur)))
            (do (advance! 1) (skip-ident-chars!)))))
      ;; Consume an identifier and return its text, src[start, pos).
      ;; The scan is separated from the slice so the slice happens once
      ;; instead of once per consumed character.
      (define
        read-ident
        (fn
          (start)
          (do
            (skip-ident-chars!)
            (slice src start pos))))
      ;; ── Numbers ─────────────────────────────────────────────────
      (define
        read-decimal-digits!
        (fn
          ()
          (when
            (and (< pos src-len) (js-digit? (cur)))
            (do (advance! 1) (read-decimal-digits!)))))
      (define
        read-hex-digits!
        (fn
          ()
          (when
            (and (< pos src-len) (js-hex-digit? (cur)))
            (do (advance! 1) (read-hex-digits!)))))
      ;; Consume an exponent (e/E, optional sign, digits) only when at
      ;; least one digit follows; otherwise leave the cursor untouched.
      (define
        read-exp-part!
        (fn
          ()
          (when
            (and (< pos src-len) (or (= (cur) "e") (= (cur) "E")))
            (let
              ((p1 (js-peek 1)))
              (when
                (or
                  (and (not (= p1 nil)) (js-digit? p1))
                  (and
                    (or (= p1 "+") (= p1 "-"))
                    (< (+ pos 2) src-len)
                    (js-digit? (js-peek 2))))
                (do
                  (advance! 1)
                  (when
                    (and
                      (< pos src-len)
                      (or (= (cur) "+") (= (cur) "-")))
                    (advance! 1))
                  (read-decimal-digits!)))))))
      ;; Read a numeric literal that starts with a digit: either 0x/0X hex
      ;; or decimal with optional fraction and exponent. Returns the value
      ;; produced by `parse-number`.
      (define
        read-number
        (fn
          (start)
          (cond
            ((and
               (= (cur) "0")
               (< (+ pos 1) src-len)
               (or (= (js-peek 1) "x") (= (js-peek 1) "X")))
              (do
                (advance! 2)
                (read-hex-digits!)
                (let
                  ((raw (slice src (+ start 2) pos)))
                  (parse-number (str "0x" raw)))))
            (else
              (do
                (read-decimal-digits!)
                ;; A "." is part of the number only when a digit follows.
                (when
                  (and
                    (< pos src-len)
                    (= (cur) ".")
                    (< (+ pos 1) src-len)
                    (js-digit? (js-peek 1)))
                  (do (advance! 1) (read-decimal-digits!)))
                (read-exp-part!)
                (parse-number (slice src start pos)))))))
      ;; Read a numeric literal that starts with "." (e.g. `.5`).
      (define
        read-dot-number
        (fn
          (start)
          (do
            (advance! 1)
            (read-decimal-digits!)
            (read-exp-part!)
            (parse-number (slice src start pos)))))
      ;; ── Strings ─────────────────────────────────────────────────
      ;; Read a quoted string whose opening quote is at the cursor and
      ;; return its decoded contents. Stops at the matching quote or at end
      ;; of input.
      (define
        read-string
        (fn
          (quote-char)
          (let
            ((chars (list)))
            (advance! 1)
            (define
              loop
              (fn
                ()
                (cond
                  ((>= pos src-len) nil)
                  ((= (cur) "\\")
                    (do
                      (advance! 1)
                      (when
                        (< pos src-len)
                        (let
                          ((ch (cur)))
                          (do
                            (cond
                              ((= ch "n") (append! chars "\n"))
                              ((= ch "t") (append! chars "\t"))
                              ((= ch "r") (append! chars "\r"))
                              ((= ch "\\") (append! chars "\\"))
                              ((= ch "'") (append! chars "'"))
                              ((= ch "\"") (append! chars "\""))
                              ((= ch "`") (append! chars "`"))
                              ;; NOTE(review): these four append a backslash
                              ;; followed by the letter, not a control
                              ;; character, and read-template appends only
                              ;; the bare letter — confirm which is intended.
                              ((= ch "0") (append! chars "\\0"))
                              ((= ch "b") (append! chars "\\b"))
                              ((= ch "f") (append! chars "\\f"))
                              ((= ch "v") (append! chars "\\v"))
                              (else (append! chars ch)))
                            (advance! 1))))
                      (loop)))
                  ((= (cur) quote-char) (advance! 1))
                  (else (do (append! chars (cur)) (advance! 1) (loop))))))
            (loop)
            (join "" chars))))
      ;; ── Template literals ───────────────────────────────────────
      ;; Read a template literal whose opening backtick is at the cursor.
      ;; Returns "" when empty, a plain string when the only part is text,
      ;; otherwise a list of ("str" text) / ("expr" source) parts.
      (define
        read-template
        (fn
          ()
          (let
            ((parts (list)) (chars (list)))
            (advance! 1)
            ;; Move any pending literal text into `parts`.
            (define
              flush-chars!
              (fn
                ()
                (when
                  (> (len chars) 0)
                  (do
                    (append! parts (list "str" (join "" chars)))
                    (set! chars (list))))))
            ;; Called just after "${": collect raw source up to the matching
            ;; "}" (consumed, not included), tracking nested braces and
            ;; copying '...' / "..." strings verbatim so braces inside them
            ;; are not counted.
            (define
              read-expr-source!
              (fn
                ()
                (let
                  ((buf (list)) (depth 1))
                  (define
                    expr-loop
                    (fn
                      ()
                      (cond
                        ((>= pos src-len) nil)
                        ((and (= (cur) "}") (= depth 1)) (advance! 1))
                        ((= (cur) "}")
                          (do
                            (append! buf (cur))
                            (set! depth (- depth 1))
                            (advance! 1)
                            (expr-loop)))
                        ((= (cur) "{")
                          (do
                            (append! buf (cur))
                            (set! depth (+ depth 1))
                            (advance! 1)
                            (expr-loop)))
                        ((or (= (cur) "\"") (= (cur) "'"))
                          (let
                            ((q (cur)))
                            (do
                              (append! buf q)
                              (advance! 1)
                              (define
                                sloop
                                (fn
                                  ()
                                  (cond
                                    ((>= pos src-len) nil)
                                    ((= (cur) "\\")
                                      (do
                                        (append! buf (cur))
                                        (advance! 1)
                                        (when
                                          (< pos src-len)
                                          (do
                                            (append! buf (cur))
                                            (advance! 1)))
                                        (sloop)))
                                    ((= (cur) q)
                                      (do (append! buf (cur)) (advance! 1)))
                                    (else
                                      (do
                                        (append! buf (cur))
                                        (advance! 1)
                                        (sloop))))))
                              (sloop)
                              (expr-loop))))
                        (else
                          (do (append! buf (cur)) (advance! 1) (expr-loop))))))
                  (expr-loop)
                  (join "" buf))))
            (define
              loop
              (fn
                ()
                (cond
                  ((>= pos src-len) nil)
                  ((= (cur) "`") (advance! 1))
                  ((and (= (cur) "$") (< (+ pos 1) src-len) (= (js-peek 1) "{"))
                    (do
                      (flush-chars!)
                      (advance! 2)
                      ;; Local is named expr-src so it does not shadow the
                      ;; tokenizer's own `src`.
                      (let
                        ((expr-src (read-expr-source!)))
                        (append! parts (list "expr" expr-src)))
                      (loop)))
                  ((= (cur) "\\")
                    (do
                      (advance! 1)
                      (when
                        (< pos src-len)
                        (let
                          ((ch (cur)))
                          (do
                            (cond
                              ((= ch "n") (append! chars "\n"))
                              ((= ch "t") (append! chars "\t"))
                              ((= ch "r") (append! chars "\r"))
                              ((= ch "\\") (append! chars "\\"))
                              ((= ch "'") (append! chars "'"))
                              ((= ch "\"") (append! chars "\""))
                              ((= ch "`") (append! chars "`"))
                              ((= ch "$") (append! chars "$"))
                              ((= ch "0") (append! chars "0"))
                              ((= ch "b") (append! chars "b"))
                              ((= ch "f") (append! chars "f"))
                              ((= ch "v") (append! chars "v"))
                              (else (append! chars ch)))
                            (advance! 1))))
                      (loop)))
                  (else (do (append! chars (cur)) (advance! 1) (loop))))))
            (loop)
            (flush-chars!)
            (if
              (= (len parts) 0)
              ""
              (if
                (and (= (len parts) 1) (= (nth (nth parts 0) 0) "str"))
                (nth (nth parts 0) 1)
                parts)))))
      ;; ── Regex literals ──────────────────────────────────────────
      ;; Decide whether a "/" that follows token number `idx` begins a regex
      ;; literal (true) or is a division operator (false). idx < 0 means
      ;; there is no previous token. `++` and `--` are transparent: after a
      ;; postfix `x++` the answer is the same as after `x` (division), and
      ;; after a prefix `++` it is the same as before it.
      (define
        regex-context-at?
        (fn
          (idx)
          (if
            (< idx 0)
            true
            (let
              ((tk (nth tokens idx)))
              (let
                ((ty (dict-get tk "type")) (vv (dict-get tk "value")))
                (cond
                  ((= ty "punct")
                    (and (not (= vv ")")) (not (= vv "]"))))
                  ((= ty "op")
                    (if
                      (or (= vv "++") (= vv "--"))
                      (regex-context-at? (- idx 1))
                      true))
                  ((= ty "keyword")
                    (contains?
                      (list
                        "return"
                        "typeof"
                        "in"
                        "of"
                        "throw"
                        "new"
                        "delete"
                        "instanceof"
                        "void"
                        "yield"
                        "await"
                        "case"
                        "do"
                        "else")
                      vv))
                  (else false)))))))
      ;; True when a "/" at the cursor should be read as a regex literal,
      ;; judged from the most recently emitted token.
      (define
        js-regex-context?
        (fn () (regex-context-at? (- (len tokens) 1))))
      ;; Read a regex literal whose opening "/" is at the cursor. Backslash
      ;; escapes are copied verbatim and a "/" inside [...] does not end
      ;; the body. Returns {:pattern body :flags trailing-ident-chars}.
      (define
        read-regex
        (fn
          ()
          (let
            ((buf (list)) (in-class false))
            (advance! 1)
            (define
              body-loop
              (fn
                ()
                (cond
                  ((>= pos src-len) nil)
                  ((= (cur) "\\")
                    (do
                      (append! buf (cur))
                      (advance! 1)
                      (when
                        (< pos src-len)
                        (do (append! buf (cur)) (advance! 1)))
                      (body-loop)))
                  ((= (cur) "[")
                    (do
                      (set! in-class true)
                      (append! buf (cur))
                      (advance! 1)
                      (body-loop)))
                  ((= (cur) "]")
                    (do
                      (set! in-class false)
                      (append! buf (cur))
                      (advance! 1)
                      (body-loop)))
                  ((and (= (cur) "/") (not in-class)) (advance! 1))
                  (else
                    (do (append! buf (cur)) (advance! 1) (body-loop))))))
            (body-loop)
            (let
              ((flags-buf (list)))
              (define
                flags-loop
                (fn
                  ()
                  (when
                    (and (< pos src-len) (js-ident-char? (cur)))
                    (do
                      (append! flags-buf (cur))
                      (advance! 1)
                      (flags-loop)))))
              (flags-loop)
              {:pattern (join "" buf) :flags (join "" flags-buf)}))))
      ;; ── Operators and punctuation ───────────────────────────────
      ;; Each try-op-N! emits and consumes an N-character operator at the
      ;; cursor and returns true, or returns false without moving.
      (define
        try-op-4!
        (fn
          (start)
          (cond
            ((at? ">>>=")
              (do (js-emit! "op" ">>>=" start) (advance! 4) true))
            (else false))))
      (define
        try-op-3!
        (fn
          (start)
          (cond
            ((at? "===")
              (do (js-emit! "op" "===" start) (advance! 3) true))
            ((at? "!==")
              (do (js-emit! "op" "!==" start) (advance! 3) true))
            ((at? "**=")
              (do (js-emit! "op" "**=" start) (advance! 3) true))
            ((at? "<<=")
              (do (js-emit! "op" "<<=" start) (advance! 3) true))
            ((at? ">>=")
              (do (js-emit! "op" ">>=" start) (advance! 3) true))
            ((at? ">>>")
              (do (js-emit! "op" ">>>" start) (advance! 3) true))
            ((at? "&&=")
              (do (js-emit! "op" "&&=" start) (advance! 3) true))
            ((at? "||=")
              (do (js-emit! "op" "||=" start) (advance! 3) true))
            ((at? "??=")
              (do (js-emit! "op" "??=" start) (advance! 3) true))
            ((at? "...")
              (do (js-emit! "punct" "..." start) (advance! 3) true))
            (else false))))
      (define
        try-op-2!
        (fn
          (start)
          (cond
            ((at? "==") (do (js-emit! "op" "==" start) (advance! 2) true))
            ((at? "!=") (do (js-emit! "op" "!=" start) (advance! 2) true))
            ((at? "<=") (do (js-emit! "op" "<=" start) (advance! 2) true))
            ((at? ">=") (do (js-emit! "op" ">=" start) (advance! 2) true))
            ((at? "&&") (do (js-emit! "op" "&&" start) (advance! 2) true))
            ((at? "||") (do (js-emit! "op" "||" start) (advance! 2) true))
            ((at? "??") (do (js-emit! "op" "??" start) (advance! 2) true))
            ((at? "=>") (do (js-emit! "op" "=>" start) (advance! 2) true))
            ((at? "**") (do (js-emit! "op" "**" start) (advance! 2) true))
            ((at? "<<") (do (js-emit! "op" "<<" start) (advance! 2) true))
            ((at? ">>") (do (js-emit! "op" ">>" start) (advance! 2) true))
            ((at? "++") (do (js-emit! "op" "++" start) (advance! 2) true))
            ((at? "--") (do (js-emit! "op" "--" start) (advance! 2) true))
            ((at? "+=") (do (js-emit! "op" "+=" start) (advance! 2) true))
            ((at? "-=") (do (js-emit! "op" "-=" start) (advance! 2) true))
            ((at? "*=") (do (js-emit! "op" "*=" start) (advance! 2) true))
            ((at? "/=") (do (js-emit! "op" "/=" start) (advance! 2) true))
            ((at? "%=") (do (js-emit! "op" "%=" start) (advance! 2) true))
            ((at? "&=") (do (js-emit! "op" "&=" start) (advance! 2) true))
            ((at? "|=") (do (js-emit! "op" "|=" start) (advance! 2) true))
            ((at? "^=") (do (js-emit! "op" "^=" start) (advance! 2) true))
            ;; "?." is optional chaining only when NOT followed by a digit;
            ;; `a?.5:b` is a conditional whose middle operand is `.5`.
            ((and
               (at? "?.")
               (not
                 (let
                   ((after (js-peek 2)))
                   (and (not (= after nil)) (js-digit? after)))))
              (do (js-emit! "op" "?." start) (advance! 2) true))
            (else false))))
      ;; Emit the single-character punct/op at the cursor. Characters that
      ;; match no clause are skipped without emitting a token.
      (define
        emit-one-op!
        (fn
          (ch start)
          (cond
            ((= ch "(") (do (js-emit! "punct" "(" start) (advance! 1)))
            ((= ch ")") (do (js-emit! "punct" ")" start) (advance! 1)))
            ((= ch "[") (do (js-emit! "punct" "[" start) (advance! 1)))
            ((= ch "]") (do (js-emit! "punct" "]" start) (advance! 1)))
            ((= ch "{") (do (js-emit! "punct" "{" start) (advance! 1)))
            ((= ch "}") (do (js-emit! "punct" "}" start) (advance! 1)))
            ((= ch ",") (do (js-emit! "punct" "," start) (advance! 1)))
            ((= ch ";") (do (js-emit! "punct" ";" start) (advance! 1)))
            ((= ch ":") (do (js-emit! "punct" ":" start) (advance! 1)))
            ((= ch ".") (do (js-emit! "punct" "." start) (advance! 1)))
            ((= ch "?") (do (js-emit! "op" "?" start) (advance! 1)))
            ((= ch "+") (do (js-emit! "op" "+" start) (advance! 1)))
            ((= ch "-") (do (js-emit! "op" "-" start) (advance! 1)))
            ((= ch "*") (do (js-emit! "op" "*" start) (advance! 1)))
            ((= ch "/") (do (js-emit! "op" "/" start) (advance! 1)))
            ((= ch "%") (do (js-emit! "op" "%" start) (advance! 1)))
            ((= ch "=") (do (js-emit! "op" "=" start) (advance! 1)))
            ((= ch "<") (do (js-emit! "op" "<" start) (advance! 1)))
            ((= ch ">") (do (js-emit! "op" ">" start) (advance! 1)))
            ((= ch "!") (do (js-emit! "op" "!" start) (advance! 1)))
            ((= ch "&") (do (js-emit! "op" "&" start) (advance! 1)))
            ((= ch "|") (do (js-emit! "op" "|" start) (advance! 1)))
            ((= ch "^") (do (js-emit! "op" "^" start) (advance! 1)))
            ((= ch "~") (do (js-emit! "op" "~" start) (advance! 1)))
            (else (advance! 1)))))
      ;; ── Main loop ───────────────────────────────────────────────
      ;; Skip whitespace/comments, emit one token, repeat until end of
      ;; input. Clause order matters: the regex test must precede the
      ;; operator tables, and longer operators are tried before shorter.
      (define
        scan!
        (fn
          ()
          (do
            (skip-ws!)
            (when
              (< pos src-len)
              (let
                ((ch (cur)) (start pos))
                (cond
                  ((or (= ch "\"") (= ch "'"))
                    (do (js-emit! "string" (read-string ch) start) (scan!)))
                  ((= ch "`")
                    (do (js-emit! "template" (read-template) start) (scan!)))
                  ((js-digit? ch)
                    (do
                      (js-emit! "number" (read-number start) start)
                      (scan!)))
                  ((and (= ch ".") (< (+ pos 1) src-len) (js-digit? (js-peek 1)))
                    (do
                      (js-emit! "number" (read-dot-number start) start)
                      (scan!)))
                  ((js-ident-start? ch)
                    (do
                      (let
                        ((word (read-ident start)))
                        (js-emit!
                          (if (js-keyword? word) "keyword" "ident")
                          word
                          start))
                      (scan!)))
                  ((and (= ch "/") (js-regex-context?))
                    (let
                      ((rx (read-regex)))
                      (js-emit! "regex" rx start)
                      (scan!)))
                  ((try-op-4! start) (scan!))
                  ((try-op-3! start) (scan!))
                  ((try-op-2! start) (scan!))
                  (else (do (emit-one-op! ch start) (scan!)))))))))
      (scan!)
      (js-emit! "eof" nil pos)
      tokens)))