Files
rose-ash/lib/js/lexer.sx
giles ae86579ae8 js-on-sx: ASI — :nl token flag + return restricted production (525/526 unit, 148/148 slice)
Lexer: adds :nl (newline-before) boolean to every token. scan! resets the flag
before each skip-ws! call; skip-ws! sets it true when it consumes \n or \r.
Parser: jp-token-nl? reads the flag; jp-parse-return-stmt stops before the
expression when a newline precedes it (return\n42 → return undefined). Four
new tests cover the restricted production and the raw flag.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-25 11:53:33 +00:00

613 lines
22 KiB
Plaintext

;; lib/js/lexer.sx — JavaScript source → token stream
;;
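;; Tokens: {:type T :value V :pos P :nl B}
;; :nl is true when a line break was skipped in the whitespace before the
;; token (the newline-before flag the parser reads for ASI).
;; Types:
;; "number" — numeric literals (decoded into value as number)
;; "string" — string literals (decoded, escape sequences processed)
;; "template"— template literal: a plain string when there is no ${...}
;; interpolation, otherwise a list of ("str" text) / ("expr" source) parts
;; (the expression source is kept as raw text)
;; "regex" — regular-expression literal; value is {:pattern P :flags F}
;; "ident" — identifier (not a reserved word)
;; "keyword" — reserved word
;; "punct" — ( ) [ ] { } , ; : . ...
;; "op" — all operator tokens (incl. = == === !== < > etc.)
;; "eof" — end of input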
;;
;; NOTE: `cond` clauses take exactly ONE body expression — multi-body
;; clauses must wrap their body in `(do ...)`.
;; ── Token constructor ─────────────────────────────────────────────
;; Build a token dict from its type, value and source position.
;; Tokens appended by js-tokenize carry an extra :nl field that this
;; constructor does not set.
(define
  js-make-token
  (fn
    (type value pos)
    {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
;; True when the one-character string c lies in the range "0".."9".
(define
  js-digit?
  (fn
    (c)
    (if (>= c "0") (<= c "9") false)))
;; True when c is a hexadecimal digit: 0-9, a-f or A-F.
(define
  js-hex-digit?
  (fn
    (c)
    (cond
      ((js-digit? c) true)
      ((and (>= c "a") (<= c "f")) true)
      (else (and (>= c "A") (<= c "F"))))))
;; True when c is an ASCII letter (a-z or A-Z).
(define
  js-letter?
  (fn
    (c)
    (if
      (and (>= c "a") (<= c "z"))
      true
      (and (>= c "A") (<= c "Z")))))
;; True when c may begin an identifier: an ASCII letter, "_" or "$".
(define
  js-ident-start?
  (fn
    (c)
    (cond
      ((= c "_") true)
      ((= c "$") true)
      (else (js-letter? c)))))
;; True when c may appear inside an identifier: an identifier-start
;; character or a decimal digit.
(define
  js-ident-char?
  (fn
    (c)
    (if (js-ident-start? c) true (js-digit? c))))
;; True when c is one of the four whitespace characters the lexer skips:
;; space, tab, line feed or carriage return.
(define
  js-ws?
  (fn
    (c)
    (contains? (list " " "\t" "\n" "\r") c)))
;; ── Reserved words ────────────────────────────────────────────────
;; Words the lexer emits as "keyword" tokens instead of "ident".
(define
  js-keywords
  (list
    "break" "case" "catch" "class" "const" "continue"
    "debugger" "default" "delete" "do" "else" "export"
    "extends" "false" "finally" "for" "function" "if"
    "import" "in" "instanceof" "new" "null" "return"
    "super" "switch" "this" "throw" "true" "try"
    "typeof" "undefined" "var" "void" "while" "with"
    "yield" "let" "static" "async" "await" "of"))
;; True when word appears in the js-keywords list.
(define
  js-keyword?
  (fn
    (word)
    (contains? js-keywords word)))
;; ── Main tokenizer ────────────────────────────────────────────────
(define
js-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)) (nl-before false))
;; Character at pos+offset, or nil when that index is at or past the end
;; of src.
(define
  js-peek
  (fn
    (offset)
    (let
      ((idx (+ pos offset)))
      (if (>= idx src-len) nil (nth src idx)))))
;; Character at the current position, or nil at end of input.
(define
  cur
  (fn
    ()
    (if (< pos src-len) (nth src pos) nil)))
;; Move the read position forward by n characters.
(define
  advance!
  (fn
    (n)
    (set! pos (+ n pos))))
;; True when the source text starting at pos begins with the string s.
;; Returns false without slicing when s would run past the end of src.
(define
  at?
  (fn
    (s)
    (let
      ((stop (+ pos (len s))))
      (if
        (> stop src-len)
        false
        (= (slice src pos stop) s)))))
;; Append a token to the output list.  The token records the start
;; position, value and type passed in, plus the current nl-before flag
;; under :nl.
(define
  js-emit!
  (fn
    (type value start)
    (let
      ((tok {:pos start :value value :type type :nl nl-before}))
      (append! tokens tok))))
;; Consume the body of a `//` comment.  Stops with pos on the line
;; terminator (or at end of input) so that the skip-ws! call that follows
;; consumes it and records the line break in nl-before.
;; Both "\n" and "\r" end the comment: a bare carriage return is a line
;; terminator in JavaScript, and stopping only at "\n" would swallow the
;; code that follows it.
(define
  skip-line-comment!
  (fn
    ()
    (when
      (and
        (< pos src-len)
        (not (= (cur) "\n"))
        (not (= (cur) "\r")))
      (do (advance! 1) (skip-line-comment!)))))
;; Consume the body of a `/* ... */` comment, including the closing `*/`.
;; An unterminated comment simply runs to end of input.
;; A line break inside the comment sets nl-before: for automatic
;; semicolon insertion a multi-line comment counts as a line terminator,
;; so `return /*\n*/ 42` must be seen as having a newline before 42.
(define
  skip-block-comment!
  (fn
    ()
    (cond
      ((>= pos src-len) nil)
      ((and (= (cur) "*") (< (+ pos 1) src-len) (= (js-peek 1) "/"))
        (advance! 2))
      (else
        (do
          (when
            (or (= (cur) "\n") (= (cur) "\r"))
            (set! nl-before true))
          (advance! 1)
          (skip-block-comment!))))))
;; Skip whitespace, `//` comments and `/* */` comments starting at pos.
;; Sets nl-before to true whenever a "\n" or "\r" whitespace character is
;; consumed.  Stops at the first character that starts none of these.
(define
  skip-ws!
  (fn
    ()
    (when
      (< pos src-len)
      (let
        ((c (cur)) (c2 (js-peek 1)))
        (cond
          ((js-ws? c)
            (do
              (when
                (or (= c "\n") (= c "\r"))
                (set! nl-before true))
              (advance! 1)
              (skip-ws!)))
          ;; js-peek yields nil past the end, so c2 never matches there.
          ((and (= c "/") (= c2 "/"))
            (do (advance! 2) (skip-line-comment!) (skip-ws!)))
          ((and (= c "/") (= c2 "*"))
            (do (advance! 2) (skip-block-comment!) (skip-ws!)))
          (else nil))))))
;; Consume identifier characters from pos and return the text from start
;; up to the final position.
;;   start — index of the identifier's first character.
;; The slice is taken once, after the last identifier character, rather
;; than again at every level of the recursion.
(define
  read-ident
  (fn
    (start)
    (if
      (and (< pos src-len) (js-ident-char? (cur)))
      (do (advance! 1) (read-ident start))
      (slice src start pos))))
;; Advance pos past a (possibly empty) run of decimal digits.
(define
  read-decimal-digits!
  (fn
    ()
    (if
      (and (< pos src-len) (js-digit? (cur)))
      (do (advance! 1) (read-decimal-digits!))
      nil)))
;; Advance pos past a (possibly empty) run of hexadecimal digits.
(define
  read-hex-digits!
  (fn
    ()
    (if
      (and (< pos src-len) (js-hex-digit? (cur)))
      (do (advance! 1) (read-hex-digits!))
      nil)))
;; Consume an exponent suffix (e / E, optional sign, digits) if one
;; starts at pos.  Nothing is consumed unless the marker is followed by a
;; digit, or by a sign that is itself followed by a digit.
(define
  read-exp-part!
  (fn
    ()
    (let
      ((marker (cur)) (after (js-peek 1)))
      (let
        ((signed? (or (= after "+") (= after "-"))))
        (when
          (and
            (or (= marker "e") (= marker "E"))
            (or
              (and (not (= after nil)) (js-digit? after))
              (and signed? (< (+ pos 2) src-len) (js-digit? (js-peek 2)))))
          ;; Step over the marker and, when present, the sign in one go.
          (do (advance! (if signed? 2 1)) (read-decimal-digits!)))))))
;; Read a numeric literal that begins with a digit and return its value
;; as produced by parse-number.
;;   start — index of the literal's first character (pos on entry).
;; Handles 0x / 0X hexadecimal literals, and decimal literals with an
;; optional fraction and optional exponent.
(define
  read-number
  (fn
    (start)
    (let
      ((radix-mark (js-peek 1)))
      (if
        (and (= (cur) "0") (or (= radix-mark "x") (= radix-mark "X")))
        (do
          (advance! 2)
          (read-hex-digits!)
          ;; The prefix is rebuilt as lowercase "0x" whatever case was written.
          (parse-number (str "0x" (slice src (+ start 2) pos))))
        (do
          (read-decimal-digits!)
          ;; A "." only belongs to the number when a digit follows it.
          (when
            (and
              (= (cur) ".")
              (not (= (js-peek 1) nil))
              (js-digit? (js-peek 1)))
            (do (advance! 1) (read-decimal-digits!)))
          (read-exp-part!)
          (parse-number (slice src start pos)))))))
;; Read a numeric literal that begins with "." (such as .5) and return
;; its value as produced by parse-number.
;;   start — index of the leading "." (pos on entry).
(define
  read-dot-number
  (fn
    (start)
    (do
      (advance! 1)
      (read-decimal-digits!)
      (read-exp-part!)
      (let
        ((text (slice src start pos)))
        (parse-number text)))))
;; Read a quoted string literal whose opening quote is at pos and return
;; its decoded contents.  Leaves pos just past the closing quote, or at
;; end of input when the literal is unterminated.
;;   quote-char — the delimiter that opened the literal.
;; A backslash followed by a line terminator is a line continuation and
;; contributes nothing to the value; "\r\n" is consumed as one terminator.
;; NOTE(review): \0 \b \f \v append a backslash plus the letter rather
;; than the control character — confirm whether that is intended.
(define
  read-string
  (fn
    (quote-char)
    (let
      ((chars (list)))
      (advance! 1)
      (define
        loop
        (fn
          ()
          (cond
            ((>= pos src-len) nil)
            ((= (cur) "\\")
              (do
                (advance! 1)
                (when
                  (< pos src-len)
                  (let
                    ((ch (cur)))
                    (do
                      (cond
                        ((= ch "n") (append! chars "\n"))
                        ((= ch "t") (append! chars "\t"))
                        ((= ch "r") (append! chars "\r"))
                        ((= ch "\\") (append! chars "\\"))
                        ((= ch "'") (append! chars "'"))
                        ((= ch "\"") (append! chars "\""))
                        ((= ch "`") (append! chars "`"))
                        ((= ch "0") (append! chars "\\0"))
                        ((= ch "b") (append! chars "\\b"))
                        ((= ch "f") (append! chars "\\f"))
                        ((= ch "v") (append! chars "\\v"))
                        ;; Line continuation: drop the terminator.
                        ((= ch "\n") nil)
                        ;; For "\r\n" step over the "\r" here; the
                        ;; advance below then consumes the "\n".
                        ((= ch "\r")
                          (when (= (js-peek 1) "\n") (advance! 1)))
                        (else (append! chars ch)))
                      (advance! 1))))
                (loop)))
            ((= (cur) quote-char) (advance! 1))
            (else (do (append! chars (cur)) (advance! 1) (loop))))))
      (loop)
      (join "" chars))))
;; Read a template literal whose opening backtick is at pos.
;; Returns "" when the literal is empty, a plain string when the body is a
;; single run of text, and otherwise a list of parts, each either
;; ("str" text) or ("expr" source) where source is the raw text found
;; between `${` and its matching `}`.
;; Leaves pos just past the closing backtick, or at end of input when the
;; literal is unterminated.
(define
read-template
(fn
()
(let
((parts (list)) (chars (list)))
(advance! 1)
;; Move any pending literal characters into parts as one "str" entry
;; and start a fresh character buffer.
(define
flush-chars!
(fn
()
(when
(> (len chars) 0)
(do
(append! parts (list "str" (join "" chars)))
(set! chars (list))))))
;; Collect the raw source of an interpolation, starting just after `${`.
;; Brace depth is tracked so nested { } pairs stay inside the expression;
;; the `}` that closes the interpolation is consumed but not returned.
;; Text inside ' or " quotes is copied verbatim so braces in string
;; literals do not affect depth.  Backticks and comments get no special
;; treatment here.
(define
read-expr-source!
(fn
()
(let
((buf (list)) (depth 1))
(define
expr-loop
(fn
()
(cond
((>= pos src-len) nil)
((and (= (cur) "}") (= depth 1)) (advance! 1))
((= (cur) "}")
(do
(append! buf (cur))
(set! depth (- depth 1))
(advance! 1)
(expr-loop)))
((= (cur) "{")
(do
(append! buf (cur))
(set! depth (+ depth 1))
(advance! 1)
(expr-loop)))
((or (= (cur) "\"") (= (cur) "'"))
(let
((q (cur)))
(do
(append! buf q)
(advance! 1)
;; Copy a quoted string, escapes included, up to and including the
;; matching quote q.
(define
sloop
(fn
()
(cond
((>= pos src-len) nil)
((= (cur) "\\")
(do
(append! buf (cur))
(advance! 1)
(when
(< pos src-len)
(do
(append! buf (cur))
(advance! 1)))
(sloop)))
((= (cur) q)
(do (append! buf (cur)) (advance! 1)))
(else
(do
(append! buf (cur))
(advance! 1)
(sloop))))))
(sloop)
(expr-loop))))
(else
(do (append! buf (cur)) (advance! 1) (expr-loop))))))
(expr-loop)
(join "" buf))))
;; Main body loop: runs until the closing backtick or end of input.
(define
loop
(fn
()
(cond
((>= pos src-len) nil)
((= (cur) "`") (advance! 1))
((and (= (cur) "$") (< (+ pos 1) src-len) (= (js-peek 1) "{"))
(do
(flush-chars!)
(advance! 2)
;; This `src` shadows the tokenizer's source string only inside the let.
(let
((src (read-expr-source!)))
(append! parts (list "expr" src)))
(loop)))
((= (cur) "\\")
(do
(advance! 1)
(when
(< pos src-len)
(let
((ch (cur)))
(do
;; Escape decoding.  \0 \b \f \v append just the bare letter.
(cond
((= ch "n") (append! chars "\n"))
((= ch "t") (append! chars "\t"))
((= ch "r") (append! chars "\r"))
((= ch "\\") (append! chars "\\"))
((= ch "'") (append! chars "'"))
((= ch "\"") (append! chars "\""))
((= ch "`") (append! chars "`"))
((= ch "$") (append! chars "$"))
((= ch "0") (append! chars "0"))
((= ch "b") (append! chars "b"))
((= ch "f") (append! chars "f"))
((= ch "v") (append! chars "v"))
(else (append! chars ch)))
(advance! 1))))
(loop)))
(else (do (append! chars (cur)) (advance! 1) (loop))))))
(loop)
(flush-chars!)
;; Collapse the result: no parts -> "", a lone "str" part -> its text,
;; anything else -> the parts list itself.
(if
(= (len parts) 0)
""
(if
(and (= (len parts) 1) (= (nth (nth parts 0) 0) "str"))
(nth (nth parts 0) 1)
parts)))))
;; Decide whether a "/" at the current position starts a regex literal,
;; judging by the most recently emitted token:
;;   no tokens yet       -> true
;;   "op" token          -> true
;;   "punct" token       -> true unless it is ")" or "]"
;;   "keyword" token     -> true only for the keywords listed below
;;   any other token     -> false
(define
  js-regex-context?
  (fn
    ()
    (let
      ((n (len tokens)))
      (if
        (= n 0)
        true
        (let
          ((prev (nth tokens (- n 1))))
          (let
            ((kind (dict-get prev "type")) (text (dict-get prev "value")))
            (cond
              ((= kind "op") true)
              ((= kind "punct") (not (or (= text ")") (= text "]"))))
              ((= kind "keyword")
                (contains?
                  (list
                    "return" "typeof" "in" "of" "throw" "new" "delete"
                    "instanceof" "void" "yield" "await" "case" "do" "else")
                  text))
              (else false))))))))
;; Read a regex literal whose opening "/" is at pos.
;; Returns {:pattern P :flags F}: P is the raw text between the slashes
;; (escapes kept as written), F is the run of identifier characters that
;; follows the closing slash.  A "/" inside a [...] character class does
;; not end the pattern.  An unterminated literal runs to end of input.
(define
  read-regex
  (fn
    ()
    (let
      ((buf (list)) (in-class false))
      (advance! 1)
      ;; Copy the current character into the pattern and step past it.
      (define
        keep!
        (fn () (begin (append! buf (cur)) (advance! 1))))
      (define
        body-loop
        (fn
          ()
          (cond
            ((>= pos src-len) nil)
            ;; Backslash: keep it and the character it escapes.
            ((= (cur) "\\")
              (begin
                (keep!)
                (when (< pos src-len) (keep!))
                (body-loop)))
            ((= (cur) "[")
              (begin (set! in-class true) (keep!) (body-loop)))
            ((= (cur) "]")
              (begin (set! in-class false) (keep!) (body-loop)))
            ((and (= (cur) "/") (not in-class)) (advance! 1))
            (else (begin (keep!) (body-loop))))))
      (body-loop)
      (let
        ((flags-buf (list)))
        (define
          flags-loop
          (fn
            ()
            (when
              (and (< pos src-len) (js-ident-char? (cur)))
              (begin
                (append! flags-buf (cur))
                (advance! 1)
                (flags-loop)))))
        (flags-loop)
        {:pattern (join "" buf) :flags (join "" flags-buf)}))))
;; Try to lex the only four-character operator, ">>>=".  On a match the
;; token is emitted, pos moves past it and true is returned; otherwise
;; nothing changes and false is returned.
(define
  try-op-4!
  (fn
    (start)
    (if
      (at? ">>>=")
      (do (js-emit! "op" ">>>=" start) (advance! 4) true)
      false)))
;; Try to lex a three-character token at pos.  On a match the token is
;; emitted, pos moves past it and true is returned; otherwise false.
;; "..." is emitted as "punct"; every other entry is an "op".
(define
  try-op-3!
  (fn
    (start)
    (let
      ((ops
         (list "===" "!==" "**=" "<<=" ">>=" ">>>" "&&=" "||=" "??=" "...")))
      ;; Test the candidates in list order, starting at index i.
      (define
        try-from
        (fn
          (i)
          (if
            (>= i (len ops))
            false
            (let
              ((op (nth ops i)))
              (if
                (at? op)
                (do
                  (js-emit! (if (= op "...") "punct" "op") op start)
                  (advance! 3)
                  true)
                (try-from (+ i 1)))))))
      (try-from 0))))
;; Try to lex a two-character operator at pos.  On a match an "op" token
;; is emitted, pos moves past it and true is returned; otherwise false.
(define
  try-op-2!
  (fn
    (start)
    (let
      ((ops
         (list
           "==" "!=" "<=" ">=" "&&" "||" "??" "=>" "**" "<<" ">>"
           "++" "--" "+=" "-=" "*=" "/=" "%=" "&=" "|=" "^=" "?.")))
      ;; Test the candidates in list order, starting at index i.
      (define
        try-from
        (fn
          (i)
          (if
            (>= i (len ops))
            false
            (let
              ((op (nth ops i)))
              (if
                (at? op)
                (do (js-emit! "op" op start) (advance! 2) true)
                (try-from (+ i 1)))))))
      (try-from 0))))
;; Lex the single character ch found at start.  Known punctuation is
;; emitted as "punct", known operators as "op"; any other character is
;; skipped without emitting a token.  pos always advances by one.
(define
  emit-one-op!
  (fn
    (ch start)
    (let
      ((puncts (list "(" ")" "[" "]" "{" "}" "," ";" ":" "."))
       (ops (list "?" "+" "-" "*" "/" "%" "=" "<" ">" "!" "&" "|" "^" "~")))
      (cond
        ((contains? puncts ch)
          (do (js-emit! "punct" ch start) (advance! 1)))
        ((contains? ops ch)
          (do (js-emit! "op" ch start) (advance! 1)))
        (else (advance! 1))))))
;; Lex one token and recurse until the input is exhausted.
;; Each round clears nl-before, skips whitespace and comments (which may
;; set the flag again), then dispatches on the first character.  Every
;; branch consumes at least one character before the recursive call.
(define
  scan!
  (fn
    ()
    (do
      (set! nl-before false)
      (skip-ws!)
      (when
        (< pos src-len)
        (let
          ((ch (cur)) (start pos))
          (do
            (cond
              ((or (= ch "\"") (= ch "'"))
                (js-emit! "string" (read-string ch) start))
              ((= ch "`") (js-emit! "template" (read-template) start))
              ((js-digit? ch) (js-emit! "number" (read-number start) start))
              ;; ".5"-style literal: a dot immediately followed by a digit.
              ((and (= ch ".") (< (+ pos 1) src-len) (js-digit? (js-peek 1)))
                (js-emit! "number" (read-dot-number start) start))
              ((js-ident-start? ch)
                (let
                  ((word (read-ident start)))
                  (js-emit!
                    (if (js-keyword? word) "keyword" "ident")
                    word
                    start)))
              ((and (= ch "/") (js-regex-context?))
                (js-emit! "regex" (read-regex) start))
              ;; The try-op-N! helpers emit and advance themselves when
              ;; they match, so there is nothing left to do here.
              ((try-op-4! start) nil)
              ((try-op-3! start) nil)
              ((try-op-2! start) nil)
              (else (emit-one-op! ch start)))
            (scan!)))))))
(scan!)
(js-emit! "eof" nil pos)
tokens)))