Files
rose-ash/lib/js/lexer.sx
giles 9e568ad886 js-on-sx: baseline commit (278/280 unit, 148/148 slice, runner stub)
Initial commit of the lib/js/ tree and plans/ directory. A previous
session left template-string work in progress — 278/280 unit tests pass
(2 failing: tpl part-count off-by-one, escaped-backtick ident lookup).
test262-runner.py and scoreboard are placeholders (0/8 with 7 timeouts);
fixing the runner is the next queue item.
2026-04-23 19:42:16 +00:00

520 lines
19 KiB
Plaintext

;; lib/js/lexer.sx — JavaScript source → token stream
;;
;; Tokens: {:type T :value V :pos P}
;; Types:
;; "number" — numeric literals (decoded into value as number)
;; "string" — string literals (decoded, escape sequences processed)
;; "template"— template literal: the decoded string when it has no ${...},
;;             otherwise a list of ("str" text) / ("expr" source-text) parts
;; "ident" — identifier (not a reserved word)
;; "keyword" — reserved word
;; "punct" — ( ) [ ] { } , ; : . ...
;; "op" — all operator tokens (incl. = == === !== < > etc.)
;; "eof" — end of input
;;
;; NOTE: `cond` clauses take exactly ONE body expression — multi-body
;; clauses must wrap their body in `(do ...)`.
;; ── Token constructor ─────────────────────────────────────────────
;; Build one token record: a dict holding the token kind, its decoded
;; value, and the source offset at which the token starts.
(define
  js-make-token
  (fn
    (type value pos)
    {:pos pos :value value :type type}))
;; ── Character predicates ──────────────────────────────────────────
;; True when `c` lies in the ASCII range "0".."9".
(define
  js-digit?
  (fn
    (c)
    (and (<= "0" c) (<= c "9"))))
;; True when `c` is a hexadecimal digit: 0-9, a-f or A-F.
(define
  js-hex-digit?
  (fn
    (c)
    (cond
      ((js-digit? c) true)
      ((and (<= "a" c) (<= c "f")) true)
      (else (and (<= "A" c) (<= c "F"))))))
;; True when `c` is an ASCII letter, lower or upper case.
(define
  js-letter?
  (fn
    (c)
    (if
      (and (<= "a" c) (<= c "z"))
      true
      (and (<= "A" c) (<= c "Z")))))
;; True when `c` may begin an identifier: "_", "$" or an ASCII letter.
(define
  js-ident-start?
  (fn
    (c)
    (cond
      ((= c "_") true)
      ((= c "$") true)
      (else (js-letter? c)))))
;; True when `c` may appear inside an identifier: a digit or any
;; identifier-start character.
(define
  js-ident-char?
  (fn
    (c)
    (if (js-digit? c) true (js-ident-start? c))))
;; True when `c` is a space, tab, line feed or carriage return.
(define
  js-ws?
  (fn
    (c)
    (cond
      ((= c " ") true)
      ((= c "\t") true)
      ((= c "\n") true)
      (else (= c "\r")))))
;; ── Reserved words ────────────────────────────────────────────────
;; Words the lexer tags as "keyword" instead of "ident".
(define
  js-keywords
  (list
    "break" "case" "catch" "class" "const" "continue"
    "debugger" "default" "delete" "do" "else" "export"
    "extends" "false" "finally" "for" "function" "if"
    "import" "in" "instanceof" "new" "null" "return"
    "super" "switch" "this" "throw" "true" "try"
    "typeof" "undefined" "var" "void" "while" "with"
    "yield" "let" "static" "async" "await" "of"))
;; True when `word` appears in `js-keywords`.
(define
  js-keyword?
  (fn
    (word)
    (contains? js-keywords word)))
;; ── Main tokenizer ────────────────────────────────────────────────
(define
js-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
;; Character `offset` places ahead of the cursor, or nil when that index
;; is at or beyond the end of the source.
(define
  js-peek
  (fn
    (offset)
    (let
      ((idx (+ pos offset)))
      (if (>= idx src-len) nil (nth src idx)))))
;; Character under the cursor, or nil at end of input.
(define
  cur
  (fn
    ()
    (if (< pos src-len) (nth src pos) nil)))
;; Move the cursor forward by `n` characters.
(define
  advance!
  (fn
    (n)
    (set! pos (+ n pos))))
;; True when the source text starting at the cursor begins with `s`.
(define
  at?
  (fn
    (s)
    (let
      ((end (+ pos (len s))))
      (if
        (> end src-len)
        false
        (= (slice src pos end) s)))))
;; Append a token of the given type/value, positioned at `start`, to the
;; output token list.
(define
  js-emit!
  (fn
    (type value start)
    (let
      ((tok (js-make-token type value start)))
      (append! tokens tok))))
;; Advance the cursor up to (not past) the next "\n", or to end of input.
(define
  skip-line-comment!
  (fn
    ()
    (cond
      ((>= pos src-len) nil)
      ((= (cur) "\n") nil)
      (else (do (advance! 1) (skip-line-comment!))))))
;; Advance the cursor past the closing "*/" of a block comment; stops
;; quietly at end of input when the comment is never closed.
(define
  skip-block-comment!
  (fn
    ()
    (when
      (< pos src-len)
      ;; js-peek yields nil past the end, so no separate bounds test is needed.
      (if
        (and (= (cur) "*") (= (js-peek 1) "/"))
        (advance! 2)
        (do (advance! 1) (skip-block-comment!))))))
;; Skip any run of whitespace, `//` line comments and `/* */` block
;; comments in front of the cursor.
(define
  skip-ws!
  (fn
    ()
    (when
      (< pos src-len)
      (let
        ((c0 (cur)) (c1 (js-peek 1)))
        (cond
          ((js-ws? c0) (do (advance! 1) (skip-ws!)))
          ((and (= c0 "/") (= c1 "/"))
            (do (advance! 2) (skip-line-comment!) (skip-ws!)))
          ((and (= c0 "/") (= c1 "*"))
            (do (advance! 2) (skip-block-comment!) (skip-ws!)))
          (else nil))))))
;; Consume the identifier that begins at `start` and return its text.
;; The cursor is first moved past every identifier character by a helper
;; whose recursive call is in tail position; the text is then sliced out
;; exactly once, so the work is linear in the identifier's length.
(define
  read-ident
  (fn
    (start)
    (do
      (define
        skip-ident-chars!
        (fn
          ()
          (when
            (and (< pos src-len) (js-ident-char? (cur)))
            (do (advance! 1) (skip-ident-chars!)))))
      (skip-ident-chars!)
      (slice src start pos))))
;; Advance the cursor past a (possibly empty) run of decimal digits.
(define
  read-decimal-digits!
  (fn
    ()
    (cond
      ((>= pos src-len) nil)
      ((js-digit? (cur)) (do (advance! 1) (read-decimal-digits!)))
      (else nil))))
;; Advance the cursor past a (possibly empty) run of hexadecimal digits.
(define
  read-hex-digits!
  (fn
    ()
    (cond
      ((>= pos src-len) nil)
      ((js-hex-digit? (cur)) (do (advance! 1) (read-hex-digits!)))
      (else nil))))
;; Consume an exponent suffix (e/E, optional sign, digits) if one starts
;; at the cursor. The cursor is left untouched unless the marker is
;; followed by a digit, or by a sign and then a digit.
(define
  read-exp-part!
  (fn
    ()
    (let
      ((marker (cur)))
      (when
        (or (= marker "e") (= marker "E"))
        (let
          ((c1 (js-peek 1)) (c2 (js-peek 2)))
          (cond
            ((= c1 nil) nil)
            ;; "e" directly followed by digits
            ((js-digit? c1) (do (advance! 1) (read-decimal-digits!)))
            ;; "e", a sign, then digits
            ((and
               (or (= c1 "+") (= c1 "-"))
               (not (= c2 nil))
               (js-digit? c2))
              (do (advance! 2) (read-decimal-digits!)))
            (else nil)))))))
;; Consume a numeric literal that begins at `start` (the cursor is on its
;; first digit) and return its numeric value.
;;
;; Forms handled:
;;   0x.. / 0X..  hexadecimal, decoded via parse-number on "0x<digits>"
;;   0b.. / 0B..  binary
;;   0o.. / 0O..  octal
;;   otherwise    decimal integer, optional ".digits", optional exponent
;;
;; The binary and octal branches are taken only when at least one valid
;; digit follows the prefix; otherwise the text is read as the decimal
;; literal "0" and the letter is left for the identifier scanner.
(define
  read-number
  (fn
    (start)
    (let
      ((prefix (if (= (cur) "0") (js-peek 1) nil))
       (first-digit (js-peek 2)))
      ;; True when `c` is a digit valid in the given radix (radix <= 10).
      (define
        radix-digit?
        (fn
          (c radix)
          (and (not (= c nil)) (js-digit? c) (< (parse-number c) radix))))
      ;; Accumulate digits of the given radix from the cursor onward and
      ;; return the resulting value.
      (define
        read-radix-int!
        (fn
          (radix acc)
          (if
            (radix-digit? (cur) radix)
            (let
              ((d (parse-number (cur))))
              (advance! 1)
              (read-radix-int! radix (+ (* acc radix) d)))
            acc)))
      (cond
        ((or (= prefix "x") (= prefix "X"))
          (do
            (advance! 2)
            (read-hex-digits!)
            (let
              ((raw (slice src (+ start 2) pos)))
              ;; Re-prefix with lowercase "0x" so "0X.." is parsed the same way.
              (parse-number (str "0x" raw)))))
        ((and (or (= prefix "b") (= prefix "B")) (radix-digit? first-digit 2))
          (do (advance! 2) (read-radix-int! 2 0)))
        ((and (or (= prefix "o") (= prefix "O")) (radix-digit? first-digit 8))
          (do (advance! 2) (read-radix-int! 8 0)))
        (else
          (do
            (read-decimal-digits!)
            ;; A "." joins the literal only when a digit follows it.
            (when
              (and
                (< pos src-len)
                (= (cur) ".")
                (< (+ pos 1) src-len)
                (js-digit? (js-peek 1)))
              (do (advance! 1) (read-decimal-digits!)))
            (read-exp-part!)
            (parse-number (slice src start pos))))))))
;; Consume a literal of the form ".digits[exponent]" that begins at
;; `start` (the cursor is on the dot) and return its numeric value.
(define
  read-dot-number
  (fn
    (start)
    (let
      ((dot-width 1))
      (advance! dot-width)
      (read-decimal-digits!)
      (read-exp-part!)
      (parse-number (slice src start pos)))))
;; ── Escape decoding shared by string and template literals ──
;;
;; NOTE(review): the helpers below rely on a `char-from-code` primitive
;; (code point → one-character string); confirm it is available on every
;; host this lexer runs on. Surrogate halves written as two separate \uXXXX
;; escapes are decoded independently — confirm the host joins them as needed.

;; Index of the first character at or after `i` that is not a hex digit,
;; never scanning past `limit` or the end of the source.
(define
  hex-run-end
  (fn
    (i limit)
    (if
      (and (< i limit) (< i src-len) (js-hex-digit? (nth src i)))
      (hex-run-end (+ i 1) limit)
      i)))
;; Decode the hex digits in [from, to) into a one-character string and
;; move the cursor to `resume`.
(define
  hex-escape-char!
  (fn
    (from to resume)
    (let
      ((code (parse-number (str "0x" (slice src from to)))))
      (set! pos resume)
      (char-from-code code))))
;; Decode one escape sequence. Call with the cursor just past the
;; backslash. Consumes the escape body and returns the text it denotes:
;;   \n \t \r \0 \b \f \v   the corresponding control character
;;   \xHH  \uHHHH  \u{H..}  the character with that code
;;   backslash + newline    "" (line continuation)
;;   anything else          the escaped character itself
;; Returns "" at end of input. A malformed \x or \u escape yields the
;; letter alone and leaves the following characters unconsumed.
(define
  read-escape!
  (fn
    ()
    (if
      (>= pos src-len)
      ""
      (let
        ((ch (cur)))
        (advance! 1)
        (cond
          ((= ch "n") "\n")
          ((= ch "t") "\t")
          ((= ch "r") "\r")
          ((= ch "0") (char-from-code 0))
          ((= ch "b") (char-from-code 8))
          ((= ch "f") (char-from-code 12))
          ((= ch "v") (char-from-code 11))
          ((= ch "x")
            (let
              ((end (hex-run-end pos (+ pos 2))))
              (if (= end (+ pos 2)) (hex-escape-char! pos end end) "x")))
          ((= ch "u")
            (if
              (= (cur) "{")
              (let
                ((end (hex-run-end (+ pos 1) src-len)))
                (if
                  (and (> end (+ pos 1)) (= (js-peek (- end pos)) "}"))
                  (hex-escape-char! (+ pos 1) end (+ end 1))
                  "u"))
              (let
                ((end (hex-run-end pos (+ pos 4))))
                (if (= end (+ pos 4)) (hex-escape-char! pos end end) "u"))))
          ((= ch "\n") "")
          ;; Treat CR LF after the backslash as a single line continuation.
          ((= ch "\r") (do (when (= (cur) "\n") (advance! 1)) ""))
          (else ch))))))
;; Read a quoted string literal. The cursor is on the opening quote;
;; `quote-char` is that quote character. Returns the decoded contents and
;; leaves the cursor after the closing quote (or at end of input when the
;; literal is never closed).
(define
  read-string
  (fn
    (quote-char)
    (let
      ((chars (list)))
      (advance! 1)
      (define
        loop
        (fn
          ()
          (cond
            ((>= pos src-len) nil)
            ((= (cur) "\\")
              (do
                (advance! 1)
                (let
                  ((decoded (read-escape!)))
                  (when (not (= decoded "")) (append! chars decoded)))
                (loop)))
            ((= (cur) quote-char) (advance! 1))
            (else (do (append! chars (cur)) (advance! 1) (loop))))))
      (loop)
      (join "" chars))))
;; Read a template literal. The cursor is on the opening backtick.
;; Returns:
;;   ""            when the literal is empty
;;   a string      when it consists of a single text part
;;   a list        of ("str" text) and ("expr" source-text) parts otherwise
;; Expression parts hold the raw source between "${" and its matching "}".
(define
  read-template
  (fn
    ()
    (let
      ((parts (list)) (chars (list)))
      (advance! 1)
      ;; Emit the pending text, if any, as a "str" part.
      (define
        flush-chars!
        (fn
          ()
          (when
            (> (len chars) 0)
            (do
              (append! parts (list "str" (join "" chars)))
              (set! chars (list))))))
      ;; Copy raw source up to the "}" that closes the current "${",
      ;; tracking nested braces and skipping over quoted strings.
      (define
        read-expr-source!
        (fn
          ()
          (let
            ((buf (list)) (depth 1))
            (define
              expr-loop
              (fn
                ()
                (cond
                  ((>= pos src-len) nil)
                  ((and (= (cur) "}") (= depth 1)) (advance! 1))
                  ((= (cur) "}")
                    (do
                      (append! buf (cur))
                      (set! depth (- depth 1))
                      (advance! 1)
                      (expr-loop)))
                  ((= (cur) "{")
                    (do
                      (append! buf (cur))
                      (set! depth (+ depth 1))
                      (advance! 1)
                      (expr-loop)))
                  ((or (= (cur) "\"") (= (cur) "'"))
                    (let
                      ((q (cur)))
                      (do
                        (append! buf q)
                        (advance! 1)
                        (define
                          sloop
                          (fn
                            ()
                            (cond
                              ((>= pos src-len) nil)
                              ((= (cur) "\\")
                                (do
                                  (append! buf (cur))
                                  (advance! 1)
                                  (when
                                    (< pos src-len)
                                    (do
                                      (append! buf (cur))
                                      (advance! 1)))
                                  (sloop)))
                              ((= (cur) q)
                                (do (append! buf (cur)) (advance! 1)))
                              (else
                                (do
                                  (append! buf (cur))
                                  (advance! 1)
                                  (sloop))))))
                        (sloop)
                        (expr-loop))))
                  (else
                    (do (append! buf (cur)) (advance! 1) (expr-loop))))))
            (expr-loop)
            (join "" buf))))
      (define
        loop
        (fn
          ()
          (cond
            ((>= pos src-len) nil)
            ((= (cur) "`") (advance! 1))
            ((and (= (cur) "$") (< (+ pos 1) src-len) (= (js-peek 1) "{"))
              (do
                (flush-chars!)
                (advance! 2)
                ;; Named expr-src so it does not shadow the tokenizer's `src`.
                (let
                  ((expr-src (read-expr-source!)))
                  (append! parts (list "expr" expr-src)))
                (loop)))
            ((= (cur) "\\")
              (do
                (advance! 1)
                ;; Skip empty decodes so no empty "str" part is created.
                (let
                  ((decoded (read-escape!)))
                  (when (not (= decoded "")) (append! chars decoded)))
                (loop)))
            (else (do (append! chars (cur)) (advance! 1) (loop))))))
      (loop)
      (flush-chars!)
      (if
        (= (len parts) 0)
        ""
        (if
          (and (= (len parts) 1) (= (nth (nth parts 0) 0) "str"))
          (nth (nth parts 0) 1)
          parts)))))
;; Try to match the only four-character operator, ">>>=", at the cursor.
;; On a match, emit it as an "op" token at `start`, advance past it and
;; return true; otherwise return false and leave the cursor alone.
(define
  try-op-4!
  (fn
    (start)
    (if
      (at? ">>>=")
      (do (js-emit! "op" ">>>=" start) (advance! 4) true)
      false)))
;; Try to match a three-character token at the cursor. "..." is emitted
;; as "punct"; the listed operators are emitted as "op". Returns true
;; after emitting and advancing, false when nothing matched.
(define
  try-op-3!
  (fn
    (start)
    (let
      ((tok (if (<= (+ pos 3) src-len) (slice src pos (+ pos 3)) nil)))
      (cond
        ((= tok nil) false)
        ((= tok "...")
          (do (js-emit! "punct" "..." start) (advance! 3) true))
        ((contains?
           (list "===" "!==" "**=" "<<=" ">>=" ">>>" "&&=" "||=" "??=")
           tok)
          (do (js-emit! "op" tok start) (advance! 3) true))
        (else false)))))
;; Try to match a two-character operator at the cursor. On a match, emit
;; it as an "op" token at `start`, advance past it and return true;
;; otherwise return false and leave the cursor alone.
;;
;; "?." is NOT matched when a decimal digit follows it: in `a?.5:1` the
;; "?" is the conditional operator and ".5" is a number, so the caller
;; must fall through to single-character handling.
(define
  try-op-2!
  (fn
    (start)
    (let
      ((tok (if (<= (+ pos 2) src-len) (slice src pos (+ pos 2)) nil))
       (after (js-peek 2)))
      (cond
        ((= tok nil) false)
        ((and (= tok "?.") (not (= after nil)) (js-digit? after)) false)
        ((contains?
           (list
             "==" "!=" "<=" ">=" "&&" "||" "??" "=>" "**" "<<" ">>"
             "++" "--" "+=" "-=" "*=" "/=" "%=" "&=" "|=" "^=" "?.")
           tok)
          (do (js-emit! "op" tok start) (advance! 2) true))
        (else false)))))
;; Emit the single character `ch` as a "punct" or "op" token at `start`
;; and step past it. A character in neither table is stepped over
;; without emitting any token.
(define
  emit-one-op!
  (fn
    (ch start)
    (let
      ((kind
         (cond
           ((contains? (list "(" ")" "[" "]" "{" "}" "," ";" ":" ".") ch)
             "punct")
           ((contains?
              (list "?" "+" "-" "*" "/" "%" "=" "<" ">" "!" "&" "|" "^" "~")
              ch)
             "op")
           (else nil))))
      (when (not (= kind nil)) (js-emit! kind ch start))
      (advance! 1))))
;; Main scanning loop: skip whitespace and comments, emit one token for
;; whatever starts at the cursor, then repeat until the input runs out.
;; Dispatch order: string, template, number, ".digits" number,
;; identifier/keyword, then operators from longest (4) to shortest (1).
(define
  scan!
  (fn
    ()
    (do
      (skip-ws!)
      (when
        (< pos src-len)
        (let
          ((ch (cur)) (start pos) (ahead (js-peek 1)))
          (cond
            ((or (= ch "\"") (= ch "'"))
              (js-emit! "string" (read-string ch) start))
            ((= ch "`") (js-emit! "template" (read-template) start))
            ((js-digit? ch) (js-emit! "number" (read-number start) start))
            ((and (= ch ".") (not (= ahead nil)) (js-digit? ahead))
              (js-emit! "number" (read-dot-number start) start))
            ((js-ident-start? ch)
              (let
                ((word (read-ident start)))
                (js-emit!
                  (if (js-keyword? word) "keyword" "ident")
                  word
                  start)))
            ;; The try-op-N! helpers emit and advance themselves on success.
            ((try-op-4! start) nil)
            ((try-op-3! start) nil)
            ((try-op-2! start) nil)
            (else (emit-one-op! ch start)))
          (scan!))))))
(scan!)
(js-emit! "eof" nil pos)
tokens)))