;; lib/js/lexer.sx — JavaScript source → token stream
;;
;; Tokens: {:type T :value V :pos P}
;; Types:
;;   "number"   — numeric literals (decoded into value as number)
;;   "string"   — string literals (decoded, escape sequences processed)
;;   "template" — template literal; value is "" when empty, a plain string
;;                when there is no ${...} interpolation, otherwise a list of
;;                ("str" text) / ("expr" source-text) parts
;;   "regex"    — regex literal; value is {:pattern P :flags F}
;;   "ident"    — identifier (not a reserved word)
;;   "keyword"  — reserved word
;;   "punct"    — ( ) [ ] { } , ; : . ...
;;   "op"       — all operator tokens (incl. = == === !== < > etc.)
;;   "eof"      — end of input
;;
;; NOTE: `cond` clauses take exactly ONE body expression — multi-body
;; clauses must wrap their body in `(do ...)`.

;; ── Token constructor ─────────────────────────────────────────────

;; Build a token dict from its type, decoded value and start position.
(define js-make-token
  (fn (type value pos)
    {:pos pos :value value :type type}))

;; ── Character predicates ──────────────────────────────────────────
;; Each predicate takes a one-character string.

(define js-digit?
  (fn (c)
    (and (>= c "0") (<= c "9"))))

(define js-hex-digit?
  (fn (c)
    (or (js-digit? c)
        (and (>= c "a") (<= c "f"))
        (and (>= c "A") (<= c "F")))))

(define js-letter?
  (fn (c)
    (or (and (>= c "a") (<= c "z"))
        (and (>= c "A") (<= c "Z")))))

(define js-ident-start?
  (fn (c)
    (or (js-letter? c) (= c "_") (= c "$"))))

(define js-ident-char?
  (fn (c)
    (or (js-ident-start? c) (js-digit? c))))

(define js-ws?
  (fn (c)
    (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))

;; ── Reserved words ────────────────────────────────────────────────

(define js-keywords
  (list "break" "case" "catch" "class" "const" "continue" "debugger"
        "default" "delete" "do" "else" "export" "extends" "false"
        "finally" "for" "function" "if" "import" "in" "instanceof"
        "new" "null" "return" "super" "switch" "this" "throw" "true"
        "try" "typeof" "undefined" "var" "void" "while" "with" "yield"
        "let" "static" "async" "await" "of"))

;; True when `word` is one of js-keywords.
(define js-keyword?
  (fn (word)
    (contains? js-keywords word)))

;; ── Main tokenizer ────────────────────────────────────────────────

;; Tokenize the JavaScript source string `src`.
;; Returns the list of tokens, always terminated by an "eof" token whose
;; :pos is the final scan position.
(define js-tokenize
  (fn (src)
    (let ((tokens (list)) (pos 0) (src-len (len src)))

      ;; ── Cursor helpers (all close over `pos`) ──

      ;; Character at pos+offset, or nil when that is past the end.
      (define js-peek
        (fn (offset)
          (if (< (+ pos offset) src-len)
            (nth src (+ pos offset))
            nil)))

      (define cur (fn () (js-peek 0)))

      (define advance! (fn (n) (set! pos (+ pos n))))

      ;; True when the source at the cursor starts with the string `s`.
      (define at?
        (fn (s)
          (let ((sl (len s)))
            (and (<= (+ pos sl) src-len)
                 (= (slice src pos (+ pos sl)) s)))))

      (define js-emit!
        (fn (type value start)
          (append! tokens (js-make-token type value start))))

      ;; ── Whitespace and comments ──

      ;; Advance up to (not past) the next "\n" or end of input.
      (define skip-line-comment!
        (fn ()
          (when (and (< pos src-len) (not (= (cur) "\n")))
            (do (advance! 1) (skip-line-comment!)))))

      ;; Advance past the closing "*/"; stops at end of input if unterminated.
      (define skip-block-comment!
        (fn ()
          (cond
            ((>= pos src-len) nil)
            ((and (= (cur) "*") (< (+ pos 1) src-len) (= (js-peek 1) "/"))
             (advance! 2))
            (else (do (advance! 1) (skip-block-comment!))))))

      ;; Skip any run of whitespace, // comments and /* */ comments.
      (define skip-ws!
        (fn ()
          (cond
            ((>= pos src-len) nil)
            ((js-ws? (cur)) (do (advance! 1) (skip-ws!)))
            ((and (= (cur) "/") (< (+ pos 1) src-len) (= (js-peek 1) "/"))
             (do (advance! 2) (skip-line-comment!) (skip-ws!)))
            ((and (= (cur) "/") (< (+ pos 1) src-len) (= (js-peek 1) "*"))
             (do (advance! 2) (skip-block-comment!) (skip-ws!)))
            (else nil))))

      ;; ── Identifiers and numbers ──

      ;; Consume identifier characters and return the text from `start`.
      (define read-ident
        (fn (start)
          (do
            (when (and (< pos src-len) (js-ident-char? (cur)))
              (do (advance! 1) (read-ident start)))
            (slice src start pos))))

      (define read-decimal-digits!
        (fn ()
          (when (and (< pos src-len) (js-digit? (cur)))
            (do (advance! 1) (read-decimal-digits!)))))

      (define read-hex-digits!
        (fn ()
          (when (and (< pos src-len) (js-hex-digit? (cur)))
            (do (advance! 1) (read-hex-digits!)))))

      ;; Consume an exponent (e/E, optional sign, digits) only when at least
      ;; one digit follows; otherwise the cursor is left untouched.
      (define read-exp-part!
        (fn ()
          (when (and (< pos src-len) (or (= (cur) "e") (= (cur) "E")))
            (let ((p1 (js-peek 1)))
              (when (or (and (not (= p1 nil)) (js-digit? p1))
                        (and (or (= p1 "+") (= p1 "-"))
                             (< (+ pos 2) src-len)
                             (js-digit? (js-peek 2))))
                (do
                  (advance! 1)
                  (when (and (< pos src-len) (or (= (cur) "+") (= (cur) "-")))
                    (advance! 1))
                  (read-decimal-digits!)))))))

      ;; Read a number starting with a digit: 0x/0X hex, or decimal with an
      ;; optional fraction and exponent. Returns the parse-number result.
      (define read-number
        (fn (start)
          (cond
            ((and (= (cur) "0")
                  (< (+ pos 1) src-len)
                  (or (= (js-peek 1) "x") (= (js-peek 1) "X")))
             (do
               (advance! 2)
               (read-hex-digits!)
               ;; Normalise the prefix to lowercase "0x" before parsing.
               (let ((raw (slice src (+ start 2) pos)))
                 (parse-number (str "0x" raw)))))
            (else
              (do
                (read-decimal-digits!)
                ;; A "." is part of the number only when a digit follows it.
                (when (and (< pos src-len)
                           (= (cur) ".")
                           (< (+ pos 1) src-len)
                           (js-digit? (js-peek 1)))
                  (do (advance! 1) (read-decimal-digits!)))
                (read-exp-part!)
                (parse-number (slice src start pos)))))))

      ;; Read a number that starts with "." (caller has checked a digit follows).
      (define read-dot-number
        (fn (start)
          (do
            (advance! 1)
            (read-decimal-digits!)
            (read-exp-part!)
            (parse-number (slice src start pos)))))

      ;; ── Strings ──

      ;; Read a quoted string whose opening quote is at the cursor and return
      ;; its decoded contents. An unterminated string yields what was read.
      (define read-string
        (fn (quote-char)
          (let ((chars (list)))
            (advance! 1)
            (define loop
              (fn ()
                (cond
                  ((>= pos src-len) nil)
                  ((= (cur) "\\")
                   (do
                     (advance! 1)
                     (when (< pos src-len)
                       (let ((ch (cur)))
                         (do
                           (cond
                             ((= ch "n") (append! chars "\n"))
                             ((= ch "t") (append! chars "\t"))
                             ((= ch "r") (append! chars "\r"))
                             ((= ch "\\") (append! chars "\\"))
                             ((= ch "'") (append! chars "'"))
                             ((= ch "\"") (append! chars "\""))
                             ((= ch "`") (append! chars "`"))
                             ;; NOTE(review): the four literals below are
                             ;; written "\\0" etc.; read-template appends
                             ;; just the letter for the same escapes.
                             ;; Confirm which form is intended.
                             ((= ch "0") (append! chars "\\0"))
                             ((= ch "b") (append! chars "\\b"))
                             ((= ch "f") (append! chars "\\f"))
                             ((= ch "v") (append! chars "\\v"))
                             (else (append! chars ch)))
                           (advance! 1))))
                     (loop)))
                  ((= (cur) quote-char) (advance! 1))
                  (else (do (append! chars (cur)) (advance! 1) (loop))))))
            (loop)
            (join "" chars))))

      ;; ── Template literals ──

      ;; Read a template literal whose opening backtick is at the cursor.
      ;; Returns "" (empty), a string (no interpolation), or a list of
      ;; ("str" text) / ("expr" source) parts.
      (define read-template
        (fn ()
          (let ((parts (list)) (chars (list)))
            (advance! 1)

            ;; Move any pending literal characters into `parts`.
            (define flush-chars!
              (fn ()
                (when (> (len chars) 0)
                  (do
                    (append! parts (list "str" (join "" chars)))
                    (set! chars (list))))))

            ;; Collect the raw source of a ${...} expression up to its matching
            ;; "}", tracking brace depth and copying quoted strings verbatim so
            ;; braces inside them are not counted.
            (define read-expr-source!
              (fn ()
                (let ((buf (list)) (depth 1))
                  (define expr-loop
                    (fn ()
                      (cond
                        ((>= pos src-len) nil)
                        ((and (= (cur) "}") (= depth 1)) (advance! 1))
                        ((= (cur) "}")
                         (do
                           (append! buf (cur))
                           (set! depth (- depth 1))
                           (advance! 1)
                           (expr-loop)))
                        ((= (cur) "{")
                         (do
                           (append! buf (cur))
                           (set! depth (+ depth 1))
                           (advance! 1)
                           (expr-loop)))
                        ((or (= (cur) "\"") (= (cur) "'"))
                         (let ((q (cur)))
                           (do
                             (append! buf q)
                             (advance! 1)
                             (define sloop
                               (fn ()
                                 (cond
                                   ((>= pos src-len) nil)
                                   ((= (cur) "\\")
                                    (do
                                      (append! buf (cur))
                                      (advance! 1)
                                      (when (< pos src-len)
                                        (do (append! buf (cur)) (advance! 1)))
                                      (sloop)))
                                   ((= (cur) q)
                                    (do (append! buf (cur)) (advance! 1)))
                                   (else
                                     (do (append! buf (cur)) (advance! 1) (sloop))))))
                             (sloop)
                             (expr-loop))))
                        (else (do (append! buf (cur)) (advance! 1) (expr-loop))))))
                  (expr-loop)
                  (join "" buf))))

            (define loop
              (fn ()
                (cond
                  ((>= pos src-len) nil)
                  ((= (cur) "`") (advance! 1))
                  ((and (= (cur) "$") (< (+ pos 1) src-len) (= (js-peek 1) "{"))
                   (do
                     (flush-chars!)
                     (advance! 2)
                     ;; Named expr-src so it does not shadow the tokenizer's `src`.
                     (let ((expr-src (read-expr-source!)))
                       (append! parts (list "expr" expr-src)))
                     (loop)))
                  ((= (cur) "\\")
                   (do
                     (advance! 1)
                     (when (< pos src-len)
                       (let ((ch (cur)))
                         (do
                           (cond
                             ((= ch "n") (append! chars "\n"))
                             ((= ch "t") (append! chars "\t"))
                             ((= ch "r") (append! chars "\r"))
                             ((= ch "\\") (append! chars "\\"))
                             ((= ch "'") (append! chars "'"))
                             ((= ch "\"") (append! chars "\""))
                             ((= ch "`") (append! chars "`"))
                             ((= ch "$") (append! chars "$"))
                             ((= ch "0") (append! chars "0"))
                             ((= ch "b") (append! chars "b"))
                             ((= ch "f") (append! chars "f"))
                             ((= ch "v") (append! chars "v"))
                             (else (append! chars ch)))
                           (advance! 1))))
                     (loop)))
                  (else (do (append! chars (cur)) (advance! 1) (loop))))))

            (loop)
            (flush-chars!)
            (if (= (len parts) 0)
              ""
              (if (and (= (len parts) 1) (= (nth (nth parts 0) 0) "str"))
                (nth (nth parts 0) 1)
                parts)))))

      ;; ── Regex literals ──

      ;; Decide from the previously emitted token whether a "/" at the cursor
      ;; starts a regex literal: true at start of input, after any punct other
      ;; than ")" and "]", after any op, and after the keywords listed below.
      (define js-regex-context?
        (fn ()
          (if (= (len tokens) 0)
            true
            (let ((tk (nth tokens (- (len tokens) 1))))
              (let ((ty (dict-get tk "type")) (vv (dict-get tk "value")))
                (cond
                  ((= ty "punct") (and (not (= vv ")")) (not (= vv "]"))))
                  ((= ty "op") true)
                  ((= ty "keyword")
                   (contains?
                     (list "return" "typeof" "in" "of" "throw" "new" "delete"
                           "instanceof" "void" "yield" "await" "case" "do" "else")
                     vv))
                  (else false)))))))

      ;; Read a regex literal whose opening "/" is at the cursor. A "/" inside
      ;; a [...] class does not end the pattern; escapes are copied verbatim.
      ;; Returns {:pattern P :flags F}.
      ;; NOTE(review): this block sequences with `begin` while the rest of the
      ;; file uses `do` — presumably equivalent; confirm before unifying.
      (define read-regex
        (fn ()
          (let ((buf (list)) (in-class false))
            (advance! 1)
            (define body-loop
              (fn ()
                (cond
                  ((>= pos src-len) nil)
                  ((= (cur) "\\")
                   (begin
                     (append! buf (cur))
                     (advance! 1)
                     (when (< pos src-len)
                       (begin (append! buf (cur)) (advance! 1)))
                     (body-loop)))
                  ((= (cur) "[")
                   (begin
                     (set! in-class true)
                     (append! buf (cur))
                     (advance! 1)
                     (body-loop)))
                  ((= (cur) "]")
                   (begin
                     (set! in-class false)
                     (append! buf (cur))
                     (advance! 1)
                     (body-loop)))
                  ((and (= (cur) "/") (not in-class)) (advance! 1))
                  (else (begin (append! buf (cur)) (advance! 1) (body-loop))))))
            (body-loop)
            (let ((flags-buf (list)))
              (define flags-loop
                (fn ()
                  (when (and (< pos src-len) (js-ident-char? (cur)))
                    (begin
                      (append! flags-buf (cur))
                      (advance! 1)
                      (flags-loop)))))
              (flags-loop)
              {:pattern (join "" buf) :flags (join "" flags-buf)}))))

      ;; ── Operators and punctuation ──
      ;; Each try-op-N! emits and consumes an N-character token when one is at
      ;; the cursor and returns true; otherwise it returns false and leaves
      ;; the cursor alone. scan! tries them longest first.

      (define try-op-4!
        (fn (start)
          (cond
            ((at? ">>>=") (do (js-emit! "op" ">>>=" start) (advance! 4) true))
            (else false))))

      (define try-op-3!
        (fn (start)
          (cond
            ((at? "===") (do (js-emit! "op" "===" start) (advance! 3) true))
            ((at? "!==") (do (js-emit! "op" "!==" start) (advance! 3) true))
            ((at? "**=") (do (js-emit! "op" "**=" start) (advance! 3) true))
            ((at? "<<=") (do (js-emit! "op" "<<=" start) (advance! 3) true))
            ((at? ">>=") (do (js-emit! "op" ">>=" start) (advance! 3) true))
            ((at? ">>>") (do (js-emit! "op" ">>>" start) (advance! 3) true))
            ((at? "&&=") (do (js-emit! "op" "&&=" start) (advance! 3) true))
            ((at? "||=") (do (js-emit! "op" "||=" start) (advance! 3) true))
            ((at? "??=") (do (js-emit! "op" "??=" start) (advance! 3) true))
            ((at? "...") (do (js-emit! "punct" "..." start) (advance! 3) true))
            (else false))))

      (define try-op-2!
        (fn (start)
          (cond
            ((at? "==") (do (js-emit! "op" "==" start) (advance! 2) true))
            ((at? "!=") (do (js-emit! "op" "!=" start) (advance! 2) true))
            ((at? "<=") (do (js-emit! "op" "<=" start) (advance! 2) true))
            ((at? ">=") (do (js-emit! "op" ">=" start) (advance! 2) true))
            ((at? "&&") (do (js-emit! "op" "&&" start) (advance! 2) true))
            ((at? "||") (do (js-emit! "op" "||" start) (advance! 2) true))
            ((at? "??") (do (js-emit! "op" "??" start) (advance! 2) true))
            ((at? "=>") (do (js-emit! "op" "=>" start) (advance! 2) true))
            ((at? "**") (do (js-emit! "op" "**" start) (advance! 2) true))
            ((at? "<<") (do (js-emit! "op" "<<" start) (advance! 2) true))
            ((at? ">>") (do (js-emit! "op" ">>" start) (advance! 2) true))
            ((at? "++") (do (js-emit! "op" "++" start) (advance! 2) true))
            ((at? "--") (do (js-emit! "op" "--" start) (advance! 2) true))
            ((at? "+=") (do (js-emit! "op" "+=" start) (advance! 2) true))
            ((at? "-=") (do (js-emit! "op" "-=" start) (advance! 2) true))
            ((at? "*=") (do (js-emit! "op" "*=" start) (advance! 2) true))
            ((at? "/=") (do (js-emit! "op" "/=" start) (advance! 2) true))
            ((at? "%=") (do (js-emit! "op" "%=" start) (advance! 2) true))
            ((at? "&=") (do (js-emit! "op" "&=" start) (advance! 2) true))
            ((at? "|=") (do (js-emit! "op" "|=" start) (advance! 2) true))
            ((at? "^=") (do (js-emit! "op" "^=" start) (advance! 2) true))
            ;; "?." is optional chaining only when NOT followed by a decimal
            ;; digit; in `a ?.5 : b` it must lex as "?" then the number ".5".
            ((and (at? "?.")
                  (not (and (< (+ pos 2) src-len) (js-digit? (js-peek 2)))))
             (do (js-emit! "op" "?." start) (advance! 2) true))
            (else false))))

      ;; Emit the single-character punct/op token for `ch` and consume it.
      ;; Any other character is consumed without emitting a token.
      (define emit-one-op!
        (fn (ch start)
          (cond
            ((= ch "(") (do (js-emit! "punct" "(" start) (advance! 1)))
            ((= ch ")") (do (js-emit! "punct" ")" start) (advance! 1)))
            ((= ch "[") (do (js-emit! "punct" "[" start) (advance! 1)))
            ((= ch "]") (do (js-emit! "punct" "]" start) (advance! 1)))
            ((= ch "{") (do (js-emit! "punct" "{" start) (advance! 1)))
            ((= ch "}") (do (js-emit! "punct" "}" start) (advance! 1)))
            ((= ch ",") (do (js-emit! "punct" "," start) (advance! 1)))
            ((= ch ";") (do (js-emit! "punct" ";" start) (advance! 1)))
            ((= ch ":") (do (js-emit! "punct" ":" start) (advance! 1)))
            ((= ch ".") (do (js-emit! "punct" "." start) (advance! 1)))
            ((= ch "?") (do (js-emit! "op" "?" start) (advance! 1)))
            ((= ch "+") (do (js-emit! "op" "+" start) (advance! 1)))
            ((= ch "-") (do (js-emit! "op" "-" start) (advance! 1)))
            ((= ch "*") (do (js-emit! "op" "*" start) (advance! 1)))
            ((= ch "/") (do (js-emit! "op" "/" start) (advance! 1)))
            ((= ch "%") (do (js-emit! "op" "%" start) (advance! 1)))
            ((= ch "=") (do (js-emit! "op" "=" start) (advance! 1)))
            ((= ch "<") (do (js-emit! "op" "<" start) (advance! 1)))
            ((= ch ">") (do (js-emit! "op" ">" start) (advance! 1)))
            ((= ch "!") (do (js-emit! "op" "!" start) (advance! 1)))
            ((= ch "&") (do (js-emit! "op" "&" start) (advance! 1)))
            ((= ch "|") (do (js-emit! "op" "|" start) (advance! 1)))
            ((= ch "^") (do (js-emit! "op" "^" start) (advance! 1)))
            ((= ch "~") (do (js-emit! "op" "~" start) (advance! 1)))
            (else (advance! 1)))))

      ;; ── Driver ──

      ;; Skip whitespace/comments, emit one token, and repeat until the end
      ;; of input. Clause order matters: strings, templates, numbers,
      ;; identifiers and regex literals are tried before operators, and
      ;; operators are tried longest first.
      (define scan!
        (fn ()
          (do
            (skip-ws!)
            (when (< pos src-len)
              (let ((ch (cur)) (start pos))
                (cond
                  ((or (= ch "\"") (= ch "'"))
                   (do (js-emit! "string" (read-string ch) start) (scan!)))
                  ((= ch "`")
                   (do (js-emit! "template" (read-template) start) (scan!)))
                  ((js-digit? ch)
                   (do (js-emit! "number" (read-number start) start) (scan!)))
                  ((and (= ch ".") (< (+ pos 1) src-len) (js-digit? (js-peek 1)))
                   (do (js-emit! "number" (read-dot-number start) start) (scan!)))
                  ((js-ident-start? ch)
                   (do
                     (let ((word (read-ident start)))
                       (js-emit! (if (js-keyword? word) "keyword" "ident") word start))
                     (scan!)))
                  ((and (= ch "/") (js-regex-context?))
                   (let ((rx (read-regex)))
                     (js-emit! "regex" rx start)
                     (scan!)))
                  ((try-op-4! start) (scan!))
                  ((try-op-3! start) (scan!))
                  ((try-op-2! start) (scan!))
                  (else (do (emit-one-op! ch start) (scan!)))))))))

      (scan!)
      (js-emit! "eof" nil pos)
      tokens)))