(define __ascii-tok " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~") (define lua-byte-to-char (fn (n) (cond ((= n 9) "\t") ((= n 10) "\n") ((= n 13) "\r") ((and (>= n 32) (<= n 126)) (char-at __ascii-tok (- n 32))) (else "?")))) (define lua-make-token (fn (type value pos) {:pos pos :value value :type type})) (define lua-digit? (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9")))) (define lua-hex-digit? (fn (c) (and (not (= c nil)) (or (lua-digit? c) (and (>= c "a") (<= c "f")) (and (>= c "A") (<= c "F")))))) (define lua-letter? (fn (c) (and (not (= c nil)) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))) (define lua-ident-start? (fn (c) (or (lua-letter? c) (= c "_")))) (define lua-ident-char? (fn (c) (or (lua-ident-start? c) (lua-digit? c)))) (define lua-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) (define lua-keywords (list "and" "break" "do" "else" "elseif" "end" "false" "for" "function" "goto" "if" "in" "local" "nil" "not" "or" "repeat" "return" "then" "true" "until" "while")) (define lua-keyword? (fn (word) (contains? lua-keywords word))) (define lua-tokenize (fn (src) (let ((tokens (list)) (pos 0) (src-len (len src))) (define lua-peek (fn (offset) (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) (define cur (fn () (lua-peek 0))) (define advance! (fn (n) (set! pos (+ pos n)))) (define push! (fn (type value start) (append! tokens (lua-make-token type value start)))) (define match-long-open (fn () (if (= (cur) "[") (let ((p (+ pos 1)) (level 0)) (begin (define count-eq (fn () (when (and (< p src-len) (= (nth src p) "=")) (begin (set! level (+ level 1)) (set! p (+ p 1)) (count-eq))))) (count-eq) (if (and (< p src-len) (= (nth src p) "[")) level -1))) -1))) (define read-long-body (fn (level) (let ((start pos) (result nil)) (begin (define scan (fn () (cond ((>= pos src-len) (set! result (slice src start pos))) ((= (cur) "]") (let ((p (+ pos 1)) (eq-count 0)) (begin (define count-eq (fn () (when (and (< p src-len) (= (nth src p) "=")) (begin (set! eq-count (+ eq-count 1)) (set! p (+ p 1)) (count-eq))))) (count-eq) (if (and (= eq-count level) (< p src-len) (= (nth src p) "]")) (begin (set! result (slice src start pos)) (set! pos (+ p 1))) (begin (advance! 1) (scan)))))) (else (begin (advance! 1) (scan)))))) (scan) result)))) (define skip-line-comment! (fn () (when (and (< pos src-len) (not (= (cur) "\n"))) (begin (advance! 1) (skip-line-comment!))))) (define skip-ws! (fn () (cond ((>= pos src-len) nil) ((lua-ws? (cur)) (begin (advance! 1) (skip-ws!))) ((and (= (cur) "-") (< (+ pos 1) src-len) (= (lua-peek 1) "-")) (begin (advance! 2) (let ((lvl (match-long-open))) (cond ((>= lvl 0) (begin (advance! (+ 2 lvl)) (read-long-body lvl) (skip-ws!))) (else (begin (skip-line-comment!) (skip-ws!))))))) (else nil)))) (define read-ident (fn (start) (begin (when (and (< pos src-len) (lua-ident-char? (cur))) (begin (advance! 1) (read-ident start))) (slice src start pos)))) (define read-decimal-digits! (fn () (when (and (< pos src-len) (lua-digit? (cur))) (begin (advance! 1) (read-decimal-digits!))))) (define read-hex-digits! (fn () (when (and (< pos src-len) (lua-hex-digit? (cur))) (begin (advance! 1) (read-hex-digits!))))) (define read-exp-part! (fn () (when (and (< pos src-len) (or (= (cur) "e") (= (cur) "E"))) (let ((p1 (lua-peek 1))) (when (or (and (not (= p1 nil)) (lua-digit? p1)) (and (or (= p1 "+") (= p1 "-")) (< (+ pos 2) src-len) (lua-digit? (lua-peek 2)))) (begin (advance! 1) (when (and (< pos src-len) (or (= (cur) "+") (= (cur) "-"))) (advance! 1)) (read-decimal-digits!))))))) (define read-number (fn (start) (cond ((and (= (cur) "0") (< (+ pos 1) src-len) (or (= (lua-peek 1) "x") (= (lua-peek 1) "X"))) (begin (advance! 2) (read-hex-digits!) (let ((raw (slice src (+ start 2) pos))) (parse-number (str "0x" raw))))) (else (begin (read-decimal-digits!) (when (and (< pos src-len) (= (cur) ".")) (begin (advance! 1) (read-decimal-digits!))) (read-exp-part!) (parse-number (slice src start pos))))))) (define lua-char-one-tok (fn (n) (cond ((= n 7) (str (list n))) ((= n 8) (str (list n))) ((= n 11) (str (list n))) ((= n 12) (str (list n))) (else (str (list n)))))) (define read-decimal-escape! (fn (chars) (let ((d0 (cur))) (begin (advance! 1) (let ((n (- (char-code d0) (char-code "0")))) (begin (when (and (< pos src-len) (lua-digit? (cur))) (begin (set! n (+ (* n 10) (- (char-code (cur)) (char-code "0")))) (advance! 1) (when (and (< pos src-len) (lua-digit? (cur)) (<= (+ (* n 10) (- (char-code (cur)) (char-code "0"))) 255)) (begin (set! n (+ (* n 10) (- (char-code (cur)) (char-code "0")))) (advance! 1))))) (append! chars (lua-byte-to-char n)))))))) (define read-string (fn (quote-char) (let ((chars (list))) (begin (advance! 1) (define loop (fn () (cond ((>= pos src-len) nil) ((= (cur) "\\") (begin (advance! 1) (when (< pos src-len) (let ((ch (cur))) (begin (cond ((= ch "n") (begin (append! chars "\n") (advance! 1))) ((= ch "t") (begin (append! chars "\t") (advance! 1))) ((= ch "r") (begin (append! chars "\r") (advance! 1))) ((= ch "a") (begin (append! chars (lua-char-one-tok 7)) (advance! 1))) ((= ch "b") (begin (append! chars (lua-char-one-tok 8)) (advance! 1))) ((= ch "f") (begin (append! chars (lua-char-one-tok 12)) (advance! 1))) ((= ch "v") (begin (append! chars (lua-char-one-tok 11)) (advance! 1))) ((= ch "\\") (begin (append! chars "\\") (advance! 1))) ((= ch "'") (begin (append! chars "'") (advance! 1))) ((= ch "\"") (begin (append! chars "\"") (advance! 1))) ((lua-digit? ch) (read-decimal-escape! chars)) (else (begin (append! chars ch) (advance! 1))))))) (loop))) ((= (cur) quote-char) (advance! 1)) (else (begin (append! chars (cur)) (advance! 1) (loop)))))) (loop) (join "" chars))))) (define try-punct (fn (start) (let ((c (cur)) (c1 (lua-peek 1)) (c2 (lua-peek 2))) (cond ((and (= c ".") (= c1 ".") (= c2 ".")) (begin (advance! 3) (push! "op" "..." start) true)) ((and (= c ".") (= c1 ".")) (begin (advance! 2) (push! "op" ".." start) true)) ((and (= c "=") (= c1 "=")) (begin (advance! 2) (push! "op" "==" start) true)) ((and (= c "~") (= c1 "=")) (begin (advance! 2) (push! "op" "~=" start) true)) ((and (= c "<") (= c1 "=")) (begin (advance! 2) (push! "op" "<=" start) true)) ((and (= c ">") (= c1 "=")) (begin (advance! 2) (push! "op" ">=" start) true)) ((and (= c ":") (= c1 ":")) (begin (advance! 2) (push! "op" "::" start) true)) ((or (= c "+") (= c "-") (= c "*") (= c "/") (= c "%") (= c "^") (= c "#") (= c "<") (= c ">") (= c "=") (= c "(") (= c ")") (= c "{") (= c "}") (= c "[") (= c "]") (= c ";") (= c ":") (= c ",") (= c ".")) (begin (advance! 1) (push! "op" c start) true)) (else false))))) (define step (fn () (begin (skip-ws!) (when (< pos src-len) (let ((start pos) (c (cur))) (cond ((lua-ident-start? c) (let ((word (read-ident start))) (begin (if (lua-keyword? word) (push! "keyword" word start) (push! "ident" word start)) (step)))) ((lua-digit? c) (let ((v (read-number start))) (begin (push! "number" v start) (step)))) ((and (= c ".") (< (+ pos 1) src-len) (lua-digit? (lua-peek 1))) (begin (advance! 1) (read-decimal-digits!) (read-exp-part!) (push! "number" (parse-number (slice src start pos)) start) (step))) ((or (= c "\"") (= c "'")) (let ((s (read-string c))) (begin (push! "string" s start) (step)))) ((= c "[") (let ((lvl (match-long-open))) (cond ((>= lvl 0) (begin (advance! (+ 2 lvl)) (when (= (cur) "\n") (advance! 1)) (let ((s (read-long-body lvl))) (begin (push! "string" s start) (step))))) (else (begin (advance! 1) (push! "op" "[" start) (step)))))) ((try-punct start) (step)) (else (error (str "lua-tokenize: unexpected char " c " at " pos))))))))) (step) (push! "eof" nil pos) tokens)))