(define lua-make-token (fn (type value pos) {:pos pos :value value :type type})) (define lua-digit? (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9")))) (define lua-hex-digit? (fn (c) (and (not (= c nil)) (or (lua-digit? c) (and (>= c "a") (<= c "f")) (and (>= c "A") (<= c "F")))))) (define lua-letter? (fn (c) (and (not (= c nil)) (or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z")))))) (define lua-ident-start? (fn (c) (or (lua-letter? c) (= c "_")))) (define lua-ident-char? (fn (c) (or (lua-ident-start? c) (lua-digit? c)))) (define lua-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) (define lua-keywords (list "and" "break" "do" "else" "elseif" "end" "false" "for" "function" "goto" "if" "in" "local" "nil" "not" "or" "repeat" "return" "then" "true" "until" "while")) (define lua-keyword? (fn (word) (contains? lua-keywords word))) (define lua-tokenize (fn (src) (let ((tokens (list)) (pos 0) (src-len (len src))) (define lua-peek (fn (offset) (if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil))) (define cur (fn () (lua-peek 0))) (define advance! (fn (n) (set! pos (+ pos n)))) (define push! (fn (type value start) (append! tokens (lua-make-token type value start)))) (define match-long-open (fn () (if (= (cur) "[") (let ((p (+ pos 1)) (level 0)) (begin (define count-eq (fn () (when (and (< p src-len) (= (nth src p) "=")) (begin (set! level (+ level 1)) (set! p (+ p 1)) (count-eq))))) (count-eq) (if (and (< p src-len) (= (nth src p) "[")) level -1))) -1))) (define read-long-body (fn (level) (let ((start pos) (result nil)) (begin (define scan (fn () (cond ((>= pos src-len) (set! result (slice src start pos))) ((= (cur) "]") (let ((p (+ pos 1)) (eq-count 0)) (begin (define count-eq (fn () (when (and (< p src-len) (= (nth src p) "=")) (begin (set! eq-count (+ eq-count 1)) (set! p (+ p 1)) (count-eq))))) (count-eq) (if (and (= eq-count level) (< p src-len) (= (nth src p) "]")) (begin (set! result (slice src start pos)) (set! pos (+ p 1))) (begin (advance! 1) (scan)))))) (else (begin (advance! 1) (scan)))))) (scan) result)))) (define skip-line-comment! (fn () (when (and (< pos src-len) (not (= (cur) "\n"))) (begin (advance! 1) (skip-line-comment!))))) (define skip-ws! (fn () (cond ((>= pos src-len) nil) ((lua-ws? (cur)) (begin (advance! 1) (skip-ws!))) ((and (= (cur) "-") (< (+ pos 1) src-len) (= (lua-peek 1) "-")) (begin (advance! 2) (let ((lvl (match-long-open))) (cond ((>= lvl 0) (begin (advance! (+ 2 lvl)) (read-long-body lvl) (skip-ws!))) (else (begin (skip-line-comment!) (skip-ws!))))))) (else nil)))) (define read-ident (fn (start) (begin (when (and (< pos src-len) (lua-ident-char? (cur))) (begin (advance! 1) (read-ident start))) (slice src start pos)))) (define read-decimal-digits! (fn () (when (and (< pos src-len) (lua-digit? (cur))) (begin (advance! 1) (read-decimal-digits!))))) (define read-hex-digits! (fn () (when (and (< pos src-len) (lua-hex-digit? (cur))) (begin (advance! 1) (read-hex-digits!))))) (define read-exp-part! (fn () (when (and (< pos src-len) (or (= (cur) "e") (= (cur) "E"))) (let ((p1 (lua-peek 1))) (when (or (and (not (= p1 nil)) (lua-digit? p1)) (and (or (= p1 "+") (= p1 "-")) (< (+ pos 2) src-len) (lua-digit? (lua-peek 2)))) (begin (advance! 1) (when (and (< pos src-len) (or (= (cur) "+") (= (cur) "-"))) (advance! 1)) (read-decimal-digits!))))))) (define read-number (fn (start) (cond ((and (= (cur) "0") (< (+ pos 1) src-len) (or (= (lua-peek 1) "x") (= (lua-peek 1) "X"))) (begin (advance! 2) (read-hex-digits!) (let ((raw (slice src (+ start 2) pos))) (parse-number (str "0x" raw))))) (else (begin (read-decimal-digits!) (when (and (< pos src-len) (= (cur) ".") (< (+ pos 1) src-len) (lua-digit? (lua-peek 1))) (begin (advance! 1) (read-decimal-digits!))) (read-exp-part!) (parse-number (slice src start pos))))))) (define read-string (fn (quote-char) (let ((chars (list))) (begin (advance! 1) (define loop (fn () (cond ((>= pos src-len) nil) ((= (cur) "\\") (begin (advance! 1) (when (< pos src-len) (let ((ch (cur))) (begin (cond ((= ch "n") (append! chars "\n")) ((= ch "t") (append! chars "\t")) ((= ch "r") (append! chars "\r")) ((= ch "\\") (append! chars "\\")) ((= ch "'") (append! chars "'")) ((= ch "\"") (append! chars "\"")) (else (append! chars ch))) (advance! 1)))) (loop))) ((= (cur) quote-char) (advance! 1)) (else (begin (append! chars (cur)) (advance! 1) (loop)))))) (loop) (join "" chars))))) (define try-punct (fn (start) (let ((c (cur)) (c1 (lua-peek 1)) (c2 (lua-peek 2))) (cond ((and (= c ".") (= c1 ".") (= c2 ".")) (begin (advance! 3) (push! "op" "..." start) true)) ((and (= c ".") (= c1 ".")) (begin (advance! 2) (push! "op" ".." start) true)) ((and (= c "=") (= c1 "=")) (begin (advance! 2) (push! "op" "==" start) true)) ((and (= c "~") (= c1 "=")) (begin (advance! 2) (push! "op" "~=" start) true)) ((and (= c "<") (= c1 "=")) (begin (advance! 2) (push! "op" "<=" start) true)) ((and (= c ">") (= c1 "=")) (begin (advance! 2) (push! "op" ">=" start) true)) ((and (= c ":") (= c1 ":")) (begin (advance! 2) (push! "op" "::" start) true)) ((or (= c "+") (= c "-") (= c "*") (= c "/") (= c "%") (= c "^") (= c "#") (= c "<") (= c ">") (= c "=") (= c "(") (= c ")") (= c "{") (= c "}") (= c "[") (= c "]") (= c ";") (= c ":") (= c ",") (= c ".")) (begin (advance! 1) (push! "op" c start) true)) (else false))))) (define step (fn () (begin (skip-ws!) (when (< pos src-len) (let ((start pos) (c (cur))) (cond ((lua-ident-start? c) (let ((word (read-ident start))) (begin (if (lua-keyword? word) (push! "keyword" word start) (push! "ident" word start)) (step)))) ((lua-digit? c) (let ((v (read-number start))) (begin (push! "number" v start) (step)))) ((and (= c ".") (< (+ pos 1) src-len) (lua-digit? (lua-peek 1))) (begin (advance! 1) (read-decimal-digits!) (read-exp-part!) (push! "number" (parse-number (slice src start pos)) start) (step))) ((or (= c "\"") (= c "'")) (let ((s (read-string c))) (begin (push! "string" s start) (step)))) ((= c "[") (let ((lvl (match-long-open))) (cond ((>= lvl 0) (begin (advance! (+ 2 lvl)) (when (= (cur) "\n") (advance! 1)) (let ((s (read-long-body lvl))) (begin (push! "string" s start) (step))))) (else (begin (advance! 1) (push! "op" "[" start) (step)))))) ((try-punct start) (step)) (else (error (str "lua-tokenize: unexpected char " c " at " pos))))))))) (step) (push! "eof" nil pos) tokens)))