Files
rose-ash/lib/lua/tokenizer.sx
giles 781bd36eeb
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
lua: scoreboard iter — trailing-dot numbers, stdlib preload, arg/debug stubs (8x assertion-depth)
2026-04-24 19:49:32 +00:00

350 lines
11 KiB
Plaintext

(define lua-make-token (fn (type value pos) {:pos pos :value value :type type}))
(define lua-digit? (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9"))))
(define
lua-hex-digit?
(fn
(c)
(and
(not (= c nil))
(or
(lua-digit? c)
(and (>= c "a") (<= c "f"))
(and (>= c "A") (<= c "F"))))))
(define
lua-letter?
(fn
(c)
(and
(not (= c nil))
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))))
(define lua-ident-start? (fn (c) (or (lua-letter? c) (= c "_"))))
(define lua-ident-char? (fn (c) (or (lua-ident-start? c) (lua-digit? c))))
(define lua-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
(define
lua-keywords
(list
"and"
"break"
"do"
"else"
"elseif"
"end"
"false"
"for"
"function"
"goto"
"if"
"in"
"local"
"nil"
"not"
"or"
"repeat"
"return"
"then"
"true"
"until"
"while"))
(define lua-keyword? (fn (word) (contains? lua-keywords word)))
(define
lua-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define
lua-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define cur (fn () (lua-peek 0)))
(define advance! (fn (n) (set! pos (+ pos n))))
(define
push!
(fn
(type value start)
(append! tokens (lua-make-token type value start))))
(define
match-long-open
(fn
()
(if
(= (cur) "[")
(let
((p (+ pos 1)) (level 0))
(begin
(define
count-eq
(fn
()
(when
(and (< p src-len) (= (nth src p) "="))
(begin
(set! level (+ level 1))
(set! p (+ p 1))
(count-eq)))))
(count-eq)
(if (and (< p src-len) (= (nth src p) "[")) level -1)))
-1)))
(define
read-long-body
(fn
(level)
(let
((start pos) (result nil))
(begin
(define
scan
(fn
()
(cond
((>= pos src-len) (set! result (slice src start pos)))
((= (cur) "]")
(let
((p (+ pos 1)) (eq-count 0))
(begin
(define
count-eq
(fn
()
(when
(and (< p src-len) (= (nth src p) "="))
(begin
(set! eq-count (+ eq-count 1))
(set! p (+ p 1))
(count-eq)))))
(count-eq)
(if
(and
(= eq-count level)
(< p src-len)
(= (nth src p) "]"))
(begin
(set! result (slice src start pos))
(set! pos (+ p 1)))
(begin (advance! 1) (scan))))))
(else (begin (advance! 1) (scan))))))
(scan)
result))))
(define
skip-line-comment!
(fn
()
(when
(and (< pos src-len) (not (= (cur) "\n")))
(begin (advance! 1) (skip-line-comment!)))))
(define
skip-ws!
(fn
()
(cond
((>= pos src-len) nil)
((lua-ws? (cur)) (begin (advance! 1) (skip-ws!)))
((and (= (cur) "-") (< (+ pos 1) src-len) (= (lua-peek 1) "-"))
(begin
(advance! 2)
(let
((lvl (match-long-open)))
(cond
((>= lvl 0)
(begin
(advance! (+ 2 lvl))
(read-long-body lvl)
(skip-ws!)))
(else (begin (skip-line-comment!) (skip-ws!)))))))
(else nil))))
(define
read-ident
(fn
(start)
(begin
(when
(and (< pos src-len) (lua-ident-char? (cur)))
(begin (advance! 1) (read-ident start)))
(slice src start pos))))
(define
read-decimal-digits!
(fn
()
(when
(and (< pos src-len) (lua-digit? (cur)))
(begin (advance! 1) (read-decimal-digits!)))))
(define
read-hex-digits!
(fn
()
(when
(and (< pos src-len) (lua-hex-digit? (cur)))
(begin (advance! 1) (read-hex-digits!)))))
(define
read-exp-part!
(fn
()
(when
(and (< pos src-len) (or (= (cur) "e") (= (cur) "E")))
(let
((p1 (lua-peek 1)))
(when
(or
(and (not (= p1 nil)) (lua-digit? p1))
(and
(or (= p1 "+") (= p1 "-"))
(< (+ pos 2) src-len)
(lua-digit? (lua-peek 2))))
(begin
(advance! 1)
(when
(and
(< pos src-len)
(or (= (cur) "+") (= (cur) "-")))
(advance! 1))
(read-decimal-digits!)))))))
(define
read-number
(fn
(start)
(cond
((and (= (cur) "0") (< (+ pos 1) src-len) (or (= (lua-peek 1) "x") (= (lua-peek 1) "X")))
(begin
(advance! 2)
(read-hex-digits!)
(let
((raw (slice src (+ start 2) pos)))
(parse-number (str "0x" raw)))))
(else
(begin
(read-decimal-digits!)
(when
(and (< pos src-len) (= (cur) "."))
(begin (advance! 1) (read-decimal-digits!)))
(read-exp-part!)
(parse-number (slice src start pos)))))))
(define
read-string
(fn
(quote-char)
(let
((chars (list)))
(begin
(advance! 1)
(define
loop
(fn
()
(cond
((>= pos src-len) nil)
((= (cur) "\\")
(begin
(advance! 1)
(when
(< pos src-len)
(let
((ch (cur)))
(begin
(cond
((= ch "n") (append! chars "\n"))
((= ch "t") (append! chars "\t"))
((= ch "r") (append! chars "\r"))
((= ch "\\") (append! chars "\\"))
((= ch "'") (append! chars "'"))
((= ch "\"") (append! chars "\""))
(else (append! chars ch)))
(advance! 1))))
(loop)))
((= (cur) quote-char) (advance! 1))
(else
(begin (append! chars (cur)) (advance! 1) (loop))))))
(loop)
(join "" chars)))))
(define
try-punct
(fn
(start)
(let
((c (cur)) (c1 (lua-peek 1)) (c2 (lua-peek 2)))
(cond
((and (= c ".") (= c1 ".") (= c2 "."))
(begin (advance! 3) (push! "op" "..." start) true))
((and (= c ".") (= c1 "."))
(begin (advance! 2) (push! "op" ".." start) true))
((and (= c "=") (= c1 "="))
(begin (advance! 2) (push! "op" "==" start) true))
((and (= c "~") (= c1 "="))
(begin (advance! 2) (push! "op" "~=" start) true))
((and (= c "<") (= c1 "="))
(begin (advance! 2) (push! "op" "<=" start) true))
((and (= c ">") (= c1 "="))
(begin (advance! 2) (push! "op" ">=" start) true))
((and (= c ":") (= c1 ":"))
(begin (advance! 2) (push! "op" "::" start) true))
((or (= c "+") (= c "-") (= c "*") (= c "/") (= c "%") (= c "^") (= c "#") (= c "<") (= c ">") (= c "=") (= c "(") (= c ")") (= c "{") (= c "}") (= c "[") (= c "]") (= c ";") (= c ":") (= c ",") (= c "."))
(begin (advance! 1) (push! "op" c start) true))
(else false)))))
(define
step
(fn
()
(begin
(skip-ws!)
(when
(< pos src-len)
(let
((start pos) (c (cur)))
(cond
((lua-ident-start? c)
(let
((word (read-ident start)))
(begin
(if
(lua-keyword? word)
(push! "keyword" word start)
(push! "ident" word start))
(step))))
((lua-digit? c)
(let
((v (read-number start)))
(begin (push! "number" v start) (step))))
((and (= c ".") (< (+ pos 1) src-len) (lua-digit? (lua-peek 1)))
(begin
(advance! 1)
(read-decimal-digits!)
(read-exp-part!)
(push!
"number"
(parse-number (slice src start pos))
start)
(step)))
((or (= c "\"") (= c "'"))
(let
((s (read-string c)))
(begin (push! "string" s start) (step))))
((= c "[")
(let
((lvl (match-long-open)))
(cond
((>= lvl 0)
(begin
(advance! (+ 2 lvl))
(when (= (cur) "\n") (advance! 1))
(let
((s (read-long-body lvl)))
(begin (push! "string" s start) (step)))))
(else
(begin (advance! 1) (push! "op" "[" start) (step))))))
((try-punct start) (step))
(else
(error
(str "lua-tokenize: unexpected char " c " at " pos)))))))))
(step)
(push! "eof" nil pos)
tokens)))