Files
rose-ash/lib/tcl/tokenizer.sx

309 lines
10 KiB
Plaintext

(define tcl-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\r"))))
(define tcl-alpha?
(fn (c)
(and
(not (= c nil))
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))))
(define tcl-digit?
(fn (c) (and (not (= c nil)) (>= c "0") (<= c "9"))))
(define tcl-ident-start?
(fn (c) (or (tcl-alpha? c) (= c "_"))))
(define tcl-ident-char?
(fn (c) (or (tcl-ident-start? c) (tcl-digit? c))))
(define tcl-tokenize
(fn (src)
(let ((pos 0) (src-len (len src)) (commands (list)))
(define char-at
(fn (off)
(if (< (+ pos off) src-len) (nth src (+ pos off)) nil)))
(define cur (fn () (char-at 0)))
(define advance! (fn (n) (set! pos (+ pos n))))
(define skip-ws!
(fn ()
(when (tcl-ws? (cur))
(begin (advance! 1) (skip-ws!)))))
(define skip-to-eol!
(fn ()
(when (and (< pos src-len) (not (= (cur) "\n")))
(begin (advance! 1) (skip-to-eol!)))))
(define skip-brace-content!
(fn (d)
(when (and (< pos src-len) (> d 0))
(cond
((= (cur) "{") (begin (advance! 1) (skip-brace-content! (+ d 1))))
((= (cur) "}") (begin (advance! 1) (skip-brace-content! (- d 1))))
(else (begin (advance! 1) (skip-brace-content! d)))))))
(define skip-dquote-content!
(fn ()
(when (and (< pos src-len) (not (= (cur) "\"")))
(begin
(when (= (cur) "\\") (advance! 1))
(when (< pos src-len) (advance! 1))
(skip-dquote-content!)))))
(define parse-bs
(fn ()
(advance! 1)
(let ((c (cur)))
(cond
((= c nil) "\\")
((= c "n") (begin (advance! 1) "\n"))
((= c "t") (begin (advance! 1) "\t"))
((= c "r") (begin (advance! 1) "\r"))
((= c "\\") (begin (advance! 1) "\\"))
((= c "[") (begin (advance! 1) "["))
((= c "]") (begin (advance! 1) "]"))
((= c "{") (begin (advance! 1) "{"))
((= c "}") (begin (advance! 1) "}"))
((= c "$") (begin (advance! 1) "$"))
((= c ";") (begin (advance! 1) ";"))
((= c "\"") (begin (advance! 1) "\""))
((= c "'") (begin (advance! 1) "'"))
((= c " ") (begin (advance! 1) " "))
((= c "\n")
(begin
(advance! 1)
(skip-ws!)
" "))
(else (begin (advance! 1) (str "\\" c)))))))
(define parse-cmd-sub
(fn ()
(advance! 1)
(let ((start pos) (depth 1))
(define scan!
(fn ()
(when (and (< pos src-len) (> depth 0))
(cond
((= (cur) "[")
(begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
((= (cur) "]")
(begin
(set! depth (- depth 1))
(when (> depth 0) (advance! 1))
(scan!)))
((= (cur) "{")
(begin (advance! 1) (skip-brace-content! 1) (scan!)))
((= (cur) "\"")
(begin
(advance! 1)
(skip-dquote-content!)
(when (= (cur) "\"") (advance! 1))
(scan!)))
((= (cur) "\\")
(begin (advance! 1) (when (< pos src-len) (advance! 1)) (scan!)))
(else (begin (advance! 1) (scan!)))))))
(scan!)
(let ((src-text (slice src start pos)))
(begin
(when (= (cur) "]") (advance! 1))
{:type "cmd" :src src-text})))))
(define scan-name!
(fn ()
(when (and (< pos src-len) (not (= (cur) "}")))
(begin (advance! 1) (scan-name!)))))
(define scan-ns-name!
(fn ()
(cond
((tcl-ident-char? (cur))
(begin (advance! 1) (scan-ns-name!)))
((and (= (cur) ":") (= (char-at 1) ":"))
(begin (advance! 2) (scan-ns-name!)))
(else nil))))
(define scan-klit!
(fn ()
(when (and (< pos src-len)
(not (= (cur) ")"))
(not (= (cur) "$"))
(not (= (cur) "["))
(not (= (cur) "\\")))
(begin (advance! 1) (scan-klit!)))))
(define scan-key!
(fn (kp)
(when (and (< pos src-len) (not (= (cur) ")")))
(cond
((= (cur) "$")
(begin (append! kp (parse-var-sub)) (scan-key! kp)))
((= (cur) "[")
(begin (append! kp (parse-cmd-sub)) (scan-key! kp)))
((= (cur) "\\")
(begin
(append! kp {:type "text" :value (parse-bs)})
(scan-key! kp)))
(else
(let ((kstart pos))
(begin
(scan-klit!)
(append! kp {:type "text" :value (slice src kstart pos)})
(scan-key! kp))))))))
(define parse-var-sub
(fn ()
(advance! 1)
(cond
((= (cur) "{")
(begin
(advance! 1)
(let ((start pos))
(begin
(scan-name!)
(let ((name (slice src start pos)))
(begin
(when (= (cur) "}") (advance! 1))
{:type "var" :name name}))))))
((tcl-ident-start? (cur))
(let ((start pos))
(begin
(scan-ns-name!)
(let ((name (slice src start pos)))
(if (= (cur) "(")
(begin
(advance! 1)
(let ((key-parts (list)))
(begin
(scan-key! key-parts)
(when (= (cur) ")") (advance! 1))
{:type "var-arr" :name name :key key-parts})))
{:type "var" :name name})))))
(else {:type "text" :value "$"}))))
(define scan-lit!
(fn (stop?)
(when (and (< pos src-len)
(not (stop? (cur)))
(not (= (cur) "$"))
(not (= (cur) "["))
(not (= (cur) "\\")))
(begin (advance! 1) (scan-lit! stop?)))))
(define parse-word-parts!
(fn (parts stop?)
(when (and (< pos src-len) (not (stop? (cur))))
(cond
((= (cur) "$")
(begin (append! parts (parse-var-sub)) (parse-word-parts! parts stop?)))
((= (cur) "[")
(begin (append! parts (parse-cmd-sub)) (parse-word-parts! parts stop?)))
((= (cur) "\\")
(begin
(append! parts {:type "text" :value (parse-bs)})
(parse-word-parts! parts stop?)))
(else
(let ((start pos))
(begin
(scan-lit! stop?)
(when (> pos start)
(append! parts {:type "text" :value (slice src start pos)}))
(parse-word-parts! parts stop?))))))))
(define parse-brace-word
(fn ()
(advance! 1)
(let ((depth 1) (start pos))
(define scan!
(fn ()
(when (and (< pos src-len) (> depth 0))
(cond
((= (cur) "{")
(begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
((= (cur) "}")
(begin (set! depth (- depth 1)) (when (> depth 0) (advance! 1)) (scan!)))
(else (begin (advance! 1) (scan!)))))))
(scan!)
(let ((value (slice src start pos)))
(begin
(when (= (cur) "}") (advance! 1))
{:type "braced" :value value})))))
(define parse-dquote-word
(fn ()
(advance! 1)
(let ((parts (list)))
(begin
(parse-word-parts! parts (fn (c) (or (= c "\"") (= c nil))))
(when (= (cur) "\"") (advance! 1))
{:type "compound" :parts parts :quoted true}))))
(define parse-bare-word
(fn ()
(let ((parts (list)))
(begin
(parse-word-parts!
parts
(fn (c) (or (tcl-ws? c) (= c "\n") (= c ";") (= c nil))))
{:type "compound" :parts parts :quoted false}))))
(define parse-word-no-expand
(fn ()
(cond
((= (cur) "{") (parse-brace-word))
((= (cur) "\"") (parse-dquote-word))
(else (parse-bare-word)))))
(define parse-word
(fn ()
(cond
((and (= (cur) "{") (= (char-at 1) "*") (= (char-at 2) "}"))
(begin
(advance! 3)
{:type "expand" :word (parse-word-no-expand)}))
((= (cur) "{") (parse-brace-word))
((= (cur) "\"") (parse-dquote-word))
(else (parse-bare-word)))))
(define parse-words!
(fn (words)
(skip-ws!)
(cond
((or (= (cur) nil) (= (cur) "\n") (= (cur) ";")) nil)
((and (= (cur) "\\") (= (char-at 1) "\n"))
(begin (advance! 2) (skip-ws!) (parse-words! words)))
(else
(begin
(append! words (parse-word))
(parse-words! words))))))
(define skip-seps!
(fn ()
(when (< pos src-len)
(cond
((or (tcl-ws? (cur)) (= (cur) "\n") (= (cur) ";"))
(begin (advance! 1) (skip-seps!)))
((and (= (cur) "\\") (= (char-at 1) "\n"))
(begin (advance! 2) (skip-seps!)))
(else nil)))))
(define parse-all!
(fn ()
(skip-seps!)
(when (< pos src-len)
(cond
((= (cur) "#")
(begin (skip-to-eol!) (parse-all!)))
(else
(let ((words (list)))
(begin
(parse-words! words)
(when (> (len words) 0)
(append! commands {:type "command" :words words}))
(parse-all!))))))))
(parse-all!)
commands)))