Extracted shared tokeniser primitives:
- Char-class predicates: lex-digit?, lex-hex-digit?, lex-alpha?
(alias lex-letter?), lex-alnum?, lex-ident-start?, lex-ident-char?,
lex-space? (no newline), lex-whitespace? (incl newline). All nil-safe.
- Token record: lex-make-token, lex-make-token-spanning, accessors.
Ported lib/lua/tokenizer.sx and lib/tcl/tokenizer.sx — 7 lua and 5 tcl
predicate definitions collapsed into prefix-rename calls that alias
lua-/tcl- names to lex- primitives. Test scripts (lua/test.sh,
tcl/test.sh, tcl/conformance.sh) load lib/guest/lex.sx and prefix.sx
before the per-language tokenizer.
Verification:
- lua/test.sh: 185/185 = baseline
- tcl/test.sh: 342/342 (parse 67 + eval 169 + error 39 + namespace 22
+ coro 20 + idiom 25)
- tcl/conformance.sh: 3/4 = baseline (event-loop failure is pre-existing)
Two consumers verified — step complete.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
300 lines
10 KiB
Plaintext
300 lines
10 KiB
Plaintext
(prefix-rename "tcl-"
|
|
'((ws? lex-space?)
|
|
(alpha? lex-alpha?)
|
|
(digit? lex-digit?)
|
|
(ident-start? lex-ident-start?)
|
|
(ident-char? lex-ident-char?)))
|
|
|
|
|
|
(define tcl-tokenize
|
|
(fn (src)
|
|
(let ((pos 0) (src-len (len src)) (commands (list)))
|
|
|
|
(define char-at
|
|
(fn (off)
|
|
(if (< (+ pos off) src-len) (nth src (+ pos off)) nil)))
|
|
|
|
(define cur (fn () (char-at 0)))
|
|
|
|
(define advance! (fn (n) (set! pos (+ pos n))))
|
|
|
|
(define skip-ws!
|
|
(fn ()
|
|
(when (tcl-ws? (cur))
|
|
(begin (advance! 1) (skip-ws!)))))
|
|
|
|
(define skip-to-eol!
|
|
(fn ()
|
|
(when (and (< pos src-len) (not (= (cur) "\n")))
|
|
(begin (advance! 1) (skip-to-eol!)))))
|
|
|
|
(define skip-brace-content!
|
|
(fn (d)
|
|
(when (and (< pos src-len) (> d 0))
|
|
(cond
|
|
((= (cur) "{") (begin (advance! 1) (skip-brace-content! (+ d 1))))
|
|
((= (cur) "}") (begin (advance! 1) (skip-brace-content! (- d 1))))
|
|
(else (begin (advance! 1) (skip-brace-content! d)))))))
|
|
|
|
(define skip-dquote-content!
|
|
(fn ()
|
|
(when (and (< pos src-len) (not (= (cur) "\"")))
|
|
(begin
|
|
(when (= (cur) "\\") (advance! 1))
|
|
(when (< pos src-len) (advance! 1))
|
|
(skip-dquote-content!)))))
|
|
|
|
(define parse-bs
|
|
(fn ()
|
|
(advance! 1)
|
|
(let ((c (cur)))
|
|
(cond
|
|
((= c nil) "\\")
|
|
((= c "n") (begin (advance! 1) "\n"))
|
|
((= c "t") (begin (advance! 1) "\t"))
|
|
((= c "r") (begin (advance! 1) "\r"))
|
|
((= c "\\") (begin (advance! 1) "\\"))
|
|
((= c "[") (begin (advance! 1) "["))
|
|
((= c "]") (begin (advance! 1) "]"))
|
|
((= c "{") (begin (advance! 1) "{"))
|
|
((= c "}") (begin (advance! 1) "}"))
|
|
((= c "$") (begin (advance! 1) "$"))
|
|
((= c ";") (begin (advance! 1) ";"))
|
|
((= c "\"") (begin (advance! 1) "\""))
|
|
((= c "'") (begin (advance! 1) "'"))
|
|
((= c " ") (begin (advance! 1) " "))
|
|
((= c "\n")
|
|
(begin
|
|
(advance! 1)
|
|
(skip-ws!)
|
|
" "))
|
|
(else (begin (advance! 1) (str "\\" c)))))))
|
|
|
|
(define parse-cmd-sub
|
|
(fn ()
|
|
(advance! 1)
|
|
(let ((start pos) (depth 1))
|
|
(define scan!
|
|
(fn ()
|
|
(when (and (< pos src-len) (> depth 0))
|
|
(cond
|
|
((= (cur) "[")
|
|
(begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
|
|
((= (cur) "]")
|
|
(begin
|
|
(set! depth (- depth 1))
|
|
(when (> depth 0) (advance! 1))
|
|
(scan!)))
|
|
((= (cur) "{")
|
|
(begin (advance! 1) (skip-brace-content! 1) (scan!)))
|
|
((= (cur) "\"")
|
|
(begin
|
|
(advance! 1)
|
|
(skip-dquote-content!)
|
|
(when (= (cur) "\"") (advance! 1))
|
|
(scan!)))
|
|
((= (cur) "\\")
|
|
(begin (advance! 1) (when (< pos src-len) (advance! 1)) (scan!)))
|
|
(else (begin (advance! 1) (scan!)))))))
|
|
(scan!)
|
|
(let ((src-text (slice src start pos)))
|
|
(begin
|
|
(when (= (cur) "]") (advance! 1))
|
|
{:type "cmd" :src src-text})))))
|
|
|
|
(define scan-name!
|
|
(fn ()
|
|
(when (and (< pos src-len) (not (= (cur) "}")))
|
|
(begin (advance! 1) (scan-name!)))))
|
|
|
|
(define scan-ns-name!
|
|
(fn ()
|
|
(cond
|
|
((tcl-ident-char? (cur))
|
|
(begin (advance! 1) (scan-ns-name!)))
|
|
((and (= (cur) ":") (= (char-at 1) ":"))
|
|
(begin (advance! 2) (scan-ns-name!)))
|
|
(else nil))))
|
|
|
|
(define scan-klit!
|
|
(fn ()
|
|
(when (and (< pos src-len)
|
|
(not (= (cur) ")"))
|
|
(not (= (cur) "$"))
|
|
(not (= (cur) "["))
|
|
(not (= (cur) "\\")))
|
|
(begin (advance! 1) (scan-klit!)))))
|
|
|
|
(define scan-key!
|
|
(fn (kp)
|
|
(when (and (< pos src-len) (not (= (cur) ")")))
|
|
(cond
|
|
((= (cur) "$")
|
|
(begin (append! kp (parse-var-sub)) (scan-key! kp)))
|
|
((= (cur) "[")
|
|
(begin (append! kp (parse-cmd-sub)) (scan-key! kp)))
|
|
((= (cur) "\\")
|
|
(begin
|
|
(append! kp {:type "text" :value (parse-bs)})
|
|
(scan-key! kp)))
|
|
(else
|
|
(let ((kstart pos))
|
|
(begin
|
|
(scan-klit!)
|
|
(append! kp {:type "text" :value (slice src kstart pos)})
|
|
(scan-key! kp))))))))
|
|
|
|
(define parse-var-sub
|
|
(fn ()
|
|
(advance! 1)
|
|
(cond
|
|
((= (cur) "{")
|
|
(begin
|
|
(advance! 1)
|
|
(let ((start pos))
|
|
(begin
|
|
(scan-name!)
|
|
(let ((name (slice src start pos)))
|
|
(begin
|
|
(when (= (cur) "}") (advance! 1))
|
|
{:type "var" :name name}))))))
|
|
((tcl-ident-start? (cur))
|
|
(let ((start pos))
|
|
(begin
|
|
(scan-ns-name!)
|
|
(let ((name (slice src start pos)))
|
|
(if (= (cur) "(")
|
|
(begin
|
|
(advance! 1)
|
|
(let ((key-parts (list)))
|
|
(begin
|
|
(scan-key! key-parts)
|
|
(when (= (cur) ")") (advance! 1))
|
|
{:type "var-arr" :name name :key key-parts})))
|
|
{:type "var" :name name})))))
|
|
(else {:type "text" :value "$"}))))
|
|
|
|
(define scan-lit!
|
|
(fn (stop?)
|
|
(when (and (< pos src-len)
|
|
(not (stop? (cur)))
|
|
(not (= (cur) "$"))
|
|
(not (= (cur) "["))
|
|
(not (= (cur) "\\")))
|
|
(begin (advance! 1) (scan-lit! stop?)))))
|
|
|
|
(define parse-word-parts!
|
|
(fn (parts stop?)
|
|
(when (and (< pos src-len) (not (stop? (cur))))
|
|
(cond
|
|
((= (cur) "$")
|
|
(begin (append! parts (parse-var-sub)) (parse-word-parts! parts stop?)))
|
|
((= (cur) "[")
|
|
(begin (append! parts (parse-cmd-sub)) (parse-word-parts! parts stop?)))
|
|
((= (cur) "\\")
|
|
(begin
|
|
(append! parts {:type "text" :value (parse-bs)})
|
|
(parse-word-parts! parts stop?)))
|
|
(else
|
|
(let ((start pos))
|
|
(begin
|
|
(scan-lit! stop?)
|
|
(when (> pos start)
|
|
(append! parts {:type "text" :value (slice src start pos)}))
|
|
(parse-word-parts! parts stop?))))))))
|
|
|
|
(define parse-brace-word
|
|
(fn ()
|
|
(advance! 1)
|
|
(let ((depth 1) (start pos))
|
|
(define scan!
|
|
(fn ()
|
|
(when (and (< pos src-len) (> depth 0))
|
|
(cond
|
|
((= (cur) "{")
|
|
(begin (set! depth (+ depth 1)) (advance! 1) (scan!)))
|
|
((= (cur) "}")
|
|
(begin (set! depth (- depth 1)) (when (> depth 0) (advance! 1)) (scan!)))
|
|
(else (begin (advance! 1) (scan!)))))))
|
|
(scan!)
|
|
(let ((value (slice src start pos)))
|
|
(begin
|
|
(when (= (cur) "}") (advance! 1))
|
|
{:type "braced" :value value})))))
|
|
|
|
(define parse-dquote-word
|
|
(fn ()
|
|
(advance! 1)
|
|
(let ((parts (list)))
|
|
(begin
|
|
(parse-word-parts! parts (fn (c) (or (= c "\"") (= c nil))))
|
|
(when (= (cur) "\"") (advance! 1))
|
|
{:type "compound" :parts parts :quoted true}))))
|
|
|
|
(define parse-bare-word
|
|
(fn ()
|
|
(let ((parts (list)))
|
|
(begin
|
|
(parse-word-parts!
|
|
parts
|
|
(fn (c) (or (tcl-ws? c) (= c "\n") (= c ";") (= c nil))))
|
|
{:type "compound" :parts parts :quoted false}))))
|
|
|
|
(define parse-word-no-expand
|
|
(fn ()
|
|
(cond
|
|
((= (cur) "{") (parse-brace-word))
|
|
((= (cur) "\"") (parse-dquote-word))
|
|
(else (parse-bare-word)))))
|
|
|
|
(define parse-word
|
|
(fn ()
|
|
(cond
|
|
((and (= (cur) "{") (= (char-at 1) "*") (= (char-at 2) "}"))
|
|
(begin
|
|
(advance! 3)
|
|
{:type "expand" :word (parse-word-no-expand)}))
|
|
((= (cur) "{") (parse-brace-word))
|
|
((= (cur) "\"") (parse-dquote-word))
|
|
(else (parse-bare-word)))))
|
|
|
|
(define parse-words!
|
|
(fn (words)
|
|
(skip-ws!)
|
|
(cond
|
|
((or (= (cur) nil) (= (cur) "\n") (= (cur) ";")) nil)
|
|
((and (= (cur) "\\") (= (char-at 1) "\n"))
|
|
(begin (advance! 2) (skip-ws!) (parse-words! words)))
|
|
(else
|
|
(begin
|
|
(append! words (parse-word))
|
|
(parse-words! words))))))
|
|
|
|
(define skip-seps!
|
|
(fn ()
|
|
(when (< pos src-len)
|
|
(cond
|
|
((or (tcl-ws? (cur)) (= (cur) "\n") (= (cur) ";"))
|
|
(begin (advance! 1) (skip-seps!)))
|
|
((and (= (cur) "\\") (= (char-at 1) "\n"))
|
|
(begin (advance! 2) (skip-seps!)))
|
|
(else nil)))))
|
|
|
|
(define parse-all!
|
|
(fn ()
|
|
(skip-seps!)
|
|
(when (< pos src-len)
|
|
(cond
|
|
((= (cur) "#")
|
|
(begin (skip-to-eol!) (parse-all!)))
|
|
(else
|
|
(let ((words (list)))
|
|
(begin
|
|
(parse-words! words)
|
|
(when (> (len words) 0)
|
|
(append! commands {:type "command" :words words}))
|
|
(parse-all!))))))))
|
|
|
|
(parse-all!)
|
|
commands)))
|