;; lib/go/lex.sx — Go tokenizer with automatic semicolon insertion.
;;
;; Consumes lib/guest/lex.sx character-class predicates.
;;
;; Tokens: {:type T :value V :pos P}
;; Types:
;;   "ident"   — identifiers (foo, _bar, mixedCase)
;;   "keyword" — one of the 25 Go keywords
;;   "int"     — integer literals (decimal, 0x.. hex, 0b.. binary, 0o.. octal,
;;               legacy 0123 octal; underscores between digits allowed)
;;   "float"   — decimal float literals (3.14, .5, 1., 1e10, 1.5e-3, 1E5)
;;   "imag"    — imaginary literals (2i, 3.14i, 1e2i, and radix-prefixed
;;               forms such as 0x1Fi, 0b1i, 0o7i)
;;   "string"  — interpreted string literals "..." OR raw string literals `...`
;;   "rune"    — rune literals 'x' (single char + simple escapes)
;;   "op"      — operators & punctuation; :value is the literal text
;;   "semi"    — explicit ';' or auto-inserted (Go spec § Semicolons)
;;   "eof"     — end-of-input sentinel
;;
;; ASI (Go spec § Semicolons): a newline (or EOF, or a block comment
;; containing a newline) emits a "semi" token if the previous emitted token's
;; type is ident/int/float/imag/string/rune, or its value is one of
;; {break, continue, fallthrough, return, ++, --, ), ], }}.
;;
;; Known limitations (unchanged behaviour): only single-character escapes are
;; decoded in strings and runes (\x.., \u...., octal escapes are not), and
;; characters that match no token rule are skipped without a diagnostic.
;;
;; All scanner locals are gl- prefixed: SX host primitives (peek/emit/etc.)
;; silently shadow guest-language defines. See feedback_sx_bind_clash.

;; The 25 reserved words of Go.
(define go-keywords
  (list "break" "case" "chan" "const" "continue" "default" "defer" "else"
        "fallthrough" "for" "func" "go" "goto" "if" "import" "interface"
        "map" "package" "range" "return" "select" "struct" "switch" "type"
        "var"))

;; Truthy when the string `s` is one of go-keywords.
(define go-keyword?
  (fn (s)
    (some (fn (k) (= k s)) go-keywords)))

;; Keywords, operators and token types after which a line end inserts ";".
(define go-asi-keywords (list "break" "continue" "fallthrough" "return"))
(define go-asi-ops (list "++" "--" ")" "]" "}"))
(define go-asi-lit-types (list "ident" "int" "float" "imag" "string" "rune"))

;; Decide whether a line end directly after token `tok` inserts a semicolon.
;; `tok` is a token dict or nil (no token emitted yet); nil never triggers.
(define go-asi-trigger?
  (fn (tok)
    (if (= tok nil)
      false
      (let ((ty (get tok :type))
            (v (get tok :value)))
        (or (some (fn (lt) (= lt ty)) go-asi-lit-types)
            (and (= ty "keyword") (some (fn (k) (= k v)) go-asi-keywords))
            (and (= ty "op") (some (fn (o) (= o v)) go-asi-ops)))))))

;; Tokenize Go source text.
;;   src     — the source to scan (indexed with nth / slice / len)
;;   returns — list of {:type :value :pos} tokens, always ending in "eof"
;; :pos is the offset in `src` where the token starts; for an auto-inserted
;; "semi" it is the offset at which the insertion was decided.
(define go-tokenize
  (fn (src)
    (let ((tokens (list))
          (pos 0)
          (src-len (len src)))

      ;; ── cursor primitives ──────────────────────────────────────────

      ;; Character `offset` places ahead of the cursor, or nil past the end.
      (define gl-peek
        (fn (offset)
          (if (< (+ pos offset) src-len)
            (nth src (+ pos offset))
            nil)))

      (define gl-cur (fn () (gl-peek 0)))

      (define gl-advance! (fn (n) (set! pos (+ pos n))))

      ;; Most recently emitted token, or nil when none has been emitted.
      (define gl-last
        (fn ()
          (if (= (len tokens) 0)
            nil
            (nth tokens (- (len tokens) 1)))))

      (define gl-emit!
        (fn (type value start)
          (append! tokens {:type type :value value :pos start})))

      ;; Emit an automatic "semi" (value "\n") at offset `at` when the last
      ;; token is one that go-asi-trigger? accepts.
      (define gl-maybe-asi!
        (fn (at)
          (when (go-asi-trigger? (gl-last))
            (gl-emit! "semi" "\n" at))))

      ;; ── character classes local to Go numbers ──────────────────────

      (define gl-oct-digit?
        (fn (c)
          (and (not (= c nil)) (>= c "0") (<= c "7"))))

      (define gl-bin-digit?
        (fn (c)
          (or (= c "0") (= c "1"))))

      ;; ── comments ───────────────────────────────────────────────────

      ;; Advance to (not past) the next newline or to end of input, so the
      ;; newline itself is still seen by gl-scan! and can trigger ASI.
      (define gl-skip-line!
        (fn ()
          (when (and (< pos src-len) (not (= (gl-cur) "\n")))
            (gl-advance! 1)
            (gl-skip-line!))))

      ;; Skip the body of a block comment up to and including "*/" (or to
      ;; end of input if unterminated). Returns true when a newline was
      ;; crossed (or `saw-nl` was already true).
      (define gl-skip-block!
        (fn (saw-nl)
          (cond
            (>= pos src-len) saw-nl
            (and (= (gl-cur) "*") (= (gl-peek 1) "/"))
              (do (gl-advance! 2) saw-nl)
            :else
              (let ((is-nl (= (gl-cur) "\n")))
                (gl-advance! 1)
                (gl-skip-block! (or saw-nl is-nl))))))

      ;; ── identifiers ────────────────────────────────────────────────

      ;; Consume identifier characters; returns the text from `start` to
      ;; the final cursor position.
      (define gl-read-ident!
        (fn (start)
          (when (and (< pos src-len) (lex-ident-char? (gl-cur)))
            (gl-advance! 1)
            (gl-read-ident! start))
          (slice src start pos)))

      ;; ── numbers ────────────────────────────────────────────────────

      ;; Consume a run of characters accepted by `digit?`, also accepting
      ;; "_" anywhere in the run.
      (define gl-read-digit-run!
        (fn (digit?)
          (when (and (< pos src-len)
                     (or (digit? (gl-cur)) (= (gl-cur) "_")))
            (gl-advance! 1)
            (gl-read-digit-run! digit?))))

      ;; Consume a trailing "i" if present. Returns "imag" when one was
      ;; consumed, otherwise `typ` unchanged.
      (define gl-imag-suffix!
        (fn (typ)
          (cond
            (= (gl-cur) "i") (do (gl-advance! 1) "imag")
            :else typ)))

      ;; Consume an optional exponent and an optional imaginary suffix.
      ;; Returns the token type: "int", "float" or "imag".
      (define gl-finish-number!
        (fn (has-fraction?)
          (let ((typ (if has-fraction? "float" "int")))
            (when (or (= (gl-cur) "e") (= (gl-cur) "E"))
              (gl-advance! 1)
              (when (or (= (gl-cur) "+") (= (gl-cur) "-"))
                (gl-advance! 1))
              (gl-read-digit-run! lex-digit?)
              (set! typ "float"))
            (gl-imag-suffix! typ))))

      ;; Consume a two-character radix prefix (0x / 0b / 0o), its digits and
      ;; an optional imaginary suffix. Returns "int" or "imag".
      (define gl-read-prefixed-int!
        (fn (digit?)
          (gl-advance! 2)
          (gl-read-digit-run! digit?)
          (gl-imag-suffix! "int")))

      ;; Consume one numeric literal starting at the cursor and return its
      ;; token type. The caller slices the literal text from src.
      (define gl-read-number!
        (fn ()
          (cond
            ;; leading-dot float: .5
            (and (= (gl-cur) ".") (lex-digit? (gl-peek 1)))
              (do (gl-advance! 1)
                  (gl-read-digit-run! lex-digit?)
                  (gl-finish-number! true))
            (and (= (gl-cur) "0")
                 (or (= (gl-peek 1) "x") (= (gl-peek 1) "X")))
              (gl-read-prefixed-int! lex-hex-digit?)
            (and (= (gl-cur) "0")
                 (or (= (gl-peek 1) "b") (= (gl-peek 1) "B")))
              (gl-read-prefixed-int! gl-bin-digit?)
            (and (= (gl-cur) "0")
                 (or (= (gl-peek 1) "o") (= (gl-peek 1) "O")))
              (gl-read-prefixed-int! gl-oct-digit?)
            :else
              (do (gl-read-digit-run! lex-digit?)
                  (cond
                    ;; A "." starts a fraction unless it is followed by a
                    ;; second "." (left for the operator matcher).
                    (and (= (gl-cur) ".") (not (= (gl-peek 1) ".")))
                      (do (gl-advance! 1)
                          (gl-read-digit-run! lex-digit?)
                          (gl-finish-number! true))
                    :else (gl-finish-number! false))))))

      ;; ── strings and runes ──────────────────────────────────────────

      ;; Decode the character following a backslash. n/t/r map to their
      ;; control characters; every other character (including \, " and ')
      ;; stands for itself.
      (define gl-unescape
        (fn (ch)
          (cond
            (= ch "n") "\n"
            (= ch "t") "\t"
            (= ch "r") "\r"
            :else ch)))

      ;; Read an interpreted string; the cursor is on the opening quote.
      ;; Returns the decoded contents. An unterminated string runs to end
      ;; of input.
      (define gl-read-string!
        (fn ()
          (gl-advance! 1)
          (let ((chars (list)))
            (define gl-string-loop
              (fn ()
                (cond
                  (>= pos src-len) nil
                  (= (gl-cur) "\"") (gl-advance! 1)
                  (= (gl-cur) "\\")
                    (do (gl-advance! 1)
                        (when (< pos src-len)
                          (append! chars (gl-unescape (gl-cur)))
                          (gl-advance! 1))
                        (gl-string-loop))
                  :else
                    (do (append! chars (gl-cur))
                        (gl-advance! 1)
                        (gl-string-loop)))))
            (gl-string-loop)
            (join "" chars))))

      ;; Read a raw string; the cursor is on the opening backquote.
      ;; No escapes are processed; carriage returns are dropped.
      (define gl-read-raw-string!
        (fn ()
          (gl-advance! 1)
          (let ((chars (list)))
            (define gl-raw-loop
              (fn ()
                (cond
                  (>= pos src-len) nil
                  (= (gl-cur) "`") (gl-advance! 1)
                  (= (gl-cur) "\r") (do (gl-advance! 1) (gl-raw-loop))
                  :else
                    (do (append! chars (gl-cur))
                        (gl-advance! 1)
                        (gl-raw-loop)))))
            (gl-raw-loop)
            (join "" chars))))

      ;; Read a rune literal; the cursor is on the opening quote. Decodes
      ;; one character or one single-character escape. The closing quote is
      ;; consumed only when it immediately follows.
      (define gl-read-rune!
        (fn ()
          (gl-advance! 1)
          (let ((chars (list)))
            (cond
              (and (< pos src-len) (= (gl-cur) "\\"))
                (do (gl-advance! 1)
                    (when (< pos src-len)
                      (append! chars (gl-unescape (gl-cur)))
                      (gl-advance! 1)))
              (< pos src-len)
                (do (append! chars (gl-cur))
                    (gl-advance! 1)))
            (when (and (< pos src-len) (= (gl-cur) "'"))
              (gl-advance! 1))
            (join "" chars))))

      ;; ── operators ──────────────────────────────────────────────────

      ;; Longest operator/punctuation text starting at the cursor, or nil.
      ;; Three-character forms are tested before two- and one-character
      ;; forms so the longest match wins. Does not advance the cursor.
      (define gl-match-op
        (fn ()
          (let ((c0 (gl-cur))
                (c1 (gl-peek 1))
                (c2 (gl-peek 2)))
            (cond
              (and (= c0 "<") (= c1 "<") (= c2 "=")) "<<="
              (and (= c0 ">") (= c1 ">") (= c2 "=")) ">>="
              (and (= c0 "&") (= c1 "^") (= c2 "=")) "&^="
              (and (= c0 ".") (= c1 ".") (= c2 ".")) "..."
              (and (= c0 "=") (= c1 "=")) "=="
              (and (= c0 "!") (= c1 "=")) "!="
              (and (= c0 "<") (= c1 "=")) "<="
              (and (= c0 ">") (= c1 "=")) ">="
              (and (= c0 "&") (= c1 "&")) "&&"
              (and (= c0 "|") (= c1 "|")) "||"
              (and (= c0 "+") (= c1 "+")) "++"
              (and (= c0 "-") (= c1 "-")) "--"
              (and (= c0 "<") (= c1 "<")) "<<"
              (and (= c0 ">") (= c1 ">")) ">>"
              (and (= c0 "+") (= c1 "=")) "+="
              (and (= c0 "-") (= c1 "=")) "-="
              (and (= c0 "*") (= c1 "=")) "*="
              (and (= c0 "/") (= c1 "=")) "/="
              (and (= c0 "%") (= c1 "=")) "%="
              (and (= c0 "&") (= c1 "=")) "&="
              (and (= c0 "|") (= c1 "=")) "|="
              (and (= c0 "^") (= c1 "=")) "^="
              (and (= c0 ":") (= c1 "=")) ":="
              (and (= c0 "<") (= c1 "-")) "<-"
              (and (= c0 "&") (= c1 "^")) "&^"
              (or (= c0 "+") (= c0 "-") (= c0 "*") (= c0 "/") (= c0 "%")
                  (= c0 "&") (= c0 "|") (= c0 "^") (= c0 "<") (= c0 ">")
                  (= c0 "=") (= c0 "!") (= c0 "~")
                  (= c0 "(") (= c0 ")") (= c0 "{") (= c0 "}")
                  (= c0 "[") (= c0 "]")
                  (= c0 ",") (= c0 ".") (= c0 ":"))
                c0
              :else nil))))

      ;; ── main loop ──────────────────────────────────────────────────

      ;; Scan from the cursor to end of input, emitting tokens as it goes.
      (define gl-scan!
        (fn ()
          (cond
            (>= pos src-len) nil

            ;; Newline is tested before lex-space? so that ASI still runs
            ;; in case lex-space? also accepts "\n".
            (= (gl-cur) "\n")
              (do (gl-maybe-asi! pos)
                  (gl-advance! 1)
                  (gl-scan!))

            (lex-space? (gl-cur))
              (do (gl-advance! 1)
                  (gl-scan!))

            ;; line comment
            (and (= (gl-cur) "/") (= (gl-peek 1) "/"))
              (do (gl-advance! 2)
                  (gl-skip-line!)
                  (gl-scan!))

            ;; block comment: counts as a newline only if it contains one
            (and (= (gl-cur) "/") (= (gl-peek 1) "*"))
              (do (gl-advance! 2)
                  (let ((saw-nl (gl-skip-block! false)))
                    (when saw-nl (gl-maybe-asi! pos)))
                  (gl-scan!))

            (= (gl-cur) ";")
              (do (gl-emit! "semi" ";" pos)
                  (gl-advance! 1)
                  (gl-scan!))

            (lex-ident-start? (gl-cur))
              (do (let ((start pos))
                    (let ((word (gl-read-ident! start)))
                      (gl-emit! (if (go-keyword? word) "keyword" "ident")
                                word
                                start)))
                  (gl-scan!))

            ;; number, starting with a digit or with "." followed by a digit
            (or (lex-digit? (gl-cur))
                (and (= (gl-cur) ".") (lex-digit? (gl-peek 1))))
              (do (let ((start pos)
                        (typ (gl-read-number!)))
                    (gl-emit! typ (slice src start pos) start))
                  (gl-scan!))

            (= (gl-cur) "\"")
              (let ((start pos)
                    (v (gl-read-string!)))
                (gl-emit! "string" v start)
                (gl-scan!))

            (= (gl-cur) "`")
              (let ((start pos)
                    (v (gl-read-raw-string!)))
                (gl-emit! "string" v start)
                (gl-scan!))

            (= (gl-cur) "'")
              (let ((start pos)
                    (v (gl-read-rune!)))
                (gl-emit! "rune" v start)
                (gl-scan!))

            :else
              (let ((op (gl-match-op)))
                (cond
                  op
                    (do (gl-emit! "op" op pos)
                        (gl-advance! (len op))
                        (gl-scan!))
                  ;; Unrecognised character: skipped without a token.
                  :else
                    (do (gl-advance! 1)
                        (gl-scan!)))))))

      (gl-scan!)
      ;; End of input acts like a newline for semicolon insertion.
      (gl-maybe-asi! pos)
      (gl-emit! "eof" nil pos)
      tokens)))