Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 22s
Adds Go raw string literals per Go spec § String literals: backtick-delimited, no escape processing, may span multiple lines, '\r' chars discarded from the value. gl-read-raw-string! mirrors gl-read-string! but skips escape handling and the \r filter. scan! routes the leading backtick to it; emits "string" type (same as interpreted strings — no need to distinguish at parse/type time). lex 123/123. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
476 lines
14 KiB
Plaintext
476 lines
14 KiB
Plaintext
;; lib/go/lex.sx — Go tokenizer with automatic semicolon insertion.
|
|
;;
|
|
;; Consumes lib/guest/lex.sx character-class predicates.
|
|
;;
|
|
;; Tokens: {:type T :value V :pos P}
|
|
;; Types:
|
|
;; "ident" — identifiers (foo, _bar, mixedCase)
|
|
;; "keyword" — one of the 25 Go keywords
|
|
;; "int" — integer literals (decimal, 0x.. hex, 0b.. binary, 0o.. octal,
|
|
;; legacy 0123 octal; underscores between digits allowed)
|
|
;; "float" — decimal float literals (3.14, .5, 1., 1e10, 1.5e-3, 1E5)
|
|
;; "imag" — imaginary literals (2i, 3.14i, 1e2i)
|
|
;; "string" — interpreted string literals "..." OR raw string literals `...`
|
|
;; "rune" — rune literals 'x' (single char + simple escapes)
|
|
;; "op" — operators & punctuation; :value is the literal text
|
|
;; "semi" — explicit ';' or auto-inserted (Go spec § Semicolons)
|
|
;; "eof" — end-of-input sentinel
|
|
;;
|
|
;; ASI (Go spec § Semicolons): a newline (or EOF, or a block comment
|
|
;; containing a newline) emits a "semi" token if the previous emitted token's
|
|
;; type is ident/int/float/imag/string/rune, or its value is one of
|
|
;; {break, continue, fallthrough, return, ++, --, ), ], }}.
|
|
;;
|
|
;; All scanner locals are gl- prefixed: SX host primitives (peek/emit/etc.)
|
|
;; silently shadow guest-language defines. See feedback_sx_bind_clash.
|
|
|
|
;; The 25 reserved words of Go, in alphabetical order.
;; Identifiers whose text appears here are emitted as "keyword" tokens.
(define go-keywords
  (list
    "break" "case" "chan" "const" "continue"
    "default" "defer" "else" "fallthrough" "for"
    "func" "go" "goto" "if" "import"
    "interface" "map" "package" "range" "return"
    "select" "struct" "switch" "type" "var"))
|
|
|
|
;; Truthy when the string `s` is equal to one of the entries of go-keywords.
(define go-keyword?
  (fn (s)
    (let ((gl-same-word? (fn (kw) (= s kw))))
      (some gl-same-word? go-keywords))))
|
|
|
|
;; Keywords after which a newline is turned into a "semi" token.
(define go-asi-keywords
  (list
    "break"
    "continue"
    "fallthrough"
    "return"))
|
|
|
|
;; Operator / punctuation tokens after which a newline is turned into a
;; "semi" token.
(define go-asi-ops
  (list
    "++"
    "--"
    ")"
    "]"
    "}"))
|
|
|
|
;; Token types (identifier and literal kinds) after which a newline is
;; turned into a "semi" token, regardless of the token's value.
(define go-asi-lit-types
  (list
    "ident"
    "int"
    "float"
    "imag"
    "string"
    "rune"))
|
|
|
|
;; Decides whether a line break directly after `tok` should produce a
;; "semi" token.
;;   tok — a token dict ({:type .. :value ..}) or nil when nothing has
;;         been emitted yet.
;; Returns false for nil; otherwise truthy when the token's type is in
;; go-asi-lit-types, or it is a keyword listed in go-asi-keywords, or an
;; op listed in go-asi-ops.
(define go-asi-trigger?
  (fn (tok)
    (cond
      (= tok nil) false
      :else
      (let ((tok-type (get tok :type))
            (tok-value (get tok :value))
            (gl-member? (fn (needle hay) (some (fn (item) (= item needle)) hay))))
        (or
          (gl-member? tok-type go-asi-lit-types)
          (and (= tok-type "keyword") (gl-member? tok-value go-asi-keywords))
          (and (= tok-type "op") (gl-member? tok-value go-asi-ops)))))))
|
|
|
|
(define
|
|
go-tokenize
|
|
(fn
|
|
(src)
|
|
(let
|
|
((tokens (list)) (pos 0) (src-len (len src)))
|
|
;; Character `offset` positions ahead of the cursor, or nil when that
;; index lies at or beyond the end of the source.
(define gl-peek
  (fn (offset)
    (let ((idx (+ pos offset)))
      (if (>= idx src-len) nil (nth src idx)))))
|
|
;; Character under the cursor, or nil once the cursor has reached the end
;; of the source.
(define gl-cur
  (fn ()
    (if (< pos src-len) (nth src pos) nil)))
|
|
;; Moves the cursor forward by `n` characters (no bounds check here;
;; readers compare pos against src-len themselves).
(define gl-advance!
  (fn (n)
    (let ((gl-next-pos (+ pos n)))
      (set! pos gl-next-pos))))
|
|
;; Most recently emitted token, or nil when no token has been emitted.
(define gl-last
  (fn ()
    (let ((n-toks (len tokens)))
      (if (= n-toks 0)
        nil
        (nth tokens (- n-toks 1))))))
|
|
;; Appends a token dict {:type :value :pos} to the output list.
;;   type  — token type string ("ident", "op", "semi", ...)
;;   value — token payload (text, decoded string, or nil for "eof")
;;   start — source offset recorded as the token's :pos
(define gl-emit!
  (fn (type value start)
    (let ((new-tok {:type type :value value :pos start}))
      (append! tokens new-tok))))
|
|
;; Automatic semicolon insertion: if the last emitted token qualifies
;; (see go-asi-trigger?), emit a "semi" token whose value is "\n" and
;; whose position is `at`. Otherwise do nothing.
(define gl-maybe-asi!
  (fn (at)
    (let ((prev-tok (gl-last)))
      (when (go-asi-trigger? prev-tok)
        (gl-emit! "semi" "\n" at)))))
|
|
;; True when `c` is a character in the range "0".."7"; false for nil.
(define gl-oct-digit?
  (fn (c)
    (if (= c nil)
      false
      (and (>= c "0") (<= c "7")))))
|
|
;; True when `c` is the character "0" or "1".
(define gl-bin-digit?
  (fn (c)
    (if (= c "0")
      true
      (= c "1"))))
|
|
;; Advances the cursor up to (but not past) the next "\n", or to the end
;; of the source if there is none. The newline is left for the scanner so
;; it can apply semicolon insertion.
(define gl-skip-line!
  (fn ()
    (cond
      (>= pos src-len) nil
      (= (gl-cur) "\n") nil
      :else (do (gl-advance! 1) (gl-skip-line!)))))
|
|
;; Skips the body of a /* ... */ comment; the cursor is expected to sit
;; just after the opening "/*".
;;   saw-nl — whether a newline has been seen so far in this comment.
;; Consumes through the closing "*/" (or to end of input if the comment
;; is unterminated) and returns whether any "\n" was crossed.
(define gl-skip-block!
  (fn (saw-nl)
    (if (>= pos src-len)
      saw-nl
      (if (and (= (gl-cur) "*") (= (gl-peek 1) "/"))
        (do (gl-advance! 2) saw-nl)
        (let ((here (gl-cur)))
          (gl-advance! 1)
          (gl-skip-block! (or saw-nl (= here "\n"))))))))
|
|
;; Consumes identifier characters (per lex-ident-char?) from the cursor
;; onward and returns the source text from `start` to the final cursor.
;;   start — offset of the identifier's first character.
(define gl-read-ident!
  (fn (start)
    (cond
      (>= pos src-len) nil
      (not (lex-ident-char? (gl-cur))) nil
      :else (do (gl-advance! 1) (gl-read-ident! start)))
    (slice src start pos)))
|
|
;; Consumes a run of characters that satisfy `digit?` or are "_" (digit
;; separator). Stops at the first other character or at end of input.
;;   digit? — one-argument predicate selecting the digits of the radix.
(define gl-read-digit-run!
  (fn (digit?)
    (when (< pos src-len)
      (let ((here (gl-cur)))
        (when (or (digit? here) (= here "_"))
          (gl-advance! 1)
          (gl-read-digit-run! digit?))))))
|
|
;; Consumes the optional tail of a decimal literal and returns its token
;; type.
;;   has-fraction? — true when a "." was already consumed.
;; An exponent ("e"/"E", optional sign, digit run) is consumed when
;; present. A trailing "i" is then consumed and yields "imag"; otherwise
;; the result is "float" if there was a fraction or exponent, else "int".
(define gl-finish-number!
  (fn (has-fraction?)
    (let ((saw-exp (or (= (gl-cur) "e") (= (gl-cur) "E"))))
      (when saw-exp
        (gl-advance! 1)
        (when (or (= (gl-cur) "+") (= (gl-cur) "-"))
          (gl-advance! 1))
        (gl-read-digit-run! lex-digit?))
      (if (= (gl-cur) "i")
        (do (gl-advance! 1) "imag")
        (if (or has-fraction? saw-exp) "float" "int")))))
|
|
;; Consumes one numeric literal starting at the cursor and returns its
;; token type ("int", "float" or "imag"). Handled forms, in order:
;;   .5          — leading-dot float
;;   0x.. / 0X.. — hex int
;;   0b.. / 0B.. — binary int
;;   0o.. / 0O.. — octal int
;;   otherwise   — decimal digits, optional "." fraction (but not when the
;;                 "." is followed by another "."), then exponent/"i" via
;;                 gl-finish-number!.
(define gl-read-number!
  (fn ()
    (let ((c0 (gl-cur))
          (c1 (gl-peek 1)))
      (let ((gl-prefixed? (fn (lower upper)
                            (and (= c0 "0") (or (= c1 lower) (= c1 upper)))))
            (gl-read-radix! (fn (digit?)
                              (gl-advance! 2)
                              (gl-read-digit-run! digit?)
                              "int")))
        (cond
          (and (= c0 ".") (lex-digit? c1))
          (do
            (gl-advance! 1)
            (gl-read-digit-run! lex-digit?)
            (gl-finish-number! true))
          (gl-prefixed? "x" "X") (gl-read-radix! lex-hex-digit?)
          (gl-prefixed? "b" "B") (gl-read-radix! gl-bin-digit?)
          (gl-prefixed? "o" "O") (gl-read-radix! gl-oct-digit?)
          :else
          (do
            (gl-read-digit-run! lex-digit?)
            (if (and (= (gl-cur) ".") (not (= (gl-peek 1) ".")))
              (do
                (gl-advance! 1)
                (gl-read-digit-run! lex-digit?)
                (gl-finish-number! true))
              (gl-finish-number! false))))))))
|
|
;; Reads an interpreted string literal; the cursor must be on the opening
;; double quote. Returns the decoded contents (without the quotes).
;; Escapes: \n, \t and \r map to the control characters; any other
;; escaped character (including \\, \" and \') stands for itself.
;; An unterminated literal runs to the end of the source.
(define gl-read-string!
  (fn ()
    (gl-advance! 1)
    (let ((pieces (list)))
      (define gl-string-step!
        (fn ()
          (if (>= pos src-len)
            nil
            (let ((here (gl-cur)))
              (cond
                ;; closing quote: consume it and stop
                (= here "\"")
                (gl-advance! 1)
                ;; backslash escape: decode the following character, if any
                (= here "\\")
                (do
                  (gl-advance! 1)
                  (when (< pos src-len)
                    (let ((esc (gl-cur)))
                      (append!
                        pieces
                        (cond
                          (= esc "n") "\n"
                          (= esc "t") "\t"
                          (= esc "r") "\r"
                          :else esc))
                      (gl-advance! 1)))
                  (gl-string-step!))
                ;; ordinary character
                :else
                (do
                  (append! pieces here)
                  (gl-advance! 1)
                  (gl-string-step!)))))))
      (gl-string-step!)
      (join "" pieces))))
|
|
;; Reads a raw (backtick-delimited) string literal; the cursor must be on
;; the opening backtick. No escape processing is done and the literal may
;; span lines. "\r" characters are dropped from the returned value.
;; An unterminated literal runs to the end of the source.
(define gl-read-raw-string!
  (fn ()
    (gl-advance! 1)
    (let ((pieces (list)))
      (define gl-raw-step!
        (fn ()
          (if (>= pos src-len)
            nil
            (let ((here (gl-cur)))
              (if (= here "`")
                ;; closing backtick: consume it and stop
                (gl-advance! 1)
                (do
                  ;; keep everything except carriage returns
                  (when (not (= here "\r"))
                    (append! pieces here))
                  (gl-advance! 1)
                  (gl-raw-step!)))))))
      (gl-raw-step!)
      (join "" pieces))))
|
|
;; Reads a rune literal; the cursor must be on the opening single quote.
;; Takes exactly one character, or one backslash escape (\n, \t, \r map
;; to control characters; any other escaped character stands for itself),
;; then consumes the closing quote if it is the next character.
;; Returns the decoded character as a string ("" if input ended early).
(define gl-read-rune!
  (fn ()
    (gl-advance! 1)
    (let ((pieces (list)))
      (when (< pos src-len)
        (if (= (gl-cur) "\\")
          (do
            (gl-advance! 1)
            (when (< pos src-len)
              (let ((esc (gl-cur)))
                (append!
                  pieces
                  (cond
                    (= esc "n") "\n"
                    (= esc "t") "\t"
                    (= esc "r") "\r"
                    :else esc))
                (gl-advance! 1))))
          (do
            (append! pieces (gl-cur))
            (gl-advance! 1))))
      (when (and (< pos src-len) (= (gl-cur) "'"))
        (gl-advance! 1))
      (join "" pieces))))
|
|
;; Returns the longest operator / punctuation token that starts at the
;; cursor, or nil when the current character does not begin one. Does NOT
;; advance the cursor; the caller advances by the length of the result.
;;
;; Candidates are tried longest-first (3 chars, then 2, then 1) so that
;; e.g. "<<=" wins over "<<" and "<". ";" is not handled here because the
;; scanner emits it as a "semi" token.
;;
;; "~" is included in the single-character set: the Go spec lists it
;; under "Operators and punctuation" (type-constraint approximation, as
;; in ~int). Without it the scanner's fallback would silently drop it.
(define gl-match-op
  (fn ()
    (let ((c0 (gl-cur))
          (c1 (gl-peek 1))
          (c2 (gl-peek 2)))
      (cond
        ;; three-character operators
        (and (= c0 "<") (= c1 "<") (= c2 "=")) "<<="
        (and (= c0 ">") (= c1 ">") (= c2 "=")) ">>="
        (and (= c0 "&") (= c1 "^") (= c2 "=")) "&^="
        (and (= c0 ".") (= c1 ".") (= c2 ".")) "..."
        ;; two-character operators
        (and (= c0 "=") (= c1 "=")) "=="
        (and (= c0 "!") (= c1 "=")) "!="
        (and (= c0 "<") (= c1 "=")) "<="
        (and (= c0 ">") (= c1 "=")) ">="
        (and (= c0 "&") (= c1 "&")) "&&"
        (and (= c0 "|") (= c1 "|")) "||"
        (and (= c0 "+") (= c1 "+")) "++"
        (and (= c0 "-") (= c1 "-")) "--"
        (and (= c0 "<") (= c1 "<")) "<<"
        (and (= c0 ">") (= c1 ">")) ">>"
        (and (= c0 "+") (= c1 "=")) "+="
        (and (= c0 "-") (= c1 "=")) "-="
        (and (= c0 "*") (= c1 "=")) "*="
        (and (= c0 "/") (= c1 "=")) "/="
        (and (= c0 "%") (= c1 "=")) "%="
        (and (= c0 "&") (= c1 "=")) "&="
        (and (= c0 "|") (= c1 "=")) "|="
        (and (= c0 "^") (= c1 "=")) "^="
        (and (= c0 ":") (= c1 "=")) ":="
        (and (= c0 "<") (= c1 "-")) "<-"
        (and (= c0 "&") (= c1 "^")) "&^"
        ;; single-character operators and punctuation
        (or
          (= c0 "+")
          (= c0 "-")
          (= c0 "*")
          (= c0 "/")
          (= c0 "%")
          (= c0 "&")
          (= c0 "|")
          (= c0 "^")
          (= c0 "<")
          (= c0 ">")
          (= c0 "=")
          (= c0 "!")
          (= c0 "~")
          (= c0 "(")
          (= c0 ")")
          (= c0 "{")
          (= c0 "}")
          (= c0 "[")
          (= c0 "]")
          (= c0 ",")
          (= c0 ".")
          (= c0 ":"))
        c0
        :else nil))))
|
|
;; Main scanner loop: consumes the source from the cursor to the end,
;; emitting tokens through gl-emit!. Each branch handles one lexical form
;; and then re-enters the loop. Branch order matters:
;;   newline      — may insert a "semi" (ASI), then is skipped
;;   other space  — skipped
;;   // comment   — skipped up to the newline (handled by the next pass)
;;   /* comment   — skipped; acts like a newline for ASI if it contained one
;;   ";"          — explicit "semi"
;;   identifier   — "keyword" or "ident"
;;   number       — starts with a digit, or "." followed by a digit
;;   "..." / `...` — "string"
;;   '...'        — "rune"
;;   otherwise    — operator via gl-match-op; an unrecognised character
;;                  is skipped without emitting a token
(define gl-scan!
  (fn ()
    (let ((c0 (gl-cur))
          (c1 (gl-peek 1)))
      (cond
        (>= pos src-len)
        nil

        (= c0 "\n")
        (do (gl-maybe-asi! pos) (gl-advance! 1) (gl-scan!))

        (lex-space? c0)
        (do (gl-advance! 1) (gl-scan!))

        (and (= c0 "/") (= c1 "/"))
        (do (gl-advance! 2) (gl-skip-line!) (gl-scan!))

        (and (= c0 "/") (= c1 "*"))
        (do
          (gl-advance! 2)
          ;; gl-skip-block! reports whether the comment spanned a newline
          (when (gl-skip-block! false)
            (gl-maybe-asi! pos))
          (gl-scan!))

        (= c0 ";")
        (do (gl-emit! "semi" ";" pos) (gl-advance! 1) (gl-scan!))

        (lex-ident-start? c0)
        (let ((start pos))
          (let ((word (gl-read-ident! start)))
            (gl-emit! (if (go-keyword? word) "keyword" "ident") word start))
          (gl-scan!))

        (or (lex-digit? c0) (and (= c0 ".") (lex-digit? c1)))
        (let ((start pos))
          (let ((typ (gl-read-number!)))
            (gl-emit! typ (slice src start pos) start))
          (gl-scan!))

        (= c0 "\"")
        (let ((start pos))
          (let ((decoded (gl-read-string!)))
            (gl-emit! "string" decoded start))
          (gl-scan!))

        (= c0 "`")
        (let ((start pos))
          (let ((decoded (gl-read-raw-string!)))
            (gl-emit! "string" decoded start))
          (gl-scan!))

        (= c0 "'")
        (let ((start pos))
          (let ((decoded (gl-read-rune!)))
            (gl-emit! "rune" decoded start))
          (gl-scan!))

        :else
        (let ((op (gl-match-op)))
          (if op
            (do
              (gl-emit! "op" op pos)
              (gl-advance! (len op))
              (gl-scan!))
            (do (gl-advance! 1) (gl-scan!))))))))
|
|
(gl-scan!)
|
|
(gl-maybe-asi! pos)
|
|
(gl-emit! "eof" nil pos)
|
|
tokens)))
|