Files
rose-ash/lib/go/lex.sx
giles 65467c232b
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 22s
go: lex.sx — raw string literals (backtick) + 9 tests [nothing]
Adds Go raw string literals per Go spec § String literals:
backtick-delimited, no escape processing, may span multiple
lines, '\r' chars discarded from the value.

gl-read-raw-string! mirrors gl-read-string! but skips escape
handling and the \r filter. scan! routes the leading backtick
to it; emits "string" type (same as interpreted strings — no
need to distinguish at parse/type time).

lex 123/123.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 07:22:01 +00:00

476 lines
14 KiB
Plaintext

;; lib/go/lex.sx — Go tokenizer with automatic semicolon insertion.
;;
;; Consumes lib/guest/lex.sx character-class predicates.
;;
;; Tokens: {:type T :value V :pos P}
;; Types:
;; "ident" — identifiers (foo, _bar, mixedCase)
;; "keyword" — one of the 25 Go keywords
;; "int" — integer literals (decimal, 0x.. hex, 0b.. binary, 0o.. octal,
;; legacy 0123 octal; underscores between digits allowed)
;; "float" — decimal float literals (3.14, .5, 1., 1e10, 1.5e-3, 1E5)
;; "imag" — imaginary literals (2i, 3.14i, 1e2i)
;; "string" — interpreted string literals "..." OR raw string literals `...`
;; "rune" — rune literals 'x' (single char + simple escapes)
;; "op" — operators & punctuation; :value is the literal text
;; "semi" — explicit ';' or auto-inserted (Go spec § Semicolons)
;; "eof" — end-of-input sentinel
;;
;; ASI (Go spec § Semicolons): a newline (or EOF, or a block comment
;; containing a newline) emits a ";semi" if the previous emitted token's
;; type is ident/int/float/imag/string/rune, or its value is one of
;; {break, continue, fallthrough, return, ++, --, ), ], }}.
;;
;; All scanner locals are gl- prefixed: SX host primitives (peek/emit/etc.)
;; silently shadow guest-language defines. See feedback_sx_bind_clash.
(define
go-keywords
(list
"break"
"case"
"chan"
"const"
"continue"
"default"
"defer"
"else"
"fallthrough"
"for"
"func"
"go"
"goto"
"if"
"import"
"interface"
"map"
"package"
"range"
"return"
"select"
"struct"
"switch"
"type"
"var"))
(define go-keyword? (fn (s) (some (fn (k) (= k s)) go-keywords)))
(define go-asi-keywords (list "break" "continue" "fallthrough" "return"))
(define go-asi-ops (list "++" "--" ")" "]" "}"))
(define go-asi-lit-types (list "ident" "int" "float" "imag" "string" "rune"))
(define
go-asi-trigger?
(fn
(tok)
(if
(= tok nil)
false
(let
((ty (get tok :type)) (v (get tok :value)))
(or
(some (fn (lt) (= lt ty)) go-asi-lit-types)
(and (= ty "keyword") (some (fn (k) (= k v)) go-asi-keywords))
(and (= ty "op") (some (fn (o) (= o v)) go-asi-ops)))))))
(define
go-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define
gl-peek
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define gl-cur (fn () (gl-peek 0)))
(define gl-advance! (fn (n) (set! pos (+ pos n))))
(define
gl-last
(fn
()
(if
(= (len tokens) 0)
nil
(nth tokens (- (len tokens) 1)))))
(define gl-emit! (fn (type value start) (append! tokens {:type type :value value :pos start})))
(define
gl-maybe-asi!
(fn
(at)
(when (go-asi-trigger? (gl-last)) (gl-emit! "semi" "\n" at))))
(define
gl-oct-digit?
(fn (c) (and (not (= c nil)) (>= c "0") (<= c "7"))))
(define gl-bin-digit? (fn (c) (or (= c "0") (= c "1"))))
(define
gl-skip-line!
(fn
()
(when
(and (< pos src-len) (not (= (gl-cur) "\n")))
(gl-advance! 1)
(gl-skip-line!))))
(define
gl-skip-block!
(fn
(saw-nl)
(cond
(>= pos src-len)
saw-nl
(and (= (gl-cur) "*") (= (gl-peek 1) "/"))
(do (gl-advance! 2) saw-nl)
:else (let
((is-nl (= (gl-cur) "\n")))
(gl-advance! 1)
(gl-skip-block! (or saw-nl is-nl))))))
(define
gl-read-ident!
(fn
(start)
(when
(and (< pos src-len) (lex-ident-char? (gl-cur)))
(gl-advance! 1)
(gl-read-ident! start))
(slice src start pos)))
(define
gl-read-digit-run!
(fn
(digit?)
(when
(and (< pos src-len) (or (digit? (gl-cur)) (= (gl-cur) "_")))
(gl-advance! 1)
(gl-read-digit-run! digit?))))
(define
gl-finish-number!
(fn
(has-fraction?)
(let
((typ (if has-fraction? "float" "int")))
(when
(or (= (gl-cur) "e") (= (gl-cur) "E"))
(gl-advance! 1)
(when
(or (= (gl-cur) "+") (= (gl-cur) "-"))
(gl-advance! 1))
(gl-read-digit-run! lex-digit?)
(set! typ "float"))
(cond
(= (gl-cur) "i")
(do (gl-advance! 1) "imag")
:else typ))))
(define
gl-read-number!
(fn
()
(cond
(and (= (gl-cur) ".") (lex-digit? (gl-peek 1)))
(do
(gl-advance! 1)
(gl-read-digit-run! lex-digit?)
(gl-finish-number! true))
(and
(= (gl-cur) "0")
(or
(= (gl-peek 1) "x")
(= (gl-peek 1) "X")))
(do
(gl-advance! 2)
(gl-read-digit-run! lex-hex-digit?)
"int")
(and
(= (gl-cur) "0")
(or
(= (gl-peek 1) "b")
(= (gl-peek 1) "B")))
(do
(gl-advance! 2)
(gl-read-digit-run! gl-bin-digit?)
"int")
(and
(= (gl-cur) "0")
(or
(= (gl-peek 1) "o")
(= (gl-peek 1) "O")))
(do
(gl-advance! 2)
(gl-read-digit-run! gl-oct-digit?)
"int")
:else (do
(gl-read-digit-run! lex-digit?)
(cond
(and (= (gl-cur) ".") (not (= (gl-peek 1) ".")))
(do
(gl-advance! 1)
(gl-read-digit-run! lex-digit?)
(gl-finish-number! true))
:else (gl-finish-number! false))))))
(define
gl-read-string!
(fn
()
(gl-advance! 1)
(let
((chars (list)))
(define
gl-string-loop
(fn
()
(cond
(>= pos src-len)
nil
(= (gl-cur) "\"")
(gl-advance! 1)
(= (gl-cur) "\\")
(do
(gl-advance! 1)
(when
(< pos src-len)
(let
((ch (gl-cur)))
(cond
(= ch "n")
(append! chars "\n")
(= ch "t")
(append! chars "\t")
(= ch "r")
(append! chars "\r")
(= ch "\\")
(append! chars "\\")
(= ch "\"")
(append! chars "\"")
(= ch "'")
(append! chars "'")
:else (append! chars ch))
(gl-advance! 1)))
(gl-string-loop))
:else (do
(append! chars (gl-cur))
(gl-advance! 1)
(gl-string-loop)))))
(gl-string-loop)
(join "" chars))))
(define
gl-read-raw-string!
(fn
()
(gl-advance! 1)
(let
((chars (list)))
(define
gl-raw-loop
(fn
()
(cond
(>= pos src-len)
nil
(= (gl-cur) "`")
(gl-advance! 1)
(= (gl-cur) "\r")
(do (gl-advance! 1) (gl-raw-loop))
:else (do
(append! chars (gl-cur))
(gl-advance! 1)
(gl-raw-loop)))))
(gl-raw-loop)
(join "" chars))))
(define
gl-read-rune!
(fn
()
(gl-advance! 1)
(let
((chars (list)))
(cond
(and (< pos src-len) (= (gl-cur) "\\"))
(do
(gl-advance! 1)
(when
(< pos src-len)
(let
((ch (gl-cur)))
(cond
(= ch "n")
(append! chars "\n")
(= ch "t")
(append! chars "\t")
(= ch "r")
(append! chars "\r")
(= ch "\\")
(append! chars "\\")
(= ch "'")
(append! chars "'")
(= ch "\"")
(append! chars "\"")
:else (append! chars ch))
(gl-advance! 1))))
(< pos src-len)
(do (append! chars (gl-cur)) (gl-advance! 1)))
(when
(and (< pos src-len) (= (gl-cur) "'"))
(gl-advance! 1))
(join "" chars))))
(define
gl-match-op
(fn
()
(let
((c0 (gl-cur))
(c1 (gl-peek 1))
(c2 (gl-peek 2)))
(cond
(and (= c0 "<") (= c1 "<") (= c2 "="))
"<<="
(and (= c0 ">") (= c1 ">") (= c2 "="))
">>="
(and (= c0 "&") (= c1 "^") (= c2 "="))
"&^="
(and (= c0 ".") (= c1 ".") (= c2 "."))
"..."
(and (= c0 "=") (= c1 "="))
"=="
(and (= c0 "!") (= c1 "="))
"!="
(and (= c0 "<") (= c1 "="))
"<="
(and (= c0 ">") (= c1 "="))
">="
(and (= c0 "&") (= c1 "&"))
"&&"
(and (= c0 "|") (= c1 "|"))
"||"
(and (= c0 "+") (= c1 "+"))
"++"
(and (= c0 "-") (= c1 "-"))
"--"
(and (= c0 "<") (= c1 "<"))
"<<"
(and (= c0 ">") (= c1 ">"))
">>"
(and (= c0 "+") (= c1 "="))
"+="
(and (= c0 "-") (= c1 "="))
"-="
(and (= c0 "*") (= c1 "="))
"*="
(and (= c0 "/") (= c1 "="))
"/="
(and (= c0 "%") (= c1 "="))
"%="
(and (= c0 "&") (= c1 "="))
"&="
(and (= c0 "|") (= c1 "="))
"|="
(and (= c0 "^") (= c1 "="))
"^="
(and (= c0 ":") (= c1 "="))
":="
(and (= c0 "<") (= c1 "-"))
"<-"
(and (= c0 "&") (= c1 "^"))
"&^"
(or
(= c0 "+")
(= c0 "-")
(= c0 "*")
(= c0 "/")
(= c0 "%")
(= c0 "&")
(= c0 "|")
(= c0 "^")
(= c0 "<")
(= c0 ">")
(= c0 "=")
(= c0 "!")
(= c0 "(")
(= c0 ")")
(= c0 "{")
(= c0 "}")
(= c0 "[")
(= c0 "]")
(= c0 ",")
(= c0 ".")
(= c0 ":"))
c0
:else nil))))
(define
gl-scan!
(fn
()
(cond
(>= pos src-len)
nil
(= (gl-cur) "\n")
(do (gl-maybe-asi! pos) (gl-advance! 1) (gl-scan!))
(lex-space? (gl-cur))
(do (gl-advance! 1) (gl-scan!))
(and (= (gl-cur) "/") (= (gl-peek 1) "/"))
(do (gl-advance! 2) (gl-skip-line!) (gl-scan!))
(and (= (gl-cur) "/") (= (gl-peek 1) "*"))
(do
(gl-advance! 2)
(let
((saw-nl (gl-skip-block! false)))
(when saw-nl (gl-maybe-asi! pos)))
(gl-scan!))
(= (gl-cur) ";")
(do
(gl-emit! "semi" ";" pos)
(gl-advance! 1)
(gl-scan!))
(lex-ident-start? (gl-cur))
(do
(let
((start pos))
(gl-read-ident! start)
(let
((word (slice src start pos)))
(gl-emit!
(if (go-keyword? word) "keyword" "ident")
word
start)))
(gl-scan!))
(lex-digit? (gl-cur))
(do
(let
((start pos) (typ (gl-read-number!)))
(gl-emit! typ (slice src start pos) start))
(gl-scan!))
(and (= (gl-cur) ".") (lex-digit? (gl-peek 1)))
(do
(let
((start pos) (typ (gl-read-number!)))
(gl-emit! typ (slice src start pos) start))
(gl-scan!))
(= (gl-cur) "\"")
(let
((start pos) (v (gl-read-string!)))
(gl-emit! "string" v start)
(gl-scan!))
(= (gl-cur) "`")
(let
((start pos) (v (gl-read-raw-string!)))
(gl-emit! "string" v start)
(gl-scan!))
(= (gl-cur) "'")
(let
((start pos) (v (gl-read-rune!)))
(gl-emit! "rune" v start)
(gl-scan!))
:else (let
((op (gl-match-op)))
(cond
op
(do
(gl-emit! "op" op pos)
(gl-advance! (len op))
(gl-scan!))
:else (do (gl-advance! 1) (gl-scan!)))))))
(gl-scan!)
(gl-maybe-asi! pos)
(gl-emit! "eof" nil pos)
tokens)))