Files
rose-ash/lib/go/lex.sx
giles fe614fc531
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 29s
go: lex.sx — hex/octal/binary integer literals + underscores, +14 tests [consumes-lex]
Adds prefixed integer forms per Go spec § Integer literals:
0x.. / 0X.. (hex), 0b.. / 0B.. (binary), 0o.. / 0O.. (octal),
legacy 0123 octal also accepted. Underscores allowed between digits
in any run; lexer is permissive (parser/types phase can enforce
strict placement).

Dispatch lives in gl-read-number! against the first 1-2 chars;
hex digit run consumes lex-hex-digit? from lib/guest/lex.sx. Octal
and binary use local gl-oct-digit?/gl-bin-digit? — narrow enough
that promoting them to the kit is premature.

lex 92/92.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 06:57:47 +00:00

401 lines
12 KiB
Plaintext

;; lib/go/lex.sx — Go tokenizer with automatic semicolon insertion.
;;
;; Consumes lib/guest/lex.sx character-class predicates.
;;
;; Tokens: {:type T :value V :pos P}
;; Types:
;; "ident" — identifiers (foo, _bar, mixedCase)
;; "keyword" — one of the 25 Go keywords
;; "int" — integer literals (decimal, 0x.. hex, 0b.. binary, 0o.. octal,
;; legacy 0123 octal; underscores between digits allowed)
;; "string" — interpreted string literals "..."
;; "rune" — rune literals 'x' (single char + simple escapes)
;; "op" — operators & punctuation; :value is the literal text
;; "semi" — explicit ';' or auto-inserted (Go spec § Semicolons)
;; "eof" — end-of-input sentinel
;;
;; ASI (Go spec § Semicolons): a newline (or EOF, or a block comment
;; containing a newline) emits a "semi" token (with :value "\n") if the
;; previous emitted token's type is ident/int/string/rune, or its value is
;; one of {break, continue, fallthrough, return, ++, --, ), ], }}.
;;
;; All scanner locals are gl- prefixed: SX host primitives (peek/emit/etc.)
;; silently shadow guest-language defines. See feedback_sx_bind_clash.
;; The 25 reserved words of Go, in alphabetical order (five per row).
;; go-keyword? tests membership in this list.
(define go-keywords
  (list
    "break" "case" "chan" "const" "continue"
    "default" "defer" "else" "fallthrough" "for"
    "func" "go" "goto" "if" "import"
    "interface" "map" "package" "range" "return"
    "select" "struct" "switch" "type" "var"))
;; Truthy when the string s is one of the entries of go-keywords.
(define go-keyword?
  (fn (s)
    (some (fn (reserved) (= reserved s)) go-keywords)))
;; Keywords after which a line end inserts a semicolon (Go spec § Semicolons).
(define go-asi-keywords
  (list
    "break"
    "continue"
    "fallthrough"
    "return"))
;; Operators/punctuation after which a line end inserts a semicolon
;; (Go spec § Semicolons).
(define go-asi-ops
  (list
    "++"
    "--"
    ")"
    "]"
    "}"))
;; Decides whether a line end directly after tok inserts a semicolon.
;;   tok — a token dict ({:type .. :value ..}) or nil.
;; Returns false for nil; otherwise truthy when tok's type is
;; ident/int/string/rune, or tok is a keyword listed in go-asi-keywords,
;; or tok is an op listed in go-asi-ops.
(define go-asi-trigger?
  (fn (tok)
    (if (not (= tok nil))
      (let
        ((tok-type (get tok :type))
         (tok-value (get tok :value))
         (in-set? (fn (x xs) (some (fn (member) (= member x)) xs))))
        (or
          (= tok-type "ident")
          (= tok-type "int")
          (= tok-type "string")
          (= tok-type "rune")
          (and (= tok-type "keyword") (in-set? tok-value go-asi-keywords))
          (and (= tok-type "op") (in-set? tok-value go-asi-ops))))
      false)))
(define
go-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
;; Character at pos+offset, or nil when that index is at or past src-len.
;; Does not move the cursor.
(define gl-peek
  (fn (offset)
    (let ((idx (+ pos offset)))
      (if (>= idx src-len) nil (nth src idx)))))
;; Character under the cursor, or nil once pos has reached src-len.
(define gl-cur
  (fn ()
    (if (< pos src-len) (nth src pos) nil)))
;; Moves the cursor forward by n characters (no bounds check).
(define gl-advance!
  (fn (n)
    (let ((new-pos (+ pos n)))
      (set! pos new-pos))))
;; Most recently emitted token, or nil while the token list is empty.
(define gl-last
  (fn ()
    (let ((n-toks (len tokens)))
      (if (>= n-toks 1)
        (nth tokens (- n-toks 1))
        nil))))
;; Appends a token dict {:type :value :pos} to the output list.
;;   type  — token type string ("ident", "op", "semi", ...)
;;   value — token payload
;;   start — source offset recorded as :pos
(define gl-emit!
  (fn (type value start)
    (let ((new-tok {:type type :value value :pos start}))
      (append! tokens new-tok))))
;; Automatic semicolon insertion: when the last emitted token satisfies
;; go-asi-trigger?, emits a "semi" token with value "\n" at offset `at`.
(define gl-maybe-asi!
  (fn (at)
    (let ((prev-tok (gl-last)))
      (when (go-asi-trigger? prev-tok)
        (gl-emit! "semi" "\n" at)))))
;; True when c is a character in the range "0".."7"; false for nil.
(define gl-oct-digit?
  (fn (c)
    (if (= c nil)
      false
      (and (>= c "0") (<= c "7")))))
;; True when c is "0" or "1".
(define gl-bin-digit?
  (fn (c)
    (if (= c "0") true (= c "1"))))
;; Advances the cursor up to (but not past) the next "\n", or to end of
;; input. The newline is left in place so the scanner still sees it and
;; can apply semicolon insertion.
(define gl-skip-line!
  (fn ()
    (cond
      (>= pos src-len)
      nil
      (= (gl-cur) "\n")
      nil
      :else (do (gl-advance! 1) (gl-skip-line!)))))
;; Skips the body of a block comment; the opening "/*" has already been
;; consumed by the caller. Stops after the closing "*/" or at end of input
;; (an unterminated comment is not reported).
;;   saw-nl — accumulator: whether a "\n" has been seen so far.
;; Returns the final saw-nl value.
(define gl-skip-block!
  (fn (saw-nl)
    (if (>= pos src-len)
      saw-nl
      (let ((ch (gl-cur)))
        (if (and (= ch "*") (= (gl-peek 1) "/"))
          (do (gl-advance! 2) saw-nl)
          (do
            (gl-advance! 1)
            (gl-skip-block! (or saw-nl (= ch "\n")))))))))
;; Consumes identifier characters (lex-ident-char?) from the cursor onward
;; and returns the text from `start` to the final cursor position.
;;   start — source offset where the identifier begins.
;;
;; The consuming loop is a separate tail-recursive helper so that the
;; slice is taken exactly once. Previously the slice sat after the
;; recursive call, so every recursion frame re-sliced and threw the result
;; away: quadratic work and non-tail recursion on long identifiers.
(define gl-read-ident!
  (fn (start)
    (let ((ident-start start))
      (define gl-ident-loop!
        (fn ()
          (when
            (and (< pos src-len) (lex-ident-char? (gl-cur)))
            (gl-advance! 1)
            (gl-ident-loop!))))
      (gl-ident-loop!)
      (slice src ident-start pos))))
;; Consumes a run of characters that are either accepted by the digit?
;; predicate or are "_" separators. Underscore placement is not validated.
;;   digit? — one-argument character predicate.
(define gl-read-digit-run!
  (fn (digit?)
    (cond
      (>= pos src-len)
      nil
      (or (digit? (gl-cur)) (= (gl-cur) "_"))
      (do (gl-advance! 1) (gl-read-digit-run! digit?))
      :else nil)))
;; Consumes one integer literal starting at the cursor.
;; A leading "0" followed by x/X, b/B or o/O selects the hex, binary or
;; octal digit class and skips the two-character prefix; anything else
;; (including legacy 0123 octal) is read as a decimal digit run.
(define gl-read-number!
  (fn ()
    (let
      ;; Radix marker: the char after a leading "0", else nil.
      ((marker (if (= (gl-cur) "0") (gl-peek 1) nil)))
      (cond
        (or (= marker "x") (= marker "X"))
        (do (gl-advance! 2) (gl-read-digit-run! lex-hex-digit?))
        (or (= marker "b") (= marker "B"))
        (do (gl-advance! 2) (gl-read-digit-run! gl-bin-digit?))
        (or (= marker "o") (= marker "O"))
        (do (gl-advance! 2) (gl-read-digit-run! gl-oct-digit?))
        :else (gl-read-digit-run! lex-digit?)))))
;; Reads a double-quoted string literal; the cursor is on the opening
;; quote. Returns the decoded contents. Stops after the closing quote or
;; at end of input (an unterminated string is not reported).
;; Escapes: \n \t \r decode to newline/tab/carriage-return; any other
;; escaped character (including \\ \" \') stands for itself.
(define gl-read-string!
  (fn ()
    (gl-advance! 1)
    (let ((str-chars (list)))
      (define gl-string-loop
        (fn ()
          (when (< pos src-len)
            (let ((ch (gl-cur)))
              (cond
                (= ch "\"")
                (gl-advance! 1)
                (= ch "\\")
                (do
                  (gl-advance! 1)
                  ;; A trailing backslash at end of input is dropped.
                  (when (< pos src-len)
                    (let ((esc-ch (gl-cur)))
                      (append!
                        str-chars
                        (cond
                          (= esc-ch "n") "\n"
                          (= esc-ch "t") "\t"
                          (= esc-ch "r") "\r"
                          :else esc-ch))
                      (gl-advance! 1)))
                  (gl-string-loop))
                :else (do
                  (append! str-chars ch)
                  (gl-advance! 1)
                  (gl-string-loop)))))))
      (gl-string-loop)
      (join "" str-chars))))
;; Reads a rune literal; the cursor is on the opening single quote.
;; Takes one character (or one backslash escape), then consumes the
;; closing quote if it is the next character. Returns the decoded text.
;; Escapes: \n \t \r decode to newline/tab/carriage-return; any other
;; escaped character stands for itself.
(define gl-read-rune!
  (fn ()
    (gl-advance! 1)
    (let ((rune-chars (list)))
      (when (< pos src-len)
        (if (= (gl-cur) "\\")
          (do
            (gl-advance! 1)
            (when (< pos src-len)
              (let ((esc-ch (gl-cur)))
                (append!
                  rune-chars
                  (cond
                    (= esc-ch "n") "\n"
                    (= esc-ch "t") "\t"
                    (= esc-ch "r") "\r"
                    :else esc-ch))
                (gl-advance! 1))))
          (do (append! rune-chars (gl-cur)) (gl-advance! 1))))
      (when
        (and (< pos src-len) (= (gl-cur) "'"))
        (gl-advance! 1))
      (join "" rune-chars))))
;; Returns the operator/punctuation text starting at the cursor, or nil
;; when the current character starts none. Does not move the cursor; the
;; caller advances by the length of the returned text.
;;
;; Clauses are ordered longest-first (3 chars, then 2, then 1) so that
;; e.g. "<<=" wins over "<<" and "<". The single-character set includes
;; "~" (Go spec § Operators and punctuation); without it the scanner's
;; fallback would silently drop the character.
(define
gl-match-op
(fn
()
(let
((c0 (gl-cur))
(c1 (gl-peek 1))
(c2 (gl-peek 2)))
(cond
;; --- three-character operators ---
(and (= c0 "<") (= c1 "<") (= c2 "="))
"<<="
(and (= c0 ">") (= c1 ">") (= c2 "="))
">>="
(and (= c0 "&") (= c1 "^") (= c2 "="))
"&^="
(and (= c0 ".") (= c1 ".") (= c2 "."))
"..."
;; --- two-character operators ---
(and (= c0 "=") (= c1 "="))
"=="
(and (= c0 "!") (= c1 "="))
"!="
(and (= c0 "<") (= c1 "="))
"<="
(and (= c0 ">") (= c1 "="))
">="
(and (= c0 "&") (= c1 "&"))
"&&"
(and (= c0 "|") (= c1 "|"))
"||"
(and (= c0 "+") (= c1 "+"))
"++"
(and (= c0 "-") (= c1 "-"))
"--"
(and (= c0 "<") (= c1 "<"))
"<<"
(and (= c0 ">") (= c1 ">"))
">>"
(and (= c0 "+") (= c1 "="))
"+="
(and (= c0 "-") (= c1 "="))
"-="
(and (= c0 "*") (= c1 "="))
"*="
(and (= c0 "/") (= c1 "="))
"/="
(and (= c0 "%") (= c1 "="))
"%="
(and (= c0 "&") (= c1 "="))
"&="
(and (= c0 "|") (= c1 "="))
"|="
(and (= c0 "^") (= c1 "="))
"^="
(and (= c0 ":") (= c1 "="))
":="
(and (= c0 "<") (= c1 "-"))
"<-"
(and (= c0 "&") (= c1 "^"))
"&^"
;; --- single-character operators and punctuation ---
(or
(= c0 "+")
(= c0 "-")
(= c0 "*")
(= c0 "/")
(= c0 "%")
(= c0 "&")
(= c0 "|")
(= c0 "^")
(= c0 "<")
(= c0 ">")
(= c0 "=")
(= c0 "!")
(= c0 "~")
(= c0 "(")
(= c0 ")")
(= c0 "{")
(= c0 "}")
(= c0 "[")
(= c0 "]")
(= c0 ",")
(= c0 ".")
(= c0 ":"))
c0
:else nil))))
;; Main scanner loop: classifies the character under the cursor, emits at
;; most one token, then recurses until end of input.
;; Branch order matters: newline is tested before general whitespace (so
;; semicolon insertion can run), and comments before the operator
;; fallback (so "/" is not emitted as an op). Characters that match
;; nothing are skipped without emitting a token.
(define gl-scan!
  (fn ()
    (when (< pos src-len)
      (let ((ch (gl-cur)) (ch-next (gl-peek 1)))
        (cond
          ;; Line end: maybe insert a semicolon, then continue.
          (= ch "\n")
          (do (gl-maybe-asi! pos) (gl-advance! 1) (gl-scan!))
          (lex-space? ch)
          (do (gl-advance! 1) (gl-scan!))
          ;; Line comment: the terminating newline is left for the branch above.
          (and (= ch "/") (= ch-next "/"))
          (do (gl-advance! 2) (gl-skip-line!) (gl-scan!))
          ;; Block comment: counts as a line end only if it spanned a newline.
          (and (= ch "/") (= ch-next "*"))
          (do
            (gl-advance! 2)
            (when (gl-skip-block! false) (gl-maybe-asi! pos))
            (gl-scan!))
          (= ch ";")
          (do
            (gl-emit! "semi" ";" pos)
            (gl-advance! 1)
            (gl-scan!))
          (lex-ident-start? ch)
          (let ((start pos))
            (let ((word (gl-read-ident! start)))
              (gl-emit!
                (if (go-keyword? word) "keyword" "ident")
                word
                start))
            (gl-scan!))
          (lex-digit? ch)
          (let ((start pos))
            (gl-read-number!)
            (gl-emit! "int" (slice src start pos) start)
            (gl-scan!))
          ;; start is captured before the reader moves the cursor.
          (= ch "\"")
          (let ((start pos))
            (let ((v (gl-read-string!)))
              (gl-emit! "string" v start))
            (gl-scan!))
          (= ch "'")
          (let ((start pos))
            (let ((v (gl-read-rune!)))
              (gl-emit! "rune" v start))
            (gl-scan!))
          :else (let ((op (gl-match-op)))
            (if op
              (do (gl-emit! "op" op pos) (gl-advance! (len op)))
              (gl-advance! 1))
            (gl-scan!)))))))
(gl-scan!)
(gl-maybe-asi! pos)
(gl-emit! "eof" nil pos)
tokens)))