go: lex.sx — keywords, ident/int/string/rune lits, comments, ops, ASI + 78 tests [consumes-lex]
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 23s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 23s
First Go-on-SX iteration. Tokenizer consumes lib/guest/lex.sx character-class
predicates. Automatic semicolon insertion per Go spec § Semicolons fires on
newline, EOF, and block comments containing a newline, after
ident/int/string/rune/{break,continue,fallthrough,return}/{++,--,),],}}.
Scoreboard + conformance.sh wired; lex 78/78. Plan Phase 1 sub-items
checked; floats/raw-strings/hex-ints still ⬜.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
371
lib/go/lex.sx
Normal file
371
lib/go/lex.sx
Normal file
@@ -0,0 +1,371 @@
|
||||
;; lib/go/lex.sx — Go tokenizer with automatic semicolon insertion.
|
||||
;;
|
||||
;; Consumes lib/guest/lex.sx character-class predicates.
|
||||
;;
|
||||
;; Tokens: {:type T :value V :pos P}
|
||||
;; Types:
|
||||
;; "ident" — identifiers (foo, _bar, mixedCase)
|
||||
;; "keyword" — one of the 25 Go keywords
|
||||
;; "int" — integer literals (decimal only this iteration)
|
||||
;; "string" — interpreted string literals "..."
|
||||
;; "rune" — rune literals 'x' (single char + simple escapes)
|
||||
;; "op" — operators & punctuation; :value is the literal text
|
||||
;; "semi" — explicit ';' or auto-inserted (Go spec § Semicolons)
|
||||
;; "eof" — end-of-input sentinel
|
||||
;;
|
||||
;; ASI (Go spec § Semicolons): a newline (or EOF, or a block comment
|
||||
;; containing a newline) emits a ";semi" if the previous emitted token's
|
||||
;; type is ident/int/string/rune, or its value is one of
|
||||
;; {break, continue, fallthrough, return, ++, --, ), ], }}.
|
||||
;;
|
||||
;; All scanner locals are gl- prefixed: SX host primitives (peek/emit/etc.)
|
||||
;; silently shadow guest-language defines. See feedback_sx_bind_clash.
|
||||
|
||||
(define
|
||||
go-keywords
|
||||
(list
|
||||
"break"
|
||||
"case"
|
||||
"chan"
|
||||
"const"
|
||||
"continue"
|
||||
"default"
|
||||
"defer"
|
||||
"else"
|
||||
"fallthrough"
|
||||
"for"
|
||||
"func"
|
||||
"go"
|
||||
"goto"
|
||||
"if"
|
||||
"import"
|
||||
"interface"
|
||||
"map"
|
||||
"package"
|
||||
"range"
|
||||
"return"
|
||||
"select"
|
||||
"struct"
|
||||
"switch"
|
||||
"type"
|
||||
"var"))
|
||||
|
||||
(define go-keyword? (fn (s) (some (fn (k) (= k s)) go-keywords)))
|
||||
|
||||
(define go-asi-keywords (list "break" "continue" "fallthrough" "return"))
|
||||
|
||||
(define go-asi-ops (list "++" "--" ")" "]" "}"))
|
||||
|
||||
(define
|
||||
go-asi-trigger?
|
||||
(fn
|
||||
(tok)
|
||||
(if
|
||||
(= tok nil)
|
||||
false
|
||||
(let
|
||||
((ty (get tok :type)) (v (get tok :value)))
|
||||
(or
|
||||
(= ty "ident")
|
||||
(= ty "int")
|
||||
(= ty "string")
|
||||
(= ty "rune")
|
||||
(and (= ty "keyword") (some (fn (k) (= k v)) go-asi-keywords))
|
||||
(and (= ty "op") (some (fn (o) (= o v)) go-asi-ops)))))))
|
||||
|
||||
(define
|
||||
go-tokenize
|
||||
(fn
|
||||
(src)
|
||||
(let
|
||||
((tokens (list)) (pos 0) (src-len (len src)))
|
||||
(define
|
||||
gl-peek
|
||||
(fn
|
||||
(offset)
|
||||
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
|
||||
(define gl-cur (fn () (gl-peek 0)))
|
||||
(define gl-advance! (fn (n) (set! pos (+ pos n))))
|
||||
(define
|
||||
gl-last
|
||||
(fn
|
||||
()
|
||||
(if
|
||||
(= (len tokens) 0)
|
||||
nil
|
||||
(nth tokens (- (len tokens) 1)))))
|
||||
(define gl-emit! (fn (type value start) (append! tokens {:type type :value value :pos start})))
|
||||
(define
|
||||
gl-maybe-asi!
|
||||
(fn
|
||||
(at)
|
||||
(when (go-asi-trigger? (gl-last)) (gl-emit! "semi" "\n" at))))
|
||||
(define
|
||||
gl-skip-line!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (not (= (gl-cur) "\n")))
|
||||
(gl-advance! 1)
|
||||
(gl-skip-line!))))
|
||||
(define
|
||||
gl-skip-block!
|
||||
(fn
|
||||
(saw-nl)
|
||||
(cond
|
||||
(>= pos src-len)
|
||||
saw-nl
|
||||
(and (= (gl-cur) "*") (= (gl-peek 1) "/"))
|
||||
(do (gl-advance! 2) saw-nl)
|
||||
:else (let
|
||||
((is-nl (= (gl-cur) "\n")))
|
||||
(gl-advance! 1)
|
||||
(gl-skip-block! (or saw-nl is-nl))))))
|
||||
(define
|
||||
gl-read-ident!
|
||||
(fn
|
||||
(start)
|
||||
(when
|
||||
(and (< pos src-len) (lex-ident-char? (gl-cur)))
|
||||
(gl-advance! 1)
|
||||
(gl-read-ident! start))
|
||||
(slice src start pos)))
|
||||
(define
|
||||
gl-read-digits!
|
||||
(fn
|
||||
()
|
||||
(when
|
||||
(and (< pos src-len) (lex-digit? (gl-cur)))
|
||||
(gl-advance! 1)
|
||||
(gl-read-digits!))))
|
||||
(define
|
||||
gl-read-string!
|
||||
(fn
|
||||
()
|
||||
(gl-advance! 1)
|
||||
(let
|
||||
((chars (list)))
|
||||
(define
|
||||
gl-string-loop
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
(>= pos src-len)
|
||||
nil
|
||||
(= (gl-cur) "\"")
|
||||
(gl-advance! 1)
|
||||
(= (gl-cur) "\\")
|
||||
(do
|
||||
(gl-advance! 1)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((ch (gl-cur)))
|
||||
(cond
|
||||
(= ch "n")
|
||||
(append! chars "\n")
|
||||
(= ch "t")
|
||||
(append! chars "\t")
|
||||
(= ch "r")
|
||||
(append! chars "\r")
|
||||
(= ch "\\")
|
||||
(append! chars "\\")
|
||||
(= ch "\"")
|
||||
(append! chars "\"")
|
||||
(= ch "'")
|
||||
(append! chars "'")
|
||||
:else (append! chars ch))
|
||||
(gl-advance! 1)))
|
||||
(gl-string-loop))
|
||||
:else (do
|
||||
(append! chars (gl-cur))
|
||||
(gl-advance! 1)
|
||||
(gl-string-loop)))))
|
||||
(gl-string-loop)
|
||||
(join "" chars))))
|
||||
(define
|
||||
gl-read-rune!
|
||||
(fn
|
||||
()
|
||||
(gl-advance! 1)
|
||||
(let
|
||||
((chars (list)))
|
||||
(cond
|
||||
(and (< pos src-len) (= (gl-cur) "\\"))
|
||||
(do
|
||||
(gl-advance! 1)
|
||||
(when
|
||||
(< pos src-len)
|
||||
(let
|
||||
((ch (gl-cur)))
|
||||
(cond
|
||||
(= ch "n")
|
||||
(append! chars "\n")
|
||||
(= ch "t")
|
||||
(append! chars "\t")
|
||||
(= ch "r")
|
||||
(append! chars "\r")
|
||||
(= ch "\\")
|
||||
(append! chars "\\")
|
||||
(= ch "'")
|
||||
(append! chars "'")
|
||||
(= ch "\"")
|
||||
(append! chars "\"")
|
||||
:else (append! chars ch))
|
||||
(gl-advance! 1))))
|
||||
(< pos src-len)
|
||||
(do (append! chars (gl-cur)) (gl-advance! 1)))
|
||||
(when
|
||||
(and (< pos src-len) (= (gl-cur) "'"))
|
||||
(gl-advance! 1))
|
||||
(join "" chars))))
|
||||
(define
|
||||
gl-match-op
|
||||
(fn
|
||||
()
|
||||
(let
|
||||
((c0 (gl-cur))
|
||||
(c1 (gl-peek 1))
|
||||
(c2 (gl-peek 2)))
|
||||
(cond
|
||||
(and (= c0 "<") (= c1 "<") (= c2 "="))
|
||||
"<<="
|
||||
(and (= c0 ">") (= c1 ">") (= c2 "="))
|
||||
">>="
|
||||
(and (= c0 "&") (= c1 "^") (= c2 "="))
|
||||
"&^="
|
||||
(and (= c0 ".") (= c1 ".") (= c2 "."))
|
||||
"..."
|
||||
(and (= c0 "=") (= c1 "="))
|
||||
"=="
|
||||
(and (= c0 "!") (= c1 "="))
|
||||
"!="
|
||||
(and (= c0 "<") (= c1 "="))
|
||||
"<="
|
||||
(and (= c0 ">") (= c1 "="))
|
||||
">="
|
||||
(and (= c0 "&") (= c1 "&"))
|
||||
"&&"
|
||||
(and (= c0 "|") (= c1 "|"))
|
||||
"||"
|
||||
(and (= c0 "+") (= c1 "+"))
|
||||
"++"
|
||||
(and (= c0 "-") (= c1 "-"))
|
||||
"--"
|
||||
(and (= c0 "<") (= c1 "<"))
|
||||
"<<"
|
||||
(and (= c0 ">") (= c1 ">"))
|
||||
">>"
|
||||
(and (= c0 "+") (= c1 "="))
|
||||
"+="
|
||||
(and (= c0 "-") (= c1 "="))
|
||||
"-="
|
||||
(and (= c0 "*") (= c1 "="))
|
||||
"*="
|
||||
(and (= c0 "/") (= c1 "="))
|
||||
"/="
|
||||
(and (= c0 "%") (= c1 "="))
|
||||
"%="
|
||||
(and (= c0 "&") (= c1 "="))
|
||||
"&="
|
||||
(and (= c0 "|") (= c1 "="))
|
||||
"|="
|
||||
(and (= c0 "^") (= c1 "="))
|
||||
"^="
|
||||
(and (= c0 ":") (= c1 "="))
|
||||
":="
|
||||
(and (= c0 "<") (= c1 "-"))
|
||||
"<-"
|
||||
(and (= c0 "&") (= c1 "^"))
|
||||
"&^"
|
||||
(or
|
||||
(= c0 "+")
|
||||
(= c0 "-")
|
||||
(= c0 "*")
|
||||
(= c0 "/")
|
||||
(= c0 "%")
|
||||
(= c0 "&")
|
||||
(= c0 "|")
|
||||
(= c0 "^")
|
||||
(= c0 "<")
|
||||
(= c0 ">")
|
||||
(= c0 "=")
|
||||
(= c0 "!")
|
||||
(= c0 "(")
|
||||
(= c0 ")")
|
||||
(= c0 "{")
|
||||
(= c0 "}")
|
||||
(= c0 "[")
|
||||
(= c0 "]")
|
||||
(= c0 ",")
|
||||
(= c0 ".")
|
||||
(= c0 ":"))
|
||||
c0
|
||||
:else nil))))
|
||||
(define
|
||||
gl-scan!
|
||||
(fn
|
||||
()
|
||||
(cond
|
||||
(>= pos src-len)
|
||||
nil
|
||||
(= (gl-cur) "\n")
|
||||
(do (gl-maybe-asi! pos) (gl-advance! 1) (gl-scan!))
|
||||
(lex-space? (gl-cur))
|
||||
(do (gl-advance! 1) (gl-scan!))
|
||||
(and (= (gl-cur) "/") (= (gl-peek 1) "/"))
|
||||
(do (gl-advance! 2) (gl-skip-line!) (gl-scan!))
|
||||
(and (= (gl-cur) "/") (= (gl-peek 1) "*"))
|
||||
(do
|
||||
(gl-advance! 2)
|
||||
(let
|
||||
((saw-nl (gl-skip-block! false)))
|
||||
(when saw-nl (gl-maybe-asi! pos)))
|
||||
(gl-scan!))
|
||||
(= (gl-cur) ";")
|
||||
(do
|
||||
(gl-emit! "semi" ";" pos)
|
||||
(gl-advance! 1)
|
||||
(gl-scan!))
|
||||
(lex-ident-start? (gl-cur))
|
||||
(do
|
||||
(let
|
||||
((start pos))
|
||||
(gl-read-ident! start)
|
||||
(let
|
||||
((word (slice src start pos)))
|
||||
(gl-emit!
|
||||
(if (go-keyword? word) "keyword" "ident")
|
||||
word
|
||||
start)))
|
||||
(gl-scan!))
|
||||
(lex-digit? (gl-cur))
|
||||
(do
|
||||
(let
|
||||
((start pos))
|
||||
(gl-read-digits!)
|
||||
(gl-emit! "int" (slice src start pos) start))
|
||||
(gl-scan!))
|
||||
(= (gl-cur) "\"")
|
||||
(let
|
||||
((start pos) (v (gl-read-string!)))
|
||||
(gl-emit! "string" v start)
|
||||
(gl-scan!))
|
||||
(= (gl-cur) "'")
|
||||
(let
|
||||
((start pos) (v (gl-read-rune!)))
|
||||
(gl-emit! "rune" v start)
|
||||
(gl-scan!))
|
||||
:else (let
|
||||
((op (gl-match-op)))
|
||||
(cond
|
||||
op
|
||||
(do
|
||||
(gl-emit! "op" op pos)
|
||||
(gl-advance! (len op))
|
||||
(gl-scan!))
|
||||
:else (do (gl-advance! 1) (gl-scan!)))))))
|
||||
(gl-scan!)
|
||||
(gl-maybe-asi! pos)
|
||||
(gl-emit! "eof" nil pos)
|
||||
tokens)))
|
||||
Reference in New Issue
Block a user