Files
rose-ash/lib/go/tests/lex.sx
giles c1baca2e4e
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
go: lex.sx — operator-set audit + tilde; PHASE 1 COMPLETE + 6 tests [proposes-lex]
Adds the missing tilde operator '~' (Go 1.18+ generics type-set
constraint, e.g. 'interface { ~int | ~float64 }') to the longest-match
operator table. Adds an exhaustive 'op-audit:' test block covering
every Go operator/punctuation token by category — arithmetic +
assignment, bitwise + assignment, comparison + logical, decls /
arrows / variadic / inc-dec, punctuation, and tilde.

Phase 1 (tokenizer) is now complete. Two kit gaps surfaced and logged
in plans/go-on-sx.md Blockers for the substrate maintainer / next
statically-typed guest loop:

  * lib/guest/lex.sx lacks lex-oct-digit? / lex-bin-digit?
    (we rolled local gl-* equivalents for 0o.. and 0b.. literals).
  * lib/guest/lex.sx lacks a table-driven longest-prefix operator
    matcher; our gl-match-op is a 25-clause cond ladder. Rust/Swift/TS
    will each hit the same shape with 50+ ops apiece.

lex 129/129. Phase 2 (parser) next.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 07:28:50 +00:00

340 lines
13 KiB
Plaintext

;; Go tokenizer tests.
(define go-test-count 0)
(define go-test-pass 0)
(define go-test-fails (list))
(define gtok-type (fn (t) (get t :type)))
(define gtok-value (fn (t) (get t :value)))
(define tok-types (fn (src) (map gtok-type (go-tokenize src))))
(define tok-values (fn (src) (map gtok-value (go-tokenize src))))
(define
go-test
(fn
(name actual expected)
(set! go-test-count (+ go-test-count 1))
(if
(= actual expected)
(set! go-test-pass (+ go-test-pass 1))
(append! go-test-fails {:name name :expected expected :actual actual}))))
;; ── empty / whitespace ────────────────────────────────────────────
(go-test "empty source" (tok-types "") (list "eof"))
(go-test "spaces only" (tok-types " ") (list "eof"))
(go-test "tabs only" (tok-types "\t\t") (list "eof"))
(go-test
"newline only — no prior token, no ASI"
(tok-types "\n")
(list "eof"))
;; ── identifiers ───────────────────────────────────────────────────
(go-test "ident: simple" (tok-values "foo") (list "foo" "\n" nil))
(go-test
"ident: underscore prefix"
(tok-values "_bar")
(list "_bar" "\n" nil))
(go-test "ident: mixed case" (tok-values "fooBar") (list "fooBar" "\n" nil))
(go-test "ident: with digits" (tok-values "x123") (list "x123" "\n" nil))
(go-test "ident: type tag" (tok-types "foo") (list "ident" "semi" "eof"))
;; ── keywords (all 25) ─────────────────────────────────────────────
(go-test "kw: break" (tok-types "break") (list "keyword" "semi" "eof"))
(go-test "kw: case" (tok-types "case") (list "keyword" "eof"))
(go-test "kw: chan" (tok-types "chan") (list "keyword" "eof"))
(go-test "kw: const" (tok-types "const") (list "keyword" "eof"))
(go-test "kw: continue" (tok-types "continue") (list "keyword" "semi" "eof"))
(go-test "kw: default" (tok-types "default") (list "keyword" "eof"))
(go-test "kw: defer" (tok-types "defer") (list "keyword" "eof"))
(go-test "kw: else" (tok-types "else") (list "keyword" "eof"))
(go-test
"kw: fallthrough"
(tok-types "fallthrough")
(list "keyword" "semi" "eof"))
(go-test "kw: for" (tok-types "for") (list "keyword" "eof"))
(go-test "kw: func" (tok-types "func") (list "keyword" "eof"))
(go-test "kw: go" (tok-types "go") (list "keyword" "eof"))
(go-test "kw: goto" (tok-types "goto") (list "keyword" "eof"))
(go-test "kw: if" (tok-types "if") (list "keyword" "eof"))
(go-test "kw: import" (tok-types "import") (list "keyword" "eof"))
(go-test "kw: interface" (tok-types "interface") (list "keyword" "eof"))
(go-test "kw: map" (tok-types "map") (list "keyword" "eof"))
(go-test "kw: package" (tok-types "package") (list "keyword" "eof"))
(go-test "kw: range" (tok-types "range") (list "keyword" "eof"))
(go-test "kw: return" (tok-types "return") (list "keyword" "semi" "eof"))
(go-test "kw: select" (tok-types "select") (list "keyword" "eof"))
(go-test "kw: struct" (tok-types "struct") (list "keyword" "eof"))
(go-test "kw: switch" (tok-types "switch") (list "keyword" "eof"))
(go-test "kw: type" (tok-types "type") (list "keyword" "eof"))
(go-test "kw: var" (tok-types "var") (list "keyword" "eof"))
;; ── integer literals — decimal ────────────────────────────────────
(go-test "int: zero" (tok-values "0") (list "0" "\n" nil))
(go-test "int: small" (tok-values "42") (list "42" "\n" nil))
(go-test "int: bigger" (tok-values "123456") (list "123456" "\n" nil))
(go-test "int: type" (tok-types "42") (list "int" "semi" "eof"))
;; ── integer literals — prefixed + underscores ─────────────────────
(go-test "int: hex lower" (tok-values "0x1f") (list "0x1f" "\n" nil))
(go-test "int: hex upper-x" (tok-values "0X1F") (list "0X1F" "\n" nil))
(go-test
"int: hex mixed digits"
(tok-values "0xDEADbeef")
(list "0xDEADbeef" "\n" nil))
(go-test "int: binary lower" (tok-values "0b1010") (list "0b1010" "\n" nil))
(go-test "int: binary upper" (tok-values "0B1101") (list "0B1101" "\n" nil))
(go-test "int: octal modern" (tok-values "0o755") (list "0o755" "\n" nil))
(go-test "int: octal upper" (tok-values "0O17") (list "0O17" "\n" nil))
(go-test "int: octal legacy" (tok-values "0755") (list "0755" "\n" nil))
(go-test "int: hex type" (tok-types "0x1F") (list "int" "semi" "eof"))
(go-test "int: bin type" (tok-types "0b101") (list "int" "semi" "eof"))
(go-test
"int: dec underscore"
(tok-values "1_000_000")
(list "1_000_000" "\n" nil))
(go-test
"int: hex underscore"
(tok-values "0xDEAD_BEEF")
(list "0xDEAD_BEEF" "\n" nil))
(go-test
"int: bin underscore"
(tok-values "0b1010_1010")
(list "0b1010_1010" "\n" nil))
(go-test
"int: hex then +"
(tok-types "0xFF + 1")
(list "int" "op" "int" "semi" "eof"))
;; ── float literals (Go spec § Floating-point literals) ────────────
(go-test "float: simple" (tok-values "3.14") (list "3.14" "\n" nil))
(go-test "float: trailing dot" (tok-values "1.") (list "1." "\n" nil))
(go-test "float: leading dot" (tok-values ".5") (list ".5" "\n" nil))
(go-test "float: exp lower" (tok-values "1e10") (list "1e10" "\n" nil))
(go-test "float: exp upper" (tok-values "1E5") (list "1E5" "\n" nil))
(go-test "float: exp negative" (tok-values "1.5e-3") (list "1.5e-3" "\n" nil))
(go-test "float: exp positive" (tok-values "2.0e+2") (list "2.0e+2" "\n" nil))
(go-test "float: zero" (tok-values "0.0") (list "0.0" "\n" nil))
(go-test "float: dot-only-exp" (tok-values ".5e2") (list ".5e2" "\n" nil))
(go-test "float: underscore" (tok-values "1_000.5") (list "1_000.5" "\n" nil))
(go-test "float: type" (tok-types "3.14") (list "float" "semi" "eof"))
(go-test
"float: trailing dot type"
(tok-types "1.")
(list "float" "semi" "eof"))
(go-test
"float: exp-only type"
(tok-types "1e10")
(list "float" "semi" "eof"))
(go-test
"float: then +"
(tok-types "3.14 + 0.1")
(list "float" "op" "float" "semi" "eof"))
(go-test
"float: greedy 1.method"
(tok-types "1.method")
(list "float" "ident" "semi" "eof"))
;; ── imaginary literals (Go spec § Imaginary literals) ─────────────
(go-test "imag: int i" (tok-values "2i") (list "2i" "\n" nil))
(go-test "imag: float i" (tok-values "3.14i") (list "3.14i" "\n" nil))
(go-test "imag: exp i" (tok-values "1e2i") (list "1e2i" "\n" nil))
(go-test "imag: int-i type" (tok-types "2i") (list "imag" "semi" "eof"))
(go-test "imag: float-i type" (tok-types "3.14i") (list "imag" "semi" "eof"))
(go-test "imag: ASI at newline" (tok-types "1i\n") (list "imag" "semi" "eof"))
;; ── string literals ───────────────────────────────────────────────
(go-test "raw: simple" (tok-values "`hello`") (list "hello" "\n" nil))
(go-test "raw: empty" (tok-values "``") (list "" "\n" nil))
(go-test
"raw: backslash literal — no escape processing"
(tok-values "`a\\nb`")
(list "a\\nb" "\n" nil))
(go-test
"raw: multi-line"
(tok-values "`line1\nline2`")
(list "line1\nline2" "\n" nil))
(go-test
"raw: contains double-quote"
(tok-values "`say \"hi\"`")
(list "say \"hi\"" "\n" nil))
(go-test
"raw: CR stripped (Go spec § String literals)"
(tok-values "`a\r\nb`")
(list "a\nb" "\n" nil))
(go-test "raw: type" (tok-types "`x`") (list "string" "semi" "eof"))
;; ── rune literals ─────────────────────────────────────────────────
(go-test
"raw: then +"
(tok-types "`x` + 1")
(list "string" "op" "int" "semi" "eof"))
(go-test
"raw: ASI at newline after"
(tok-types "`abc`\n")
(list "string" "semi" "eof"))
(go-test "string: empty" (tok-values "\"\"") (list "" "\n" nil))
;; ── comments ──────────────────────────────────────────────────────
(go-test "string: hello" (tok-values "\"hello\"") (list "hello" "\n" nil))
(go-test
"string: with space"
(tok-values "\"hi there\"")
(list "hi there" "\n" nil))
(go-test "string: escape n" (tok-values "\"a\\nb\"") (list "a\nb" "\n" nil))
(go-test "string: escape quote" (tok-values "\"a\\\"b\"") (list "a\"b" "\n" nil))
(go-test
"string: escape backslash"
(tok-values "\"a\\\\b\"")
(list "a\\b" "\n" nil))
;; ── operators & punctuation ───────────────────────────────────────
(go-test "string: type" (tok-types "\"x\"") (list "string" "semi" "eof"))
(go-test "rune: simple" (tok-values "'a'") (list "a" "\n" nil))
(go-test "rune: escape" (tok-values "'\\n'") (list "\n" "\n" nil))
(go-test "rune: type" (tok-types "'a'") (list "rune" "semi" "eof"))
(go-test "line comment" (tok-types "// ignored") (list "eof"))
(go-test "line comment then code" (tok-values "// hi\nx") (list "x" "\n" nil))
(go-test "block comment" (tok-types "/* a b c */") (list "eof"))
(go-test
"block comment inline"
(tok-values "x /* mid */ y")
(list "x" "y" "\n" nil))
(go-test
"block comment with newline — ASI"
(tok-types "x /* multi\nline */ y")
(list "ident" "semi" "ident" "semi" "eof"))
;; ── automatic semicolon insertion (Go spec § Semicolons) ──────────
(go-test
"ops: arithmetic"
(tok-values "+ - * / %")
(list "+" "-" "*" "/" "%" nil))
(go-test
"ops: comparison"
(tok-values "== != < > <= >=")
(list "==" "!=" "<" ">" "<=" ">=" nil))
(go-test "ops: logical" (tok-values "&& || !") (list "&&" "||" "!" nil))
(go-test
"ops: assign forms"
(tok-values "= := += -=")
(list "=" ":=" "+=" "-=" nil))
(go-test "ops: channel arrow" (tok-values "<- chan") (list "<-" "chan" nil))
(go-test "ops: incdec ASI" (tok-types "++ --") (list "op" "op" "semi" "eof"))
(go-test "ops: ellipsis" (tok-values "...") (list "..." nil))
(go-test
"punct: all brackets"
(tok-values "( ) { } [ ]")
(list "(" ")" "{" "}" "[" "]" "\n" nil))
(go-test
"punct: comma colon dot"
(tok-values ", : .")
(list "," ":" "." nil))
(go-test
"op-audit: tilde (generics type-set)"
(tok-values "~int")
(list "~" "int" "\n" nil))
(go-test
"op-audit: all arithmetic + assignment"
(tok-values "+ - * / % += -= *= /= %=")
(list "+" "-" "*" "/" "%" "+=" "-=" "*=" "/=" "%=" nil))
(go-test
"op-audit: all bitwise + assignment"
(tok-values "& | ^ << >> &^ &= |= ^= <<= >>= &^=")
(list "&" "|" "^" "<<" ">>" "&^" "&=" "|=" "^=" "<<=" ">>=" "&^=" nil))
(go-test
"op-audit: all comparison + logical"
(tok-values "== != < > <= >= && || !")
(list "==" "!=" "<" ">" "<=" ">=" "&&" "||" "!" nil))
(go-test
"op-audit: assign / decls / arrows / variadic / inc-dec"
(tok-values "= := <- ++ -- ...")
(list "=" ":=" "<-" "++" "--" "..." nil))
;; ── short program ─────────────────────────────────────────────────
(go-test
"op-audit: punctuation"
(tok-values "( ) [ ] { } , . :")
(list "(" ")" "[" "]" "{" "}" "," "." ":" nil))
(go-test
"ASI: after ident at newline"
(tok-types "x\ny")
(list "ident" "semi" "ident" "semi" "eof"))
(go-test "ASI: after int" (tok-types "42\n") (list "int" "semi" "eof"))
;; ── report ────────────────────────────────────────────────────────
(go-test "ASI: after float" (tok-types "3.14\n") (list "float" "semi" "eof"))
(go-test
"ASI: after string"
(tok-types "\"hi\"\n")
(list "string" "semi" "eof"))
(go-test "ASI: after rune" (tok-types "'a'\n") (list "rune" "semi" "eof"))
(go-test
"ASI: after )"
(tok-types "f()\n")
(list "ident" "op" "op" "semi" "eof"))
(go-test
"ASI: after ]"
(tok-types "x[0]\n")
(list "ident" "op" "int" "op" "semi" "eof"))
(go-test "ASI: after }" (tok-types "{}\n") (list "op" "op" "semi" "eof"))
(go-test "ASI: after ++" (tok-types "i++\n") (list "ident" "op" "semi" "eof"))
(go-test
"ASI: NOT after +"
(tok-types "x +\ny")
(list "ident" "op" "ident" "semi" "eof"))
(go-test
"ASI: NOT after ("
(tok-types "f(\nx)")
(list "ident" "op" "ident" "op" "semi" "eof"))
(go-test
"ASI: blank lines collapse — single semi only"
(tok-types "x\n\n\ny")
(list "ident" "semi" "ident" "semi" "eof"))
(go-test
"ASI: at EOF after ident"
(tok-types "x")
(list "ident" "semi" "eof"))
(go-test
"ASI: explicit semi"
(tok-types "x;y")
(list "ident" "semi" "ident" "semi" "eof"))
(go-test
"short-decl: x := 42 (types)"
(tok-types "x := 42")
(list "ident" "op" "int" "semi" "eof"))
(go-test
"short-decl: x := 42 (values)"
(tok-values "x := 42")
(list "x" ":=" "42" "\n" nil))
(go-test
"func decl shape"
(tok-types "func foo() int { return 0 }")
(list
"keyword"
"ident"
"op"
"op"
"ident"
"op"
"keyword"
"int"
"op"
"semi"
"eof"))
(define go-lex-test-summary (str "lex " go-test-pass "/" go-test-count))