go: lex.sx — operator-set audit + tilde; PHASE 1 COMPLETE + 6 tests [proposes-lex]
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 25s
Adds the missing tilde operator '~' (Go 1.18+ generics type-set
constraint, e.g. 'interface { ~int | ~float64 }') to the longest-match
operator table. Adds an exhaustive 'op-audit:' test block covering
every Go operator/punctuation token by category — arithmetic +
assignment, bitwise + assignment, comparison + logical, decls /
arrows / variadic / inc-dec, punctuation, and tilde.
Phase 1 (tokenizer) is now complete. Two kit gaps surfaced and were
logged under Blockers in plans/go-on-sx.md for the substrate maintainer
and the next statically-typed guest loop:
* lib/guest/lex.sx lacks lex-oct-digit? / lex-bin-digit?
(we rolled local gl-* equivalents for 0o.. and 0b.. literals).
* lib/guest/lex.sx lacks a table-driven longest-prefix operator
matcher; our gl-match-op is a 25-clause cond ladder. Rust/Swift/TS
will each hit the same shape with 50+ ops apiece.
lex 129/129. Phase 2 (parser) next.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -393,7 +393,8 @@
|
|||||||
(= c0 "]")
|
(= c0 "]")
|
||||||
(= c0 ",")
|
(= c0 ",")
|
||||||
(= c0 ".")
|
(= c0 ".")
|
||||||
(= c0 ":"))
|
(= c0 ":")
|
||||||
|
(= c0 "~"))
|
||||||
c0
|
c0
|
||||||
:else nil))))
|
:else nil))))
|
||||||
(define
|
(define
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
{
|
{
|
||||||
"language": "go",
|
"language": "go",
|
||||||
"total_pass": 123,
|
"total_pass": 129,
|
||||||
"total": 123,
|
"total": 129,
|
||||||
"suites": [
|
"suites": [
|
||||||
{"name":"lex","pass":123,"total":123,"status":"ok"},
|
{"name":"lex","pass":129,"total":129,"status":"ok"},
|
||||||
{"name":"parse","pass":0,"total":0,"status":"pending"},
|
{"name":"parse","pass":0,"total":0,"status":"pending"},
|
||||||
{"name":"types","pass":0,"total":0,"status":"pending"},
|
{"name":"types","pass":0,"total":0,"status":"pending"},
|
||||||
{"name":"eval","pass":0,"total":0,"status":"pending"},
|
{"name":"eval","pass":0,"total":0,"status":"pending"},
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
# Go-on-SX Scoreboard
|
# Go-on-SX Scoreboard
|
||||||
|
|
||||||
**Total: 123 / 123 tests passing**
|
**Total: 129 / 129 tests passing**
|
||||||
|
|
||||||
| | Suite | Pass | Total |
|
| | Suite | Pass | Total |
|
||||||
|---|---|---|---|
|
|---|---|---|---|
|
||||||
| ✅ | lex | 123 | 123 |
|
| ✅ | lex | 129 | 129 |
|
||||||
| ⬜ | parse | 0 | 0 |
|
| ⬜ | parse | 0 | 0 |
|
||||||
| ⬜ | types | 0 | 0 |
|
| ⬜ | types | 0 | 0 |
|
||||||
| ⬜ | eval | 0 | 0 |
|
| ⬜ | eval | 0 | 0 |
|
||||||
|
|||||||
@@ -229,30 +229,60 @@
|
|||||||
"punct: comma colon dot"
|
"punct: comma colon dot"
|
||||||
(tok-values ", : .")
|
(tok-values ", : .")
|
||||||
(list "," ":" "." nil))
|
(list "," ":" "." nil))
|
||||||
|
(go-test
|
||||||
|
"op-audit: tilde (generics type-set)"
|
||||||
|
(tok-values "~int")
|
||||||
|
(list "~" "int" "\n" nil))
|
||||||
|
(go-test
|
||||||
|
"op-audit: all arithmetic + assignment"
|
||||||
|
(tok-values "+ - * / % += -= *= /= %=")
|
||||||
|
(list "+" "-" "*" "/" "%" "+=" "-=" "*=" "/=" "%=" nil))
|
||||||
|
(go-test
|
||||||
|
"op-audit: all bitwise + assignment"
|
||||||
|
(tok-values "& | ^ << >> &^ &= |= ^= <<= >>= &^=")
|
||||||
|
(list "&" "|" "^" "<<" ">>" "&^" "&=" "|=" "^=" "<<=" ">>=" "&^=" nil))
|
||||||
|
(go-test
|
||||||
|
"op-audit: all comparison + logical"
|
||||||
|
(tok-values "== != < > <= >= && || !")
|
||||||
|
(list "==" "!=" "<" ">" "<=" ">=" "&&" "||" "!" nil))
|
||||||
|
(go-test
|
||||||
|
"op-audit: assign / decls / arrows / variadic / inc-dec"
|
||||||
|
(tok-values "= := <- ++ -- ...")
|
||||||
|
(list "=" ":=" "<-" "++" "--" "..." nil))
|
||||||
|
|
||||||
|
;; ── short program ─────────────────────────────────────────────────
|
||||||
|
(go-test
|
||||||
|
"op-audit: punctuation"
|
||||||
|
(tok-values "( ) [ ] { } , . :")
|
||||||
|
(list "(" ")" "[" "]" "{" "}" "," "." ":" nil))
|
||||||
(go-test
|
(go-test
|
||||||
"ASI: after ident at newline"
|
"ASI: after ident at newline"
|
||||||
(tok-types "x\ny")
|
(tok-types "x\ny")
|
||||||
(list "ident" "semi" "ident" "semi" "eof"))
|
(list "ident" "semi" "ident" "semi" "eof"))
|
||||||
(go-test "ASI: after int" (tok-types "42\n") (list "int" "semi" "eof"))
|
(go-test "ASI: after int" (tok-types "42\n") (list "int" "semi" "eof"))
|
||||||
|
|
||||||
|
;; ── report ────────────────────────────────────────────────────────
|
||||||
(go-test "ASI: after float" (tok-types "3.14\n") (list "float" "semi" "eof"))
|
(go-test "ASI: after float" (tok-types "3.14\n") (list "float" "semi" "eof"))
|
||||||
|
|
||||||
(go-test
|
(go-test
|
||||||
"ASI: after string"
|
"ASI: after string"
|
||||||
(tok-types "\"hi\"\n")
|
(tok-types "\"hi\"\n")
|
||||||
(list "string" "semi" "eof"))
|
(list "string" "semi" "eof"))
|
||||||
|
|
||||||
(go-test "ASI: after rune" (tok-types "'a'\n") (list "rune" "semi" "eof"))
|
(go-test "ASI: after rune" (tok-types "'a'\n") (list "rune" "semi" "eof"))
|
||||||
|
|
||||||
;; ── short program ─────────────────────────────────────────────────
|
|
||||||
(go-test
|
(go-test
|
||||||
"ASI: after )"
|
"ASI: after )"
|
||||||
(tok-types "f()\n")
|
(tok-types "f()\n")
|
||||||
(list "ident" "op" "op" "semi" "eof"))
|
(list "ident" "op" "op" "semi" "eof"))
|
||||||
|
|
||||||
(go-test
|
(go-test
|
||||||
"ASI: after ]"
|
"ASI: after ]"
|
||||||
(tok-types "x[0]\n")
|
(tok-types "x[0]\n")
|
||||||
(list "ident" "op" "int" "op" "semi" "eof"))
|
(list "ident" "op" "int" "op" "semi" "eof"))
|
||||||
|
|
||||||
(go-test "ASI: after }" (tok-types "{}\n") (list "op" "op" "semi" "eof"))
|
(go-test "ASI: after }" (tok-types "{}\n") (list "op" "op" "semi" "eof"))
|
||||||
|
|
||||||
;; ── report ────────────────────────────────────────────────────────
|
|
||||||
(go-test "ASI: after ++" (tok-types "i++\n") (list "ident" "op" "semi" "eof"))
|
(go-test "ASI: after ++" (tok-types "i++\n") (list "ident" "op" "semi" "eof"))
|
||||||
|
|
||||||
(go-test
|
(go-test
|
||||||
|
|||||||
@@ -130,7 +130,7 @@ Suites planned:
|
|||||||
Loop-style. Each phase: implement → test → commit → tick `[ ]` → append
|
Loop-style. Each phase: implement → test → commit → tick `[ ]` → append
|
||||||
Progress-log line → push `origin/loops/go`.
|
Progress-log line → push `origin/loops/go`.
|
||||||
|
|
||||||
### Phase 1 — Tokenizer (`lib/go/lex.sx`) ⬜
|
### Phase 1 — Tokenizer (`lib/go/lex.sx`) ✅
|
||||||
- [x] Scaffold + scoreboard + conformance runner (consumes lib/guest/lex.sx)
|
- [x] Scaffold + scoreboard + conformance runner (consumes lib/guest/lex.sx)
|
||||||
- [x] Identifiers + 25 keywords
|
- [x] Identifiers + 25 keywords
|
||||||
- [x] Decimal integer literals
|
- [x] Decimal integer literals
|
||||||
@@ -148,8 +148,10 @@ Progress-log line → push `origin/loops/go`.
|
|||||||
as interpreted strings)
|
as interpreted strings)
|
||||||
- [x] Hex/octal/binary integer literals (0x… 0o… 0b…) + underscores
|
- [x] Hex/octal/binary integer literals (0x… 0o… 0b…) + underscores
|
||||||
(legacy 0123 octal also accepted; consumes lex-hex-digit?)
|
(legacy 0123 octal also accepted; consumes lex-hex-digit?)
|
||||||
- [ ] Full operator set audit (47 distinct per Go spec)
|
- [x] Full operator set audit (47 distinct per Go spec, plus `~` for
|
||||||
- **Acceptance:** lex/ suite at 50+ tests. Current: 123/123.
|
generics type-sets). Exhaustive coverage tests in `op-audit:` block.
|
||||||
|
- **Acceptance:** lex/ suite at 50+ tests. Current: 129/129. **Phase 1
|
||||||
|
done** — hex floats deferred (rare). Move to Phase 2 next.
|
||||||
|
|
||||||
### Phase 2 — Parser (`lib/go/parse.sx`) ⬜
|
### Phase 2 — Parser (`lib/go/parse.sx`) ⬜
|
||||||
- Consume `lib/guest/core/pratt.sx` + `lib/guest/core/ast.sx`. Chisel notes
|
- Consume `lib/guest/core/pratt.sx` + `lib/guest/core/ast.sx`. Chisel notes
|
||||||
@@ -402,12 +404,35 @@ Every commit ends its message with a chisel note in brackets:
|
|||||||
|
|
||||||
## Blockers
|
## Blockers
|
||||||
|
|
||||||
_(none yet)_
|
### Kit-gap proposals against `lib/guest/lex.sx`
|
||||||
|
|
||||||
|
Observed from building the Go tokenizer. Not blocking Phase 2; surfaced
|
||||||
|
here for the substrate-maintainer / next statically-typed-guest loop:
|
||||||
|
|
||||||
|
1. **No `lex-oct-digit?` / `lex-bin-digit?`.** Go's prefixed integer forms
|
||||||
|
`0o17` and `0b1010` need digit-class predicates that the kit doesn't
|
||||||
|
provide. We rolled local `gl-oct-digit?` and `gl-bin-digit?`. Rust and
|
||||||
|
Swift's lexers will need the same. Cheap to promote.
|
||||||
|
|
||||||
|
2. **No table-driven longest-prefix matcher.** Go has 47+ operator
|
||||||
|
sequences with longest-match semantics. Our `gl-match-op` is a
|
||||||
|
25-clause `cond` ladder; Rust/Swift/TS will each need ~50+. A kit
|
||||||
|
helper like `(lex-match-longest TABLE SOURCE POS)` that takes a sorted
|
||||||
|
prefix table would collapse this. Worth proposing once a second
|
||||||
|
statically-typed guest hits the same pattern.
|
||||||
|
|
||||||
|
Minimal repro: see `lib/go/lex.sx#gl-oct-digit?` and `#gl-match-op`.
|
||||||
|
|
||||||
## Progress log
|
## Progress log
|
||||||
|
|
||||||
_Newest first. Append one dated entry per commit._
|
_Newest first. Append one dated entry per commit._
|
||||||
|
|
||||||
|
- 2026-05-27 — **Phase 1 complete.** Operator-set audit: added missing
|
||||||
|
`~` (Go 1.18+ generics type-set), exhaustive op coverage tests grouped
|
||||||
|
by category. Two kit gaps observed and logged in Blockers:
|
||||||
|
`lex-oct-digit?`/`lex-bin-digit?` predicates + `lex-match-longest`
|
||||||
|
table-driven prefix matcher — both useful for future statically-typed
|
||||||
|
guests. +6 tests, lex 129/129. `[proposes-lex]`. Phase 2 (parser) next.
|
||||||
- 2026-05-27 — Phase 1 cont.: raw string literals (backtick-delimited).
|
- 2026-05-27 — Phase 1 cont.: raw string literals (backtick-delimited).
|
||||||
Multi-line, no escape processing, `\r` stripped per Go spec § String
|
Multi-line, no escape processing, `\r` stripped per Go spec § String
|
||||||
literals. Same `"string"` token type as interpreted strings — parsers
|
literals. Same `"string"` token type as interpreted strings — parsers
|
||||||
|
|||||||
Reference in New Issue
Block a user