From c1baca2e4e657480339eab1af749744364198f29 Mon Sep 17 00:00:00 2001 From: giles Date: Wed, 27 May 2026 07:28:50 +0000 Subject: [PATCH] =?UTF-8?q?go:=20lex.sx=20=E2=80=94=20operator-set=20audit?= =?UTF-8?q?=20+=20tilde;=20PHASE=201=20COMPLETE=20+=206=20tests=20[propose?= =?UTF-8?q?s-lex]?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the missing tilde operator '~' (Go 1.18+ generics type-set constraint, e.g. 'interface { ~int | ~float64 }') to the longest-match operator table. Adds an exhaustive 'op-audit:' test block covering every Go operator/punctuation token by category — arithmetic + assignment, bitwise + assignment, comparison + logical, decls / arrows / variadic / inc-dec, punctuation, and tilde. Phase 1 (tokenizer) is now complete. Two kit gaps surfaced and logged in plans/go-on-sx.md Blockers for the substrate maintainer / next statically-typed guest loop: * lib/guest/lex.sx lacks lex-oct-digit? / lex-bin-digit? (we rolled local gl-* equivalents for 0o.. and 0b.. literals). * lib/guest/lex.sx lacks a table-driven longest-prefix operator matcher; our gl-match-op is a 25-clause cond ladder. Rust/Swift/TS will each hit the same shape with 50+ ops apiece. lex 129/129. Phase 2 (parser) next. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/go/lex.sx | 3 ++- lib/go/scoreboard.json | 6 +++--- lib/go/scoreboard.md | 4 ++-- lib/go/tests/lex.sx | 34 ++++++++++++++++++++++++++++++++-- plans/go-on-sx.md | 33 +++++++++++++++++++++++++++++---- 5 files changed, 68 insertions(+), 12 deletions(-) diff --git a/lib/go/lex.sx b/lib/go/lex.sx index 29f5d1c1..f79e72b5 100644 --- a/lib/go/lex.sx +++ b/lib/go/lex.sx @@ -393,7 +393,8 @@ (= c0 "]") (= c0 ",") (= c0 ".") - (= c0 ":")) + (= c0 ":") + (= c0 "~")) c0 :else nil)))) (define diff --git a/lib/go/scoreboard.json b/lib/go/scoreboard.json index c856e925..b7863644 100644 --- a/lib/go/scoreboard.json +++ b/lib/go/scoreboard.json @@ -1,9 +1,9 @@ { "language": "go", - "total_pass": 123, - "total": 123, + "total_pass": 129, + "total": 129, "suites": [ - {"name":"lex","pass":123,"total":123,"status":"ok"}, + {"name":"lex","pass":129,"total":129,"status":"ok"}, {"name":"parse","pass":0,"total":0,"status":"pending"}, {"name":"types","pass":0,"total":0,"status":"pending"}, {"name":"eval","pass":0,"total":0,"status":"pending"}, diff --git a/lib/go/scoreboard.md b/lib/go/scoreboard.md index 82f738a4..b0346fdb 100644 --- a/lib/go/scoreboard.md +++ b/lib/go/scoreboard.md @@ -1,10 +1,10 @@ # Go-on-SX Scoreboard -**Total: 123 / 123 tests passing** +**Total: 129 / 129 tests passing** | | Suite | Pass | Total | |---|---|---|---| -| ✅ | lex | 123 | 123 | +| ✅ | lex | 129 | 129 | | ⬜ | parse | 0 | 0 | | ⬜ | types | 0 | 0 | | ⬜ | eval | 0 | 0 | diff --git a/lib/go/tests/lex.sx b/lib/go/tests/lex.sx index 50604e16..da21fac4 100644 --- a/lib/go/tests/lex.sx +++ b/lib/go/tests/lex.sx @@ -229,30 +229,60 @@ "punct: comma colon dot" (tok-values ", : .") (list "," ":" "." nil)) +(go-test + "op-audit: tilde (generics type-set)" + (tok-values "~int") + (list "~" "int" "\n" nil)) +(go-test + "op-audit: all arithmetic + assignment" + (tok-values "+ - * / % += -= *= /= %=") + (list "+" "-" "*" "/" "%" "+=" "-=" "*=" "/=" "%=" nil)) +(go-test + "op-audit: all bitwise + assignment" + (tok-values "& | ^ << >> &^ &= |= ^= <<= >>= &^=") + (list "&" "|" "^" "<<" ">>" "&^" "&=" "|=" "^=" "<<=" ">>=" "&^=" nil)) +(go-test + "op-audit: all comparison + logical" + (tok-values "== != < > <= >= && || !") + (list "==" "!=" "<" ">" "<=" ">=" "&&" "||" "!" nil)) +(go-test + "op-audit: assign / decls / arrows / variadic / inc-dec" + (tok-values "= := <- ++ -- ...") + (list "=" ":=" "<-" "++" "--" "..." nil)) + +;; ── short program ───────────────────────────────────────────────── +(go-test + "op-audit: punctuation" + (tok-values "( ) [ ] { } , . :") + (list "(" ")" "[" "]" "{" "}" "," "." ":" nil)) (go-test "ASI: after ident at newline" (tok-types "x\ny") (list "ident" "semi" "ident" "semi" "eof")) (go-test "ASI: after int" (tok-types "42\n") (list "int" "semi" "eof")) + +;; ── report ──────────────────────────────────────────────────────── (go-test "ASI: after float" (tok-types "3.14\n") (list "float" "semi" "eof")) + (go-test "ASI: after string" (tok-types "\"hi\"\n") (list "string" "semi" "eof")) + (go-test "ASI: after rune" (tok-types "'a'\n") (list "rune" "semi" "eof")) -;; ── short program ───────────────────────────────────────────────── (go-test "ASI: after )" (tok-types "f()\n") (list "ident" "op" "op" "semi" "eof")) + (go-test "ASI: after ]" (tok-types "x[0]\n") (list "ident" "op" "int" "op" "semi" "eof")) + (go-test "ASI: after }" (tok-types "{}\n") (list "op" "op" "semi" "eof")) -;; ── report ──────────────────────────────────────────────────────── (go-test "ASI: after ++" (tok-types "i++\n") (list "ident" "op" "semi" "eof")) (go-test diff --git a/plans/go-on-sx.md b/plans/go-on-sx.md index b5054af6..70457860 100644 --- a/plans/go-on-sx.md +++ b/plans/go-on-sx.md @@ -130,7 +130,7 @@ Suites planned: Loop-style. Each phase: implement → test → commit → tick `[ ]` → append Progress-log line → push `origin/loops/go`. -### Phase 1 — Tokenizer (`lib/go/lex.sx`) ⬜ +### Phase 1 — Tokenizer (`lib/go/lex.sx`) ✅ - [x] Scaffold + scoreboard + conformance runner (consumes lib/guest/lex.sx) - [x] Identifiers + 25 keywords - [x] Decimal integer literals @@ -148,8 +148,10 @@ Progress-log line → push `origin/loops/go`. as interpreted strings) - [x] Hex/octal/binary integer literals (0x… 0o… 0b…) + underscores (legacy 0123 octal also accepted; consumes lex-hex-digit?) -- [ ] Full operator set audit (47 distinct per Go spec) -- **Acceptance:** lex/ suite at 50+ tests. Current: 123/123. +- [x] Full operator set audit (47 distinct per Go spec, plus `~` for + generics type-sets). Exhaustive coverage tests in `op-audit:` block. +- **Acceptance:** lex/ suite at 50+ tests. Current: 129/129. **Phase 1 + done** — hex floats deferred (rare). Move to Phase 2 next. ### Phase 2 — Parser (`lib/go/parse.sx`) ⬜ - Consume `lib/guest/core/pratt.sx` + `lib/guest/core/ast.sx`. Chisel notes @@ -402,12 +404,35 @@ Every commit ends its message with a chisel note in brackets: ## Blockers -_(none yet)_ +### Kit-gap proposals against `lib/guest/lex.sx` + +Observed from building the Go tokenizer. Not blocking Phase 2; surfaced +here for the substrate-maintainer / next statically-typed-guest loop: + +1. **No `lex-oct-digit?` / `lex-bin-digit?`.** Go's prefixed integer forms + `0o17` and `0b1010` need digit-class predicates that the kit doesn't + provide. We rolled local `gl-oct-digit?` and `gl-bin-digit?`. Rust and + Swift's lexers will need the same. Cheap to promote. + +2. **No table-driven longest-prefix matcher.** Go has 47+ operator + sequences with longest-match semantics. Our `gl-match-op` is a + 25-clause `cond` ladder; Rust/Swift/TS will each need ~50+. A kit + helper like `(lex-match-longest TABLE SOURCE POS)` that takes a sorted + prefix table would collapse this. Worth proposing once a second + statically-typed guest hits the same pattern. + +Minimal repro: see `lib/go/lex.sx#gl-oct-digit?` and `#gl-match-op`. ## Progress log _Newest first. Append one dated entry per commit._ +- 2026-05-27 — **Phase 1 complete.** Operator-set audit: added missing + `~` (Go 1.18+ generics type-set), exhaustive op coverage tests grouped + by category. Two kit gaps observed and logged in Blockers: + `lex-oct-digit?`/`lex-bin-digit?` predicates + `lex-match-longest` + table-driven prefix matcher — both useful for future statically-typed + guests. +6 tests, lex 129/129. `[proposes-lex]`. Phase 2 (parser) next. - 2026-05-27 — Phase 1 cont.: raw string literals (backtick-delimited). Multi-line, no escape processing, `\r` stripped per Go spec § String literals. Same `"string"` token type as interpreted strings — parsers