Files
rose-ash/lib/go/parse.sx
giles 4922b6e987
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 33s
go: parse.sx — package/import/var/const/type declarations + 10 tests [consumes-ast]
First slice of Phase 2 declarations:
  package main                      →  (list :package "main")
  import "fmt"                      →  (ast-import "fmt")    [from kit]
  var x int                         →  var-decl + :field binding
  var x = 5                         →  init only (type inferred)
  var x int = 5                     →  both type and init
  var x, y int = 1, 2               →  multi-name shared type
  const Pi = 3.14                   →  const-decl
  const C int = 42                  →  typed const
  type T int                        →  named type (a definition; the alias form would be type T = int)
  type Point struct { x, y int }    →  named struct

New gp-parse-top dispatches on the leading keyword: routes
package/import/var/const/type to gp-parse-decl; everything else
still goes through gp-parse-expr. Existing expression tests are
unaffected (cur won't be a decl keyword at expression start).

var/const decls use the (:field NAMES TYPE) shape from the
ast-binding-group proposal — first concrete cross-deliverable use:
struct fields, var decls, const decls all envelope through the
same node. That's the smell test for whether the kit shape is
right; so far it's clean.

import uses the canonical ast-import from lib/guest/ast.sx — first
direct use of a kit constructor for a declaration shape.

Grouped/parenthesized decls (var (...), import (...), const (...),
type (...)) and func decls (with method receivers + named params)
deferred to subsequent iterations.

parse 124/124, total 253/253.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-27 19:44:24 +00:00

648 lines
25 KiB
Plaintext

;; lib/go/parse.sx — Go parser. Tokenises via go-tokenize (lib/go/lex.sx),
;; builds canonical AST nodes per lib/guest/ast.sx, and uses
;; pratt-op-lookup from lib/guest/pratt.sx for operator-precedence climbing.
;;
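;; Slices so far:
;; 1. Primary expressions — literal / identifier → ast-literal / ast-var
;; 2. Binary operators — Pratt precedence climbing against
;; go-precedence-table; binary application
;; emitted as (ast-app (ast-var OP) [LHS RHS]).
;; 3. Unary and postfix forms — prefix operators, selectors, calls,
;; index/slice, type assertions, composite literals.
;; 4. Type expressions — pointer, slice/array, map, chan, func, struct,
;; interface, named and package-qualified types.
;; 5. Declarations — single package / import / var / const / type.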
;;
;; The climbing loop is per-language (see lib/guest/pratt.sx header on why)
;; but the entry shape and lookup are shared.
;;
;; All scanner locals are gp- prefixed: SX host primitives silently shadow
;; guest-language defines.
(define
go-precedence-table
;; Binary-operator table handed to pratt-op-lookup by gp-pratt-loop.
;; Each row is (OPERATOR PRECEDENCE ASSOCIATIVITY). gp-pratt-loop stops
;; climbing when a row's precedence is below its min-prec, so a larger
;; number binds tighter. Every row here is :left.
(list
;; Precedence 5 — multiplicative, shifts, bitwise and / and-not.
(list "*" 5 :left)
(list "/" 5 :left)
(list "%" 5 :left)
(list "<<" 5 :left)
(list ">>" 5 :left)
(list "&" 5 :left)
(list "&^" 5 :left)
;; Precedence 4 — additive, bitwise or / xor.
(list "+" 4 :left)
(list "-" 4 :left)
(list "|" 4 :left)
(list "^" 4 :left)
;; Precedence 3 — comparisons.
(list "==" 3 :left)
(list "!=" 3 :left)
(list "<" 3 :left)
(list "<=" 3 :left)
(list ">" 3 :left)
(list ">=" 3 :left)
;; Precedence 2 — logical and.
(list "&&" 2 :left)
;; Precedence 1 — logical or (binds loosest).
(list "||" 1 :left)))
(define
go-parse
(fn
(src)
(let
((gp-tokens (go-tokenize src)) (gp-idx 0))
(define gp-cur
  ;; Token under the cursor: whatever nth yields for gp-tokens at gp-idx.
  (fn () (nth gp-tokens gp-idx)))
(define gp-advance!
  ;; Move the cursor forward by exactly one token.
  (fn () (set! gp-idx (+ 1 gp-idx))))
(define gp-tok-type
  ;; :type field of the token under the cursor.
  (fn () (let ((gp-tok (gp-cur))) (get gp-tok :type))))
(define gp-tok-value
  ;; :value field of the token under the cursor.
  (fn () (let ((gp-tok (gp-cur))) (get gp-tok :value))))
(define gp-parse-primary
  ;; Operand parser. Looks at the current token and produces:
  ;;   int / float / imag / string / rune  → (ast-literal VALUE)
  ;;   ident                               → (ast-var NAME)
  ;;   '[' , 'map' , 'struct'              → a full type via gp-parse-type;
  ;;        when '{' follows, (list :composite TYPE ELEMS), otherwise the
  ;;        bare type tree is handed back as the operand.
  ;; Any other token yields nil and consumes nothing.
  (fn ()
    (let ((gp-kind (gp-tok-type))
          (gp-text (gp-tok-value)))
      (let ((gp-is-literal
              (some (fn (gp-k) (= gp-k gp-kind))
                    (list "int" "float" "imag" "string" "rune")))
            (gp-starts-type
              (if (= gp-kind "op")
                (= gp-text "[")
                (and (= gp-kind "keyword")
                     (or (= gp-text "map") (= gp-text "struct"))))))
        (cond
          gp-is-literal
          (do (gp-advance!) (ast-literal gp-text))
          (= gp-kind "ident")
          (do (gp-advance!) (ast-var gp-text))
          gp-starts-type
          (let ((gp-ty (gp-parse-type)))
            ;; A '{' right after the type makes it a composite literal.
            (if (and (= (gp-tok-type) "op") (= (gp-tok-value) "{"))
              (do
                (gp-advance!)
                (list :composite gp-ty (gp-parse-composite-elems)))
              gp-ty))
          :else nil)))))
(define gp-parse-composite-elems
  ;; Parses the body of a composite literal. The caller has already
  ;; consumed '{'; this consumes elements up to and including '}'.
  ;; Each element is either an expression or KEY ':' VALUE. KEY is parsed
  ;; as an ordinary expression (struct field name or map key) — the
  ;; parser is permissive and leaves disambiguation to a later phase.
  ;; Semis between elements are skipped; a ',' after an element is optional.
  ;; Returns a list whose items are expression nodes or (list :kv KEY VALUE).
  ;;
  ;; Termination: an iteration that consumes no token (the cursor sits on
  ;; something that cannot start an element, e.g. eof in an unterminated
  ;; literal, a stray ')' or a nested '{') ends the loop instead of
  ;; recursing forever. Elements that parse to nil are not recorded,
  ;; matching gp-parse-call-args and gp-parse-expr-list.
  (fn ()
    (let ((elems (list)))
      (define gp-comp-loop
        (fn ()
          (cond
            (= (gp-tok-type) "semi")
            (do (gp-advance!) (gp-comp-loop))
            (and (= (gp-tok-type) "op") (= (gp-tok-value) "}"))
            (gp-advance!)
            :else
            ;; Remember where this iteration started so a stall is detectable.
            (let ((gp-start gp-idx))
              (let ((gp-elem (gp-parse-expr 1)))
                (cond
                  (and (= (gp-tok-type) "op") (= (gp-tok-value) ":"))
                  (do
                    (gp-advance!)
                    (let ((gp-val (gp-parse-expr 1)))
                      (append! elems (list :kv gp-elem gp-val))))
                  (not (= gp-elem nil))
                  (append! elems gp-elem)
                  :else nil))
              (when (and (= (gp-tok-type) "op") (= (gp-tok-value) ","))
                (gp-advance!))
              ;; Only keep going if the cursor actually moved.
              (when (not (= gp-idx gp-start))
                (gp-comp-loop))))))
      (gp-comp-loop)
      elems)))
(define gp-parse-call-args
  ;; Argument list of a call. The caller has consumed '('; this reads
  ;; comma-separated expressions and consumes the closing ')' when it is
  ;; reached. Arguments that parse to nil are skipped. If a token other
  ;; than ',' or ')' follows an argument, parsing stops there and that
  ;; token is left in place.
  ;; Returns the list of argument AST nodes.
  (fn ()
    (let ((args (list))
          (gp-at-close
            (fn () (and (= (gp-tok-type) "op") (= (gp-tok-value) ")")))))
      (let ((gp-take-arg
              (fn ()
                (let ((gp-arg (gp-parse-expr 1)))
                  (when (not (= gp-arg nil)) (append! args gp-arg))))))
        (define gp-args-rest
          (fn ()
            (cond
              (and (= (gp-tok-type) "op") (= (gp-tok-value) ","))
              (do (gp-advance!) (gp-take-arg) (gp-args-rest))
              (gp-at-close)
              (gp-advance!)
              :else nil)))
        ;; Empty list "()" short-circuits; otherwise first arg, then the rest.
        (if (gp-at-close)
          (gp-advance!)
          (do (gp-take-arg) (gp-args-rest)))
        args))))
(define gp-parse-interface-elems
  ;; Body of an interface type. The caller is positioned BEFORE '{'
  ;; (it is consumed here if present); elements are read until '}'.
  ;; Element shapes, all starting with an identifier:
  ;;   M(params) [results]  → (list :method "M" PARAMS RESULTS)
  ;;   T                    → (list :embed (list :ty-name "T"))
  ;;   pkg.T                → (list :embed (list :ty-sel "pkg" "T"))
  ;; A '.' not followed by an identifier degrades to the plain :ty-name
  ;; embed. Semis between elements are skipped. Any non-identifier token
  ;; other than ';' or '}' stops the loop without being consumed.
  ;; Type sets (Go 1.18+: ~int | ~float64) are deferred.
  (fn ()
    (let ((gp-on-op
            (fn (gp-sym)
              (and (= (gp-tok-type) "op") (= (gp-tok-value) gp-sym)))))
      (when (gp-on-op "{") (gp-advance!))
      (let ((elems (list))
            ;; Builds the node for one element whose leading name has
            ;; already been consumed.
            (gp-elem-for
              (fn (gp-name)
                (cond
                  (gp-on-op "(")
                  (let ((gp-params (gp-parse-func-type-params)))
                    (let ((gp-results (gp-parse-func-type-results)))
                      (list :method gp-name gp-params gp-results)))
                  (gp-on-op ".")
                  (do
                    (gp-advance!)
                    (if (= (gp-tok-type) "ident")
                      (let ((gp-sel (gp-tok-value)))
                        (gp-advance!)
                        (list :embed (list :ty-sel gp-name gp-sel)))
                      (list :embed (list :ty-name gp-name))))
                  :else (list :embed (list :ty-name gp-name))))))
        (define gp-iface-loop
          (fn ()
            (cond
              (= (gp-tok-type) "semi")
              (do (gp-advance!) (gp-iface-loop))
              (gp-on-op "}")
              (gp-advance!)
              (= (gp-tok-type) "ident")
              (let ((gp-name (gp-tok-value)))
                (gp-advance!)
                (append! elems (gp-elem-for gp-name))
                (gp-iface-loop))
              :else nil)))
        (gp-iface-loop)
        elems))))
(define gp-parse-struct-fields
  ;; Body of a struct type. The caller is positioned BEFORE '{' (consumed
  ;; here if present); fields are read until '}'.
  ;;   field := NAME {',' NAME} TYPE
  ;; Semis between fields (including ASI-inserted ones) are skipped.
  ;; Embedded fields and field tags are deferred; a token that is not
  ;; ';', '}' or an identifier stops the loop without being consumed.
  ;; Returns a list of (list :field NAMES TYPE).
  (fn ()
    (when (and (= (gp-tok-type) "op") (= (gp-tok-value) "{"))
      (gp-advance!))
    (let ((fields (list)))
      (define gp-struct-loop
        (fn ()
          (let ((gp-kind (gp-tok-type)))
            (cond
              (= gp-kind "semi")
              (do (gp-advance!) (gp-struct-loop))
              (and (= gp-kind "op") (= (gp-tok-value) "}"))
              (gp-advance!)
              (= gp-kind "ident")
              (let ((names (list (gp-tok-value))))
                (gp-advance!)
                ;; Gather any further ", NAME" pieces sharing the type.
                (define gp-names-rest
                  (fn ()
                    (if (and (= (gp-tok-type) "op") (= (gp-tok-value) ","))
                      (do
                        (gp-advance!)
                        (if (= (gp-tok-type) "ident")
                          (do (append! names (gp-tok-value)) (gp-advance!))
                          nil)
                        (gp-names-rest))
                      nil)))
                (gp-names-rest)
                (append! fields (list :field names (gp-parse-type)))
                (gp-struct-loop))
              :else nil))))
      (gp-struct-loop)
      fields)))
(define gp-parse-func-type-params
  ;; Parameter list of a func TYPE, anonymous parameters only. The caller
  ;; is positioned BEFORE '(' (consumed here if present). Reads
  ;; comma-separated types and consumes the closing ')' when reached;
  ;; entries for which gp-parse-type yields nil are skipped.
  ;; Named params (a int, b string) are deferred — they are needed for
  ;; func DECLARATIONS, not for pure func-type expressions.
  ;; Returns a list of type AST nodes.
  (fn ()
    (let ((params (list))
          (gp-on-op
            (fn (gp-sym)
              (and (= (gp-tok-type) "op") (= (gp-tok-value) gp-sym)))))
      (let ((gp-take-type
              (fn ()
                (let ((gp-ty (gp-parse-type)))
                  (when (not (= gp-ty nil)) (append! params gp-ty))))))
        (define gp-params-rest
          (fn ()
            (cond
              (gp-on-op ",")
              (do (gp-advance!) (gp-take-type) (gp-params-rest))
              (gp-on-op ")")
              (gp-advance!)
              :else nil)))
        (when (gp-on-op "(") (gp-advance!))
        ;; "()" is the empty list; otherwise first type, then the rest.
        (if (gp-on-op ")")
          (gp-advance!)
          (do (gp-take-type) (gp-params-rest)))
        params))))
(define gp-parse-func-type-results
  ;; Result part of a func type; the caller sits just after the ')' that
  ;; closed the parameters. Always returns a list of types:
  ;;   '(' T, T, ... ')'  → parsed with gp-parse-func-type-params
  ;;   bare type          → one-element list
  ;;   anything else      → empty list (gp-parse-type produced nil)
  (fn ()
    (if (and (= (gp-tok-type) "op") (= (gp-tok-value) "("))
      (gp-parse-func-type-params)
      (let ((gp-single (gp-parse-type)))
        (if (= gp-single nil)
          (list)
          (list gp-single))))))
(define gp-parse-type
  ;; Go type-expression parser. Dispatches on the current token:
  ;;   *T            → (list :ty-ptr T)
  ;;   name          → (list :ty-name "name")
  ;;   pkg.Name      → (list :ty-sel "pkg" "Name")
  ;;   []T           → (list :ty-slice T)
  ;;   [N]T          → (list :ty-array N T)      (N parsed as an expression)
  ;;   map[K]V       → (list :ty-map K V)
  ;;   chan T        → (list :ty-chan :both T)
  ;;   chan<- T      → (list :ty-chan :send T)
  ;;   <-chan T      → (list :ty-chan :recv T)
  ;;   func(...) R   → (list :ty-func PARAMS RESULTS)
  ;;   struct {...}  → (list :ty-struct FIELDS)
  ;;   interface{..} → (list :ty-interface ELEMS)
  ;; Expected punctuation ('[' / ']' of map and array, 'chan' after '<-')
  ;; is consumed when present and silently tolerated when missing.
  ;; Returns nil, consuming nothing, when the token cannot start a type.
  (fn ()
    (let ((gp-on-op
            (fn (gp-sym)
              (and (= (gp-tok-type) "op") (= (gp-tok-value) gp-sym))))
          (gp-on-kw
            (fn (gp-word)
              (and (= (gp-tok-type) "keyword") (= (gp-tok-value) gp-word)))))
      (cond
        (gp-on-op "*")
        (do (gp-advance!) (list :ty-ptr (gp-parse-type)))
        (gp-on-op "[")
        (do
          (gp-advance!)
          ;; "[]" is a slice; anything else before ']' is an array length.
          (if (gp-on-op "]")
            (do (gp-advance!) (list :ty-slice (gp-parse-type)))
            (let ((gp-size (gp-parse-expr 1)))
              (when (gp-on-op "]") (gp-advance!))
              (list :ty-array gp-size (gp-parse-type)))))
        (gp-on-kw "map")
        (do
          (gp-advance!)
          (when (gp-on-op "[") (gp-advance!))
          (let ((gp-key (gp-parse-type)))
            (when (gp-on-op "]") (gp-advance!))
            (let ((gp-val (gp-parse-type)))
              (list :ty-map gp-key gp-val))))
        (gp-on-op "<-")
        (do
          (gp-advance!)
          (when (gp-on-kw "chan") (gp-advance!))
          (list :ty-chan :recv (gp-parse-type)))
        (gp-on-kw "chan")
        (do
          (gp-advance!)
          (if (gp-on-op "<-")
            (do (gp-advance!) (list :ty-chan :send (gp-parse-type)))
            (list :ty-chan :both (gp-parse-type))))
        (gp-on-kw "func")
        (do
          (gp-advance!)
          (let ((gp-params (gp-parse-func-type-params)))
            (let ((gp-results (gp-parse-func-type-results)))
              (list :ty-func gp-params gp-results))))
        (gp-on-kw "struct")
        (do (gp-advance!) (list :ty-struct (gp-parse-struct-fields)))
        (gp-on-kw "interface")
        (do (gp-advance!) (list :ty-interface (gp-parse-interface-elems)))
        (= (gp-tok-type) "ident")
        (let ((gp-name (gp-tok-value)))
          (gp-advance!)
          (if (gp-on-op ".")
            (do
              (gp-advance!)
              ;; A '.' with no identifier after it falls back to the bare name.
              (if (= (gp-tok-type) "ident")
                (let ((gp-sel (gp-tok-value)))
                  (gp-advance!)
                  (list :ty-sel gp-name gp-sel))
                (list :ty-name gp-name)))
            (list :ty-name gp-name)))
        :else nil))))
(define gp-parse-bracket-expr
  ;; One optional operand inside '[ ... ]'. When the cursor is already on
  ;; ':' or ']' the operand was omitted and nil is returned without
  ;; consuming anything; otherwise a full expression is parsed.
  (fn ()
    (let ((gp-omitted
            (and (= (gp-tok-type) "op")
                 (or (= (gp-tok-value) ":") (= (gp-tok-value) "]")))))
      (if gp-omitted
        nil
        (gp-parse-expr 1)))))
(define gp-parse-bracket
  ;; Index or slice suffix applied to BASE. The caller has consumed '['.
  ;;   x[i]      → (list :index BASE i)
  ;;   x[a:b]    → (list :slice BASE LOW HIGH nil)   (LOW / HIGH may be nil)
  ;;   x[a:b:c]  → (list :slice BASE LOW HIGH MAX)
  ;; The closing ']' is consumed when present. If neither ']' nor ':'
  ;; follows the first operand, BASE is returned unchanged; if neither
  ;; follows the second operand, a two-index slice is returned with ']'
  ;; left unconsumed.
  (fn (base)
    (let ((gp-on-op
            (fn (gp-sym)
              (and (= (gp-tok-type) "op") (= (gp-tok-value) gp-sym)))))
      (let ((gp-low (gp-parse-bracket-expr)))
        (cond
          (gp-on-op "]")
          (do (gp-advance!) (list :index base gp-low))
          (gp-on-op ":")
          (do
            (gp-advance!)
            (let ((gp-high (gp-parse-bracket-expr)))
              (cond
                (gp-on-op "]")
                (do (gp-advance!) (list :slice base gp-low gp-high nil))
                (gp-on-op ":")
                (do
                  (gp-advance!)
                  (let ((gp-max (gp-parse-bracket-expr)))
                    (when (gp-on-op "]") (gp-advance!))
                    (list :slice base gp-low gp-high gp-max)))
                :else (list :slice base gp-low gp-high nil))))
          :else base)))))
(define gp-parse-postfix
  ;; Primary operand followed by any run of postfix forms, applied
  ;; left to right by gp-postfix-loop:
  ;;   x.field    → (list :select x "field")        — Go-specific
  ;;   f(args...) → (ast-app f args)                — canonical
  ;;   x[i]       → (list :index x i)               — Go-specific
  ;;   x[a:b]     → (list :slice x low high max)    — Go-specific
  (fn ()
    (gp-postfix-loop (gp-parse-primary))))
(define gp-postfix-loop
  ;; Repeatedly wraps BASE in postfix nodes while the cursor is on a
  ;; postfix-starting operator, then returns the accumulated node.
  ;;   '.' '(' TYPE ')'  → (list :assert BASE TYPE)      type assertion
  ;;   '.' IDENT         → (list :select BASE "IDENT")   selector
  ;;   '(' args ')'      → (ast-app BASE ARGS)           call
  ;;   '[' ... ']'       → via gp-parse-bracket          index / slice
  ;;   '{' ... '}'       → (list :composite BASE ELEMS)  T{...}; BASE is the
  ;;                       expression naming the type, resolved later.
  ;; A nil BASE yields nil. A '.' followed by anything else is consumed
  ;; and BASE is returned as it stands.
  (fn (base)
    (if (= base nil)
      nil
      (let ((gp-kind (gp-tok-type))
            (gp-sym (gp-tok-value)))
        (if (not (= gp-kind "op"))
          base
          (cond
            (= gp-sym ".")
            (do
              (gp-advance!)
              (cond
                (and (= (gp-tok-type) "op") (= (gp-tok-value) "("))
                (do
                  (gp-advance!)
                  (let ((gp-ty (gp-parse-type)))
                    (when (and (= (gp-tok-type) "op") (= (gp-tok-value) ")"))
                      (gp-advance!))
                    (gp-postfix-loop (list :assert base gp-ty))))
                (= (gp-tok-type) "ident")
                (let ((gp-field (gp-tok-value)))
                  (gp-advance!)
                  (gp-postfix-loop (list :select base gp-field)))
                :else base))
            (= gp-sym "(")
            (do
              (gp-advance!)
              (gp-postfix-loop (ast-app base (gp-parse-call-args))))
            (= gp-sym "[")
            (do
              (gp-advance!)
              (gp-postfix-loop (gp-parse-bracket base)))
            (= gp-sym "{")
            (do
              (gp-advance!)
              (gp-postfix-loop
                (list :composite base (gp-parse-composite-elems))))
            :else base))))))
(define
gp-unary-ops
;; Operator spellings that gp-parse-unary accepts in prefix position
;; (it tests membership in this list for "op" tokens).
;; Go spec § Operators: prefix unary, all higher precedence than
;; any binary operator. <- is the channel receive form (send is a
;; statement, not an expression, so never appears here as binary).
(list "+" "-" "!" "^" "*" "&" "<-"))
(define gp-parse-unary
  ;; Prefix-operator layer. When the current token is an "op" listed in
  ;; gp-unary-ops it is consumed and the operand is parsed recursively
  ;; (so prefix operators nest right to left), giving
  ;; (ast-app (ast-var OP) (list OPERAND)). A missing operand yields nil.
  ;; Any other token is handed to gp-parse-postfix.
  (fn ()
    (let ((gp-kind (gp-tok-type))
          (gp-sym (gp-tok-value)))
      (if (and (= gp-kind "op")
               (some (fn (gp-u) (= gp-u gp-sym)) gp-unary-ops))
        (do
          (gp-advance!)
          (let ((gp-operand (gp-parse-unary)))
            (if (= gp-operand nil)
              nil
              (ast-app (ast-var gp-sym) (list gp-operand)))))
        (gp-parse-postfix)))))
(define gp-parse-expr
  ;; Expression entry point: parse one unary operand, then let
  ;; gp-pratt-loop absorb binary operators whose precedence is at
  ;; least MIN-PREC.
  (fn (min-prec)
    (gp-pratt-loop (gp-parse-unary) min-prec)))
(define gp-pratt-loop
  ;; Precedence-climbing loop. LEFT is the operand parsed so far.
  ;; While the current token is an "op" found in go-precedence-table with
  ;; precedence >= MIN-PREC, it is consumed, the right operand is parsed
  ;; with a raised minimum (prec + 1 for :left operators, prec otherwise),
  ;; and the pair is folded into (ast-app (ast-var OP) (list LEFT RIGHT)).
  ;; Returns LEFT as soon as no such operator follows; a nil LEFT gives nil.
  (fn (left min-prec)
    (if (= left nil)
      nil
      (let ((gp-tok (gp-cur)))
        ;; Non-operator tokens never have a table entry.
        (let ((gp-entry
                (if (= (get gp-tok :type) "op")
                  (pratt-op-lookup go-precedence-table (get gp-tok :value))
                  nil)))
          (cond
            (= gp-entry nil)
            left
            (< (pratt-op-prec gp-entry) min-prec)
            left
            :else
            (do
              (gp-advance!)
              (let ((gp-prec (pratt-op-prec gp-entry)))
                (let ((gp-next-min
                        (if (= (pratt-op-assoc gp-entry) :left)
                          (+ gp-prec 1)
                          gp-prec)))
                  (let ((gp-right (gp-parse-expr gp-next-min)))
                    (gp-pratt-loop
                      (ast-app
                        (ast-var (get gp-tok :value))
                        (list left gp-right))
                      min-prec)))))))))))
(define gp-parse-expr-list
  ;; One or more comma-separated expressions; used for var/const
  ;; initialisers. Expressions that parse to nil are left out.
  ;; Stops at the first token that is not ','. Returns the list.
  (fn ()
    (let ((exprs (list)))
      (let ((gp-take-expr
              (fn ()
                (let ((gp-e (gp-parse-expr 1)))
                  (when (not (= gp-e nil)) (append! exprs gp-e))))))
        (define gp-exprs-rest
          (fn ()
            (when (and (= (gp-tok-type) "op") (= (gp-tok-value) ","))
              (gp-advance!)
              (gp-take-expr)
              (gp-exprs-rest))))
        (gp-take-expr)
        (gp-exprs-rest)
        exprs))))
(define gp-parse-var-or-const
  ;; Single var / const declaration body. The caller has consumed the
  ;; 'var' or 'const' keyword and passes TAG (:var-decl or :const-decl).
  ;;   NAME {',' NAME} [TYPE] ['=' EXPR {',' EXPR}]
  ;; Result shape: (list TAG (list :field NAMES TYPE-OR-NIL) EXPRS-OR-NIL)
  ;; A type is attempted unless the token after the names is eof, a semi
  ;; or '='. Go requires at least one of type / initialiser; this parser
  ;; is permissive and accepts neither.
  (fn (tag)
    (let ((names (list))
          (gp-at-assign
            (fn () (and (= (gp-tok-type) "op") (= (gp-tok-value) "=")))))
      (let ((gp-take-name
              (fn ()
                (when (= (gp-tok-type) "ident")
                  (append! names (gp-tok-value))
                  (gp-advance!)))))
        (define gp-names-rest
          (fn ()
            (when (and (= (gp-tok-type) "op") (= (gp-tok-value) ","))
              (gp-advance!)
              (gp-take-name)
              (gp-names-rest))))
        (gp-take-name)
        (gp-names-rest)
        ;; Nested lets keep the order explicit: type first, then initialisers.
        (let ((gp-ty
                (if (or (= (gp-tok-type) "eof")
                        (= (gp-tok-type) "semi")
                        (gp-at-assign))
                  nil
                  (gp-parse-type))))
          (let ((gp-inits
                  (if (gp-at-assign)
                    (do (gp-advance!) (gp-parse-expr-list))
                    nil)))
            (list tag (list :field names gp-ty) gp-inits)))))))
(define gp-parse-type-decl
  ;; Single (non-grouped) type declaration. The caller has consumed 'type'.
  ;;   type NAME TYPE  → (list :type-decl "NAME" TYPE)
  ;; TYPE is whatever gp-parse-type returns (nil if no type follows).
  ;; Returns nil, consuming nothing, when NAME is not an identifier.
  (fn ()
    (if (= (gp-tok-type) "ident")
      (let ((gp-name (gp-tok-value)))
        (gp-advance!)
        (list :type-decl gp-name (gp-parse-type)))
      nil)))
(define gp-parse-decl
  ;; One declaration, selected by the keyword under the cursor:
  ;;   package NAME   → (list :package "NAME")   (nil if NAME is missing)
  ;;   import "path"  → (ast-import "path")      (nil if no string follows)
  ;;   var ...        → gp-parse-var-or-const with :var-decl
  ;;   const ...      → gp-parse-var-or-const with :const-decl
  ;;   type ...       → gp-parse-type-decl
  ;; Returns nil without consuming anything for any other token.
  ;; Grouped/parenthesized forms and func decls are deferred.
  (fn ()
    (if (not (= (gp-tok-type) "keyword"))
      nil
      (let ((gp-kw (gp-tok-value)))
        (cond
          (= gp-kw "package")
          (do
            (gp-advance!)
            (if (= (gp-tok-type) "ident")
              (let ((gp-name (gp-tok-value)))
                (gp-advance!)
                (list :package gp-name))
              nil))
          (= gp-kw "import")
          (do
            (gp-advance!)
            (if (= (gp-tok-type) "string")
              (let ((gp-path (gp-tok-value)))
                (gp-advance!)
                (ast-import gp-path))
              nil))
          (= gp-kw "var")
          (do (gp-advance!) (gp-parse-var-or-const :var-decl))
          (= gp-kw "const")
          (do (gp-advance!) (gp-parse-var-or-const :const-decl))
          (= gp-kw "type")
          (do (gp-advance!) (gp-parse-type-decl))
          :else nil)))))
(define gp-parse-top
  ;; Top-level entry. Leading semis (e.g. ASI-inserted) are skipped;
  ;; a package / import / var / const / type keyword routes to
  ;; gp-parse-decl; anything else is parsed as an expression.
  (fn ()
    (let ((gp-kind (gp-tok-type)))
      (cond
        (= gp-kind "semi")
        (do (gp-advance!) (gp-parse-top))
        (and (= gp-kind "keyword")
             (some (fn (gp-kw) (= gp-kw (gp-tok-value)))
                   (list "package" "import" "var" "const" "type")))
        (gp-parse-decl)
        :else (gp-parse-expr 1)))))
(gp-parse-top))))