Files
rose-ash/lib/smalltalk/parser.sx
giles 82bad15b13
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
smalltalk: super send + top-level temps + 9 super tests
2026-04-25 03:15:39 +00:00

913 lines
33 KiB
Plaintext

;; Smalltalk parser — produces an AST from the tokenizer's token stream.
;;
;; AST node shapes (dicts):
;; {:type "lit-int" :value N} integer
;; {:type "lit-float" :value F} float
;; {:type "lit-string" :value S} string
;; {:type "lit-char" :value C} character
;; {:type "lit-symbol" :value S} symbol literal (#foo)
;; {:type "lit-array" :elements (list ...)} literal array (#(1 2 #foo))
;; {:type "lit-byte-array" :elements (...)} byte array (#[1 2 3])
;; {:type "lit-nil" } / "lit-true" / "lit-false"
;; {:type "ident" :name "x"} variable reference
;; {:type "self"} / "super" / "thisContext" pseudo-variables
;; {:type "assign" :name "x" :expr E} x := E
;; {:type "return" :expr E} ^ E
;; {:type "send" :receiver R :selector S :args (list ...)}
;; {:type "cascade" :receiver R :messages (list {:selector :args} ...)}
;; {:type "block" :params (list "a") :temps (list "t") :body (list expr)}
;; {:type "seq" :exprs (list ...)} statement sequence
;; {:type "method" :selector S :params (list ...) :temps (list ...) :body (list ...) :pragmas (list ...)}
;;
;; A "chunk" / class-definition stream is parsed at a higher level (deferred).
;; ── Chunk-stream reader ────────────────────────────────────────────────
;; Pharo chunk format: chunks are separated by `!`. A doubled `!!` inside a
;; chunk represents a single literal `!`. Returns list of chunk strings with
;; surrounding whitespace trimmed.
(define
st-read-chunks
(fn
(src)
(let
((chunks (list))
(buf (list))
(pos 0)
(n (len src)))
(begin
(define
flush!
(fn
()
(let
((s (st-trim (join "" buf))))
(begin (append! chunks s) (set! buf (list))))))
(define
rc-loop
(fn
()
(when
(< pos n)
(let
((c (nth src pos)))
(cond
((= c "!")
(cond
((and (< (+ pos 1) n) (= (nth src (+ pos 1)) "!"))
(begin (append! buf "!") (set! pos (+ pos 2)) (rc-loop)))
(else
(begin (flush!) (set! pos (+ pos 1)) (rc-loop)))))
(else
(begin (append! buf c) (set! pos (+ pos 1)) (rc-loop))))))))
(rc-loop)
;; trailing text without a closing `!` — preserve as a chunk
(when (> (len buf) 0) (flush!))
chunks))))
(define
st-trim
(fn
(s)
(let
((n (len s)) (i 0) (j 0))
(begin
(set! j n)
(define
tl-loop
(fn
()
(when
(and (< i n) (st-trim-ws? (nth s i)))
(begin (set! i (+ i 1)) (tl-loop)))))
(tl-loop)
(define
tr-loop
(fn
()
(when
(and (> j i) (st-trim-ws? (nth s (- j 1))))
(begin (set! j (- j 1)) (tr-loop)))))
(tr-loop)
(slice s i j)))))
(define
st-trim-ws?
(fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
;; Parse a chunk stream. Walks chunks and applies the Pharo file-in
;; convention: a chunk that evaluates to "X methodsFor: 'cat'" or
;; "X class methodsFor: 'cat'" enters a methods batch — subsequent chunks
;; are method source until an empty chunk closes the batch.
;;
;; Returns list of entries:
;; {:kind "expr" :ast EXPR-AST}
;; {:kind "method" :class CLS :class-side? BOOL :category CAT :ast METHOD-AST}
;; {:kind "blank"} (empty chunks outside a methods batch)
;; {:kind "end-methods"} (empty chunk closing a methods batch)
(define
st-parse-chunks
(fn
(src)
(let
((chunks (st-read-chunks src))
(entries (list))
(mode "do-it")
(cls-name nil)
(class-side? false)
(category nil))
(begin
(for-each
(fn
(chunk)
(cond
((= chunk "")
(cond
((= mode "methods")
(begin
(append! entries {:kind "end-methods"})
(set! mode "do-it")
(set! cls-name nil)
(set! class-side? false)
(set! category nil)))
(else (append! entries {:kind "blank"}))))
((= mode "methods")
(append!
entries
{:kind "method"
:class cls-name
:class-side? class-side?
:category category
:ast (st-parse-method chunk)}))
(else
(let
((ast (st-parse-expr chunk)))
(begin
(append! entries {:kind "expr" :ast ast})
(let
((mf (st-detect-methods-for ast)))
(when
(not (= mf nil))
(begin
(set! mode "methods")
(set! cls-name (get mf :class))
(set! class-side? (get mf :class-side?))
(set! category (get mf :category))))))))))
chunks)
entries))))
;; Recognise `Foo methodsFor: 'cat'` (and related) as starting a methods batch.
;; Returns nil if the AST doesn't look like one of these forms.
(define
st-detect-methods-for
(fn
(ast)
(cond
((not (= (get ast :type) "send")) nil)
((not (st-is-methods-for-selector? (get ast :selector))) nil)
(else
(let
((recv (get ast :receiver)) (args (get ast :args)))
(let
((cat-arg (if (> (len args) 0) (nth args 0) nil)))
(let
((category
(cond
((= cat-arg nil) nil)
((= (get cat-arg :type) "lit-string") (get cat-arg :value))
((= (get cat-arg :type) "lit-symbol") (get cat-arg :value))
(else nil))))
(cond
((= (get recv :type) "ident")
{:class (get recv :name)
:class-side? false
:category category})
;; `Foo class methodsFor: 'cat'` — recv is a unary send `Foo class`
((and
(= (get recv :type) "send")
(= (get recv :selector) "class")
(= (get (get recv :receiver) :type) "ident"))
{:class (get (get recv :receiver) :name)
:class-side? true
:category category})
(else nil)))))))))
(define
st-is-methods-for-selector?
(fn
(sel)
(or
(= sel "methodsFor:")
(= sel "methodsFor:stamp:")
(= sel "category:"))))
(define st-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
(define st-tok-value (fn (t) (if (= t nil) nil (get t :value))))
;; Parse a *single* Smalltalk expression from source.
(define st-parse-expr (fn (src) (st-parse-with src "expr")))
;; Parse a sequence of statements separated by '.' Returns a {:type "seq"} node.
(define st-parse (fn (src) (st-parse-with src "seq")))
;; Parse a method body — `selector params | temps | body`.
;; Only the "method header + body" form (no chunk delimiters).
(define st-parse-method (fn (src) (st-parse-with src "method")))
(define
st-parse-with
(fn
(src mode)
(let
((tokens (st-tokenize src)) (idx 0) (tok-len 0))
(begin
(set! tok-len (len tokens))
(define peek-tok (fn () (nth tokens idx)))
(define
peek-tok-at
(fn (n) (if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil)))
(define advance-tok! (fn () (set! idx (+ idx 1))))
(define
at?
(fn
(type value)
(let
((t (peek-tok)))
(and
(= (st-tok-type t) type)
(or (= value nil) (= (st-tok-value t) value))))))
(define at-type? (fn (type) (= (st-tok-type (peek-tok)) type)))
(define
consume!
(fn
(type value)
(if
(at? type value)
(let ((t (peek-tok))) (begin (advance-tok!) t))
(error
(str
"st-parse: expected "
type
(if (= value nil) "" (str " '" value "'"))
" got "
(st-tok-type (peek-tok))
" '"
(st-tok-value (peek-tok))
"' at idx "
idx)))))
;; ── Primary: atoms, paren'd expr, blocks, literal arrays, byte arrays.
(define
parse-primary
(fn
()
(let
((t (peek-tok)))
(let
((ty (st-tok-type t)) (v (st-tok-value t)))
(cond
((= ty "number")
(begin
(advance-tok!)
(cond
((number? v) {:type (if (integer? v) "lit-int" "lit-float") :value v})
(else {:type "lit-int" :value v}))))
((= ty "string")
(begin (advance-tok!) {:type "lit-string" :value v}))
((= ty "char")
(begin (advance-tok!) {:type "lit-char" :value v}))
((= ty "symbol")
(begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "array-open") (parse-literal-array))
((= ty "byte-array-open") (parse-byte-array))
((= ty "lparen")
(begin
(advance-tok!)
(let
((e (parse-expression)))
(begin (consume! "rparen" nil) e))))
((= ty "lbracket") (parse-block))
((= ty "ident")
(begin
(advance-tok!)
(cond
((= v "nil") {:type "lit-nil"})
((= v "true") {:type "lit-true"})
((= v "false") {:type "lit-false"})
((= v "self") {:type "self"})
((= v "super") {:type "super"})
((= v "thisContext") {:type "thisContext"})
(else {:type "ident" :name v}))))
((= ty "binary")
;; Negative numeric literal: '-' immediately before a number.
(cond
((and (= v "-") (= (st-tok-type (peek-tok-at 1)) "number"))
(let
((n (st-tok-value (peek-tok-at 1))))
(begin
(advance-tok!)
(advance-tok!)
(cond
((dict? n) {:type "lit-int" :value n})
((integer? n) {:type "lit-int" :value (- 0 n)})
(else {:type "lit-float" :value (- 0 n)})))))
(else
(error
(str "st-parse: unexpected binary '" v "' at idx " idx)))))
(else
(error
(str
"st-parse: unexpected "
ty
" '"
v
"' at idx "
idx))))))))
;; #(elem elem ...) — elements are atoms or nested parenthesised arrays.
(define
parse-literal-array
(fn
()
(let
((items (list)))
(begin
(consume! "array-open" nil)
(define
arr-loop
(fn
()
(cond
((at? "rparen" nil) (advance-tok!))
(else
(begin
(append! items (parse-array-element))
(arr-loop))))))
(arr-loop)
{:type "lit-array" :elements items}))))
;; #[1 2 3]
(define
parse-byte-array
(fn
()
(let
((items (list)))
(begin
(consume! "byte-array-open" nil)
(define
ba-loop
(fn
()
(cond
((at? "rbracket" nil) (advance-tok!))
(else
(let
((t (peek-tok)))
(cond
((= (st-tok-type t) "number")
(begin
(advance-tok!)
(append! items (st-tok-value t))
(ba-loop)))
(else
(error
(str
"st-parse: byte array expects number, got "
(st-tok-type t))))))))))
(ba-loop)
{:type "lit-byte-array" :elements items}))))
;; Inside a literal array: bare idents become symbols, nested (...) is a sub-array.
(define
parse-array-element
(fn
()
(let
((t (peek-tok)))
(let
((ty (st-tok-type t)) (v (st-tok-value t)))
(cond
((= ty "number") (begin (advance-tok!) {:type "lit-int" :value v}))
((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v}))
((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v}))
((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "ident")
(begin
(advance-tok!)
(cond
((= v "nil") {:type "lit-nil"})
((= v "true") {:type "lit-true"})
((= v "false") {:type "lit-false"})
(else {:type "lit-symbol" :value v}))))
((= ty "keyword") (begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "binary") (begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "lparen")
(let ((items (list)))
(begin
(advance-tok!)
(define
sub-loop
(fn
()
(cond
((at? "rparen" nil) (advance-tok!))
(else
(begin (append! items (parse-array-element)) (sub-loop))))))
(sub-loop)
{:type "lit-array" :elements items})))
((= ty "array-open") (parse-literal-array))
((= ty "byte-array-open") (parse-byte-array))
(else
(error
(str "st-parse: bad literal-array element " ty " '" v "'"))))))))
;; [:a :b | | t1 t2 | body. body. ...]
(define
parse-block
(fn
()
(begin
(consume! "lbracket" nil)
(let
((params (list)) (temps (list)))
(begin
;; Block params
(define
p-loop
(fn
()
(when
(at? "colon" nil)
(begin
(advance-tok!)
(let
((t (consume! "ident" nil)))
(begin
(append! params (st-tok-value t))
(p-loop)))))))
(p-loop)
(when (> (len params) 0) (consume! "bar" nil))
;; Block temps: | t1 t2 |
(when
(and
(at? "bar" nil)
;; Not `|` followed immediately by binary content — the only
;; legitimate `|` inside a block here is the temp delimiter.
true)
(begin
(advance-tok!)
(define
t-loop
(fn
()
(when
(at? "ident" nil)
(let
((t (peek-tok)))
(begin
(advance-tok!)
(append! temps (st-tok-value t))
(t-loop))))))
(t-loop)
(consume! "bar" nil)))
;; Body: statements terminated by `.` or `]`
(let
((body (parse-statements "rbracket")))
(begin
(consume! "rbracket" nil)
{:type "block" :params params :temps temps :body body})))))))
;; Parse statements up to a closing token (rbracket or eof). Returns list.
(define
parse-statements
(fn
(terminator)
(let
((stmts (list)))
(begin
(define
s-loop
(fn
()
(cond
((at-type? terminator) nil)
((at-type? "eof") nil)
(else
(begin
(append! stmts (parse-statement))
;; consume optional period(s)
(define
dot-loop
(fn
()
(when
(at? "period" nil)
(begin (advance-tok!) (dot-loop)))))
(dot-loop)
(s-loop))))))
(s-loop)
stmts))))
;; Statement: ^expr | ident := expr | expr
(define
parse-statement
(fn
()
(cond
((at? "caret" nil)
(begin
(advance-tok!)
{:type "return" :expr (parse-expression)}))
((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign"))
(let
((name-tok (peek-tok)))
(begin
(advance-tok!)
(advance-tok!)
{:type "assign"
:name (st-tok-value name-tok)
:expr (parse-expression)})))
(else (parse-expression)))))
;; Top-level expression. Assignment (right-associative chain) sits at
;; the top; cascade is below.
(define
parse-expression
(fn
()
(cond
((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign"))
(let
((name-tok (peek-tok)))
(begin
(advance-tok!)
(advance-tok!)
{:type "assign"
:name (st-tok-value name-tok)
:expr (parse-expression)})))
(else (parse-cascade)))))
(define
parse-cascade
(fn
()
(let
((head (parse-keyword-message)))
(cond
((at? "semi" nil)
(let
((receiver (cascade-receiver head))
(first-msg (cascade-first-message head))
(msgs (list)))
(begin
(append! msgs first-msg)
(define
c-loop
(fn
()
(when
(at? "semi" nil)
(begin
(advance-tok!)
(append! msgs (parse-cascade-message))
(c-loop)))))
(c-loop)
{:type "cascade" :receiver receiver :messages msgs})))
(else head)))))
;; Extract the receiver from a head send so cascades share it.
(define
cascade-receiver
(fn
(head)
(cond
((= (get head :type) "send") (get head :receiver))
(else head))))
(define
cascade-first-message
(fn
(head)
(cond
((= (get head :type) "send")
{:selector (get head :selector) :args (get head :args)})
(else
;; Shouldn't happen — cascade requires at least one prior message.
(error "st-parse: cascade with no prior message")))))
;; Subsequent cascade message (after the `;`): unary | binary | keyword
(define
parse-cascade-message
(fn
()
(cond
((at-type? "ident")
(let ((t (peek-tok)))
(begin
(advance-tok!)
{:selector (st-tok-value t) :args (list)})))
((at-type? "binary")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(let
((arg (parse-unary-message)))
{:selector (st-tok-value t) :args (list arg)}))))
((at-type? "keyword")
(let
((sel-parts (list)) (args (list)))
(begin
(define
kw-loop
(fn
()
(when
(at-type? "keyword")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! sel-parts (st-tok-value t))
(append! args (parse-binary-message))
(kw-loop))))))
(kw-loop)
{:selector (join "" sel-parts) :args args})))
(else
(error
(str "st-parse: bad cascade message at idx " idx))))))
;; Keyword message: <binary> (kw <binary>)+
(define
parse-keyword-message
(fn
()
(let
((receiver (parse-binary-message)))
(cond
((at-type? "keyword")
(let
((sel-parts (list)) (args (list)))
(begin
(define
kw-loop
(fn
()
(when
(at-type? "keyword")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! sel-parts (st-tok-value t))
(append! args (parse-binary-message))
(kw-loop))))))
(kw-loop)
{:type "send"
:receiver receiver
:selector (join "" sel-parts)
:args args})))
(else receiver)))))
;; Binary message: <unary> (binop <unary>)*
(define
parse-binary-message
(fn
()
(let
((receiver (parse-unary-message)))
(begin
(define
b-loop
(fn
()
(when
(at-type? "binary")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(let
((arg (parse-unary-message)))
(set!
receiver
{:type "send"
:receiver receiver
:selector (st-tok-value t)
:args (list arg)}))
(b-loop))))))
(b-loop)
receiver))))
;; Unary message: <primary> ident* (ident NOT followed by ':')
(define
parse-unary-message
(fn
()
(let
((receiver (parse-primary)))
(begin
(define
u-loop
(fn
()
(when
(and
(at-type? "ident")
(let
((nxt (peek-tok-at 1)))
(not (= (st-tok-type nxt) "assign"))))
(let ((t (peek-tok)))
(begin
(advance-tok!)
(set!
receiver
{:type "send"
:receiver receiver
:selector (st-tok-value t)
:args (list)})
(u-loop))))))
(u-loop)
receiver))))
;; Parse a single pragma: `<keyword: literal (keyword: literal)* >`
;; Returns {:selector "primitive:" :args (list literal-asts)}.
(define
parse-pragma
(fn
()
(begin
(consume! "binary" "<")
(let
((sel-parts (list)) (args (list)))
(begin
(define
pr-loop
(fn
()
(when
(at-type? "keyword")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! sel-parts (st-tok-value t))
(append! args (parse-pragma-arg))
(pr-loop))))))
(pr-loop)
(consume! "binary" ">")
{:selector (join "" sel-parts) :args args})))))
;; Pragma arguments are literals only.
(define
parse-pragma-arg
(fn
()
(let
((t (peek-tok)))
(let
((ty (st-tok-type t)) (v (st-tok-value t)))
(cond
((= ty "number")
(begin
(advance-tok!)
{:type (if (integer? v) "lit-int" "lit-float") :value v}))
((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v}))
((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v}))
((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "ident")
(begin
(advance-tok!)
(cond
((= v "nil") {:type "lit-nil"})
((= v "true") {:type "lit-true"})
((= v "false") {:type "lit-false"})
(else (error (str "st-parse: pragma arg must be literal, got ident " v))))))
((and (= ty "binary") (= v "-")
(= (st-tok-type (peek-tok-at 1)) "number"))
(let ((n (st-tok-value (peek-tok-at 1))))
(begin
(advance-tok!)
(advance-tok!)
{:type (if (integer? n) "lit-int" "lit-float")
:value (- 0 n)})))
(else
(error
(str "st-parse: pragma arg must be literal, got " ty))))))))
;; Method header: unary | binary arg | (kw arg)+
(define
parse-method
(fn
()
(let
((sel "")
(params (list))
(temps (list))
(pragmas (list))
(body (list)))
(begin
(cond
;; Unary header
((at-type? "ident")
(let ((t (peek-tok)))
(begin (advance-tok!) (set! sel (st-tok-value t)))))
;; Binary header: binop ident
((at-type? "binary")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(set! sel (st-tok-value t))
(let ((p (consume! "ident" nil)))
(append! params (st-tok-value p))))))
;; Keyword header: (kw ident)+
((at-type? "keyword")
(let ((sel-parts (list)))
(begin
(define
kh-loop
(fn
()
(when
(at-type? "keyword")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! sel-parts (st-tok-value t))
(let ((p (consume! "ident" nil)))
(append! params (st-tok-value p)))
(kh-loop))))))
(kh-loop)
(set! sel (join "" sel-parts)))))
(else
(error
(str
"st-parse-method: expected selector header, got "
(st-tok-type (peek-tok))))))
;; Pragmas and temps may appear in either order. Allow many
;; pragmas; one temps section.
(define
parse-temps!
(fn
()
(begin
(advance-tok!)
(define
th-loop
(fn
()
(when
(at-type? "ident")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! temps (st-tok-value t))
(th-loop))))))
(th-loop)
(consume! "bar" nil))))
(define
pt-loop
(fn
()
(cond
((and
(at? "binary" "<")
(= (st-tok-type (peek-tok-at 1)) "keyword"))
(begin (append! pragmas (parse-pragma)) (pt-loop)))
((and (at? "bar" nil) (= (len temps) 0))
(begin (parse-temps!) (pt-loop)))
(else nil))))
(pt-loop)
;; Body statements
(set! body (parse-statements "eof"))
{:type "method"
:selector sel
:params params
:temps temps
:pragmas pragmas
:body body}))))
;; Top-level program: optional temp declaration, then statements
;; separated by '.'. Pharo workspace-style scripts allow
;; `| temps | body...` at the top level.
(cond
((= mode "expr") (parse-expression))
((= mode "method") (parse-method))
(else
(let ((temps (list)))
(begin
(when
(at? "bar" nil)
(begin
(advance-tok!)
(define
tt-loop
(fn
()
(when
(at-type? "ident")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! temps (st-tok-value t))
(tt-loop))))))
(tt-loop)
(consume! "bar" nil)))
{:type "seq" :temps temps :exprs (parse-statements "eof")}))))))))