Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
628 lines
23 KiB
Plaintext
628 lines
23 KiB
Plaintext
;; Smalltalk parser — produces an AST from the tokenizer's token stream.
|
|
;;
|
|
;; AST node shapes (dicts):
|
|
;; {:type "lit-int" :value N} integer
|
|
;; {:type "lit-float" :value F} float
|
|
;; {:type "lit-string" :value S} string
|
|
;; {:type "lit-char" :value C} character
|
|
;; {:type "lit-symbol" :value S} symbol literal (#foo)
|
|
;; {:type "lit-array" :elements (list ...)} literal array (#(1 2 #foo))
|
|
;; {:type "lit-byte-array" :elements (...)} byte array (#[1 2 3])
|
|
;; {:type "lit-nil" } / "lit-true" / "lit-false"
|
|
;; {:type "ident" :name "x"} variable reference
|
|
;; {:type "self"} / "super" / "thisContext" pseudo-variables
|
|
;; {:type "assign" :name "x" :expr E} x := E
|
|
;; {:type "return" :expr E} ^ E
|
|
;; {:type "send" :receiver R :selector S :args (list ...)}
|
|
;; {:type "cascade" :receiver R :messages (list {:selector :args} ...)}
|
|
;; {:type "block" :params (list "a") :temps (list "t") :body (list expr)}
|
|
;; {:type "seq" :exprs (list ...)} statement sequence
|
|
;; {:type "method" :selector S :params (list ...) :temps (list ...) :body (list ...) :pragmas (list ...)}
|
|
;;
|
|
;; A "chunk" / class-definition stream is parsed at a higher level (deferred).
|
|
|
|
(define st-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
|
|
|
|
(define st-tok-value (fn (t) (if (= t nil) nil (get t :value))))
|
|
|
|
;; Parse a *single* Smalltalk expression from source.
|
|
(define st-parse-expr (fn (src) (st-parse-with src "expr")))
|
|
|
|
;; Parse a sequence of statements separated by '.' Returns a {:type "seq"} node.
|
|
(define st-parse (fn (src) (st-parse-with src "seq")))
|
|
|
|
;; Parse a method body — `selector params | temps | body`.
|
|
;; Only the "method header + body" form (no chunk delimiters).
|
|
(define st-parse-method (fn (src) (st-parse-with src "method")))
|
|
|
|
(define
|
|
st-parse-with
|
|
(fn
|
|
(src mode)
|
|
(let
|
|
((tokens (st-tokenize src)) (idx 0) (tok-len 0))
|
|
(begin
|
|
(set! tok-len (len tokens))
|
|
(define peek-tok (fn () (nth tokens idx)))
|
|
(define
|
|
peek-tok-at
|
|
(fn (n) (if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil)))
|
|
(define advance-tok! (fn () (set! idx (+ idx 1))))
|
|
(define
|
|
at?
|
|
(fn
|
|
(type value)
|
|
(let
|
|
((t (peek-tok)))
|
|
(and
|
|
(= (st-tok-type t) type)
|
|
(or (= value nil) (= (st-tok-value t) value))))))
|
|
(define at-type? (fn (type) (= (st-tok-type (peek-tok)) type)))
|
|
(define
|
|
consume!
|
|
(fn
|
|
(type value)
|
|
(if
|
|
(at? type value)
|
|
(let ((t (peek-tok))) (begin (advance-tok!) t))
|
|
(error
|
|
(str
|
|
"st-parse: expected "
|
|
type
|
|
(if (= value nil) "" (str " '" value "'"))
|
|
" got "
|
|
(st-tok-type (peek-tok))
|
|
" '"
|
|
(st-tok-value (peek-tok))
|
|
"' at idx "
|
|
idx)))))
|
|
|
|
;; ── Primary: atoms, paren'd expr, blocks, literal arrays, byte arrays.
|
|
(define
|
|
parse-primary
|
|
(fn
|
|
()
|
|
(let
|
|
((t (peek-tok)))
|
|
(let
|
|
((ty (st-tok-type t)) (v (st-tok-value t)))
|
|
(cond
|
|
((= ty "number")
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((number? v) {:type (if (integer? v) "lit-int" "lit-float") :value v})
|
|
(else {:type "lit-int" :value v}))))
|
|
((= ty "string")
|
|
(begin (advance-tok!) {:type "lit-string" :value v}))
|
|
((= ty "char")
|
|
(begin (advance-tok!) {:type "lit-char" :value v}))
|
|
((= ty "symbol")
|
|
(begin (advance-tok!) {:type "lit-symbol" :value v}))
|
|
((= ty "array-open") (parse-literal-array))
|
|
((= ty "byte-array-open") (parse-byte-array))
|
|
((= ty "lparen")
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((e (parse-expression)))
|
|
(begin (consume! "rparen" nil) e))))
|
|
((= ty "lbracket") (parse-block))
|
|
((= ty "ident")
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((= v "nil") {:type "lit-nil"})
|
|
((= v "true") {:type "lit-true"})
|
|
((= v "false") {:type "lit-false"})
|
|
((= v "self") {:type "self"})
|
|
((= v "super") {:type "super"})
|
|
((= v "thisContext") {:type "thisContext"})
|
|
(else {:type "ident" :name v}))))
|
|
((= ty "binary")
|
|
;; Negative numeric literal: '-' immediately before a number.
|
|
(cond
|
|
((and (= v "-") (= (st-tok-type (peek-tok-at 1)) "number"))
|
|
(let
|
|
((n (st-tok-value (peek-tok-at 1))))
|
|
(begin
|
|
(advance-tok!)
|
|
(advance-tok!)
|
|
(cond
|
|
((dict? n) {:type "lit-int" :value n})
|
|
((integer? n) {:type "lit-int" :value (- 0 n)})
|
|
(else {:type "lit-float" :value (- 0 n)})))))
|
|
(else
|
|
(error
|
|
(str "st-parse: unexpected binary '" v "' at idx " idx)))))
|
|
(else
|
|
(error
|
|
(str
|
|
"st-parse: unexpected "
|
|
ty
|
|
" '"
|
|
v
|
|
"' at idx "
|
|
idx))))))))
|
|
|
|
;; #(elem elem ...) — elements are atoms or nested parenthesised arrays.
|
|
(define
|
|
parse-literal-array
|
|
(fn
|
|
()
|
|
(let
|
|
((items (list)))
|
|
(begin
|
|
(consume! "array-open" nil)
|
|
(define
|
|
arr-loop
|
|
(fn
|
|
()
|
|
(cond
|
|
((at? "rparen" nil) (advance-tok!))
|
|
(else
|
|
(begin
|
|
(append! items (parse-array-element))
|
|
(arr-loop))))))
|
|
(arr-loop)
|
|
{:type "lit-array" :elements items}))))
|
|
|
|
;; #[1 2 3]
|
|
(define
|
|
parse-byte-array
|
|
(fn
|
|
()
|
|
(let
|
|
((items (list)))
|
|
(begin
|
|
(consume! "byte-array-open" nil)
|
|
(define
|
|
ba-loop
|
|
(fn
|
|
()
|
|
(cond
|
|
((at? "rbracket" nil) (advance-tok!))
|
|
(else
|
|
(let
|
|
((t (peek-tok)))
|
|
(cond
|
|
((= (st-tok-type t) "number")
|
|
(begin
|
|
(advance-tok!)
|
|
(append! items (st-tok-value t))
|
|
(ba-loop)))
|
|
(else
|
|
(error
|
|
(str
|
|
"st-parse: byte array expects number, got "
|
|
(st-tok-type t))))))))))
|
|
(ba-loop)
|
|
{:type "lit-byte-array" :elements items}))))
|
|
|
|
;; Inside a literal array: bare idents become symbols, nested (...) is a sub-array.
|
|
(define
|
|
parse-array-element
|
|
(fn
|
|
()
|
|
(let
|
|
((t (peek-tok)))
|
|
(let
|
|
((ty (st-tok-type t)) (v (st-tok-value t)))
|
|
(cond
|
|
((= ty "number") (begin (advance-tok!) {:type "lit-int" :value v}))
|
|
((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v}))
|
|
((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v}))
|
|
((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v}))
|
|
((= ty "ident")
|
|
(begin
|
|
(advance-tok!)
|
|
(cond
|
|
((= v "nil") {:type "lit-nil"})
|
|
((= v "true") {:type "lit-true"})
|
|
((= v "false") {:type "lit-false"})
|
|
(else {:type "lit-symbol" :value v}))))
|
|
((= ty "keyword") (begin (advance-tok!) {:type "lit-symbol" :value v}))
|
|
((= ty "binary") (begin (advance-tok!) {:type "lit-symbol" :value v}))
|
|
((= ty "lparen")
|
|
(let ((items (list)))
|
|
(begin
|
|
(advance-tok!)
|
|
(define
|
|
sub-loop
|
|
(fn
|
|
()
|
|
(cond
|
|
((at? "rparen" nil) (advance-tok!))
|
|
(else
|
|
(begin (append! items (parse-array-element)) (sub-loop))))))
|
|
(sub-loop)
|
|
{:type "lit-array" :elements items})))
|
|
((= ty "array-open") (parse-literal-array))
|
|
((= ty "byte-array-open") (parse-byte-array))
|
|
(else
|
|
(error
|
|
(str "st-parse: bad literal-array element " ty " '" v "'"))))))))
|
|
|
|
;; [:a :b | | t1 t2 | body. body. ...]
|
|
(define
|
|
parse-block
|
|
(fn
|
|
()
|
|
(begin
|
|
(consume! "lbracket" nil)
|
|
(let
|
|
((params (list)) (temps (list)))
|
|
(begin
|
|
;; Block params
|
|
(define
|
|
p-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at? "colon" nil)
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((t (consume! "ident" nil)))
|
|
(begin
|
|
(append! params (st-tok-value t))
|
|
(p-loop)))))))
|
|
(p-loop)
|
|
(when (> (len params) 0) (consume! "bar" nil))
|
|
;; Block temps: | t1 t2 |
|
|
(when
|
|
(and
|
|
(at? "bar" nil)
|
|
;; Not `|` followed immediately by binary content — the only
|
|
;; legitimate `|` inside a block here is the temp delimiter.
|
|
true)
|
|
(begin
|
|
(advance-tok!)
|
|
(define
|
|
t-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at? "ident" nil)
|
|
(let
|
|
((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(append! temps (st-tok-value t))
|
|
(t-loop))))))
|
|
(t-loop)
|
|
(consume! "bar" nil)))
|
|
;; Body: statements terminated by `.` or `]`
|
|
(let
|
|
((body (parse-statements "rbracket")))
|
|
(begin
|
|
(consume! "rbracket" nil)
|
|
{:type "block" :params params :temps temps :body body})))))))
|
|
|
|
;; Parse statements up to a closing token (rbracket or eof). Returns list.
|
|
(define
|
|
parse-statements
|
|
(fn
|
|
(terminator)
|
|
(let
|
|
((stmts (list)))
|
|
(begin
|
|
(define
|
|
s-loop
|
|
(fn
|
|
()
|
|
(cond
|
|
((at-type? terminator) nil)
|
|
((at-type? "eof") nil)
|
|
(else
|
|
(begin
|
|
(append! stmts (parse-statement))
|
|
;; consume optional period(s)
|
|
(define
|
|
dot-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at? "period" nil)
|
|
(begin (advance-tok!) (dot-loop)))))
|
|
(dot-loop)
|
|
(s-loop))))))
|
|
(s-loop)
|
|
stmts))))
|
|
|
|
;; Statement: ^expr | ident := expr | expr
|
|
(define
|
|
parse-statement
|
|
(fn
|
|
()
|
|
(cond
|
|
((at? "caret" nil)
|
|
(begin
|
|
(advance-tok!)
|
|
{:type "return" :expr (parse-expression)}))
|
|
((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign"))
|
|
(let
|
|
((name-tok (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(advance-tok!)
|
|
{:type "assign"
|
|
:name (st-tok-value name-tok)
|
|
:expr (parse-expression)})))
|
|
(else (parse-expression)))))
|
|
|
|
;; Top-level expression. Assignment (right-associative chain) sits at
|
|
;; the top; cascade is below.
|
|
(define
|
|
parse-expression
|
|
(fn
|
|
()
|
|
(cond
|
|
((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign"))
|
|
(let
|
|
((name-tok (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(advance-tok!)
|
|
{:type "assign"
|
|
:name (st-tok-value name-tok)
|
|
:expr (parse-expression)})))
|
|
(else (parse-cascade)))))
|
|
|
|
(define
|
|
parse-cascade
|
|
(fn
|
|
()
|
|
(let
|
|
((head (parse-keyword-message)))
|
|
(cond
|
|
((at? "semi" nil)
|
|
(let
|
|
((receiver (cascade-receiver head))
|
|
(first-msg (cascade-first-message head))
|
|
(msgs (list)))
|
|
(begin
|
|
(append! msgs first-msg)
|
|
(define
|
|
c-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at? "semi" nil)
|
|
(begin
|
|
(advance-tok!)
|
|
(append! msgs (parse-cascade-message))
|
|
(c-loop)))))
|
|
(c-loop)
|
|
{:type "cascade" :receiver receiver :messages msgs})))
|
|
(else head)))))
|
|
|
|
;; Extract the receiver from a head send so cascades share it.
|
|
(define
|
|
cascade-receiver
|
|
(fn
|
|
(head)
|
|
(cond
|
|
((= (get head :type) "send") (get head :receiver))
|
|
(else head))))
|
|
|
|
(define
|
|
cascade-first-message
|
|
(fn
|
|
(head)
|
|
(cond
|
|
((= (get head :type) "send")
|
|
{:selector (get head :selector) :args (get head :args)})
|
|
(else
|
|
;; Shouldn't happen — cascade requires at least one prior message.
|
|
(error "st-parse: cascade with no prior message")))))
|
|
|
|
;; Subsequent cascade message (after the `;`): unary | binary | keyword
|
|
(define
|
|
parse-cascade-message
|
|
(fn
|
|
()
|
|
(cond
|
|
((at-type? "ident")
|
|
(let ((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
{:selector (st-tok-value t) :args (list)})))
|
|
((at-type? "binary")
|
|
(let ((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((arg (parse-unary-message)))
|
|
{:selector (st-tok-value t) :args (list arg)}))))
|
|
((at-type? "keyword")
|
|
(let
|
|
((sel-parts (list)) (args (list)))
|
|
(begin
|
|
(define
|
|
kw-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-type? "keyword")
|
|
(let ((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(append! sel-parts (st-tok-value t))
|
|
(append! args (parse-binary-message))
|
|
(kw-loop))))))
|
|
(kw-loop)
|
|
{:selector (join "" sel-parts) :args args})))
|
|
(else
|
|
(error
|
|
(str "st-parse: bad cascade message at idx " idx))))))
|
|
|
|
;; Keyword message: <binary> (kw <binary>)+
|
|
(define
|
|
parse-keyword-message
|
|
(fn
|
|
()
|
|
(let
|
|
((receiver (parse-binary-message)))
|
|
(cond
|
|
((at-type? "keyword")
|
|
(let
|
|
((sel-parts (list)) (args (list)))
|
|
(begin
|
|
(define
|
|
kw-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-type? "keyword")
|
|
(let ((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(append! sel-parts (st-tok-value t))
|
|
(append! args (parse-binary-message))
|
|
(kw-loop))))))
|
|
(kw-loop)
|
|
{:type "send"
|
|
:receiver receiver
|
|
:selector (join "" sel-parts)
|
|
:args args})))
|
|
(else receiver)))))
|
|
|
|
;; Binary message: <unary> (binop <unary>)*
|
|
(define
|
|
parse-binary-message
|
|
(fn
|
|
()
|
|
(let
|
|
((receiver (parse-unary-message)))
|
|
(begin
|
|
(define
|
|
b-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-type? "binary")
|
|
(let ((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(let
|
|
((arg (parse-unary-message)))
|
|
(set!
|
|
receiver
|
|
{:type "send"
|
|
:receiver receiver
|
|
:selector (st-tok-value t)
|
|
:args (list arg)}))
|
|
(b-loop))))))
|
|
(b-loop)
|
|
receiver))))
|
|
|
|
;; Unary message: <primary> ident* (ident NOT followed by ':')
|
|
(define
|
|
parse-unary-message
|
|
(fn
|
|
()
|
|
(let
|
|
((receiver (parse-primary)))
|
|
(begin
|
|
(define
|
|
u-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(and
|
|
(at-type? "ident")
|
|
(let
|
|
((nxt (peek-tok-at 1)))
|
|
(not (= (st-tok-type nxt) "assign"))))
|
|
(let ((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(set!
|
|
receiver
|
|
{:type "send"
|
|
:receiver receiver
|
|
:selector (st-tok-value t)
|
|
:args (list)})
|
|
(u-loop))))))
|
|
(u-loop)
|
|
receiver))))
|
|
|
|
;; Method header: unary | binary arg | (kw arg)+
|
|
(define
|
|
parse-method
|
|
(fn
|
|
()
|
|
(let
|
|
((sel "") (params (list)) (temps (list)) (body (list)))
|
|
(begin
|
|
(cond
|
|
;; Unary header
|
|
((at-type? "ident")
|
|
(let ((t (peek-tok)))
|
|
(begin (advance-tok!) (set! sel (st-tok-value t)))))
|
|
;; Binary header: binop ident
|
|
((at-type? "binary")
|
|
(let ((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(set! sel (st-tok-value t))
|
|
(let ((p (consume! "ident" nil)))
|
|
(append! params (st-tok-value p))))))
|
|
;; Keyword header: (kw ident)+
|
|
((at-type? "keyword")
|
|
(let ((sel-parts (list)))
|
|
(begin
|
|
(define
|
|
kh-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-type? "keyword")
|
|
(let ((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(append! sel-parts (st-tok-value t))
|
|
(let ((p (consume! "ident" nil)))
|
|
(append! params (st-tok-value p)))
|
|
(kh-loop))))))
|
|
(kh-loop)
|
|
(set! sel (join "" sel-parts)))))
|
|
(else
|
|
(error
|
|
(str
|
|
"st-parse-method: expected selector header, got "
|
|
(st-tok-type (peek-tok))))))
|
|
;; Optional temps: | t1 t2 |
|
|
(when
|
|
(at? "bar" nil)
|
|
(begin
|
|
(advance-tok!)
|
|
(define
|
|
th-loop
|
|
(fn
|
|
()
|
|
(when
|
|
(at-type? "ident")
|
|
(let ((t (peek-tok)))
|
|
(begin
|
|
(advance-tok!)
|
|
(append! temps (st-tok-value t))
|
|
(th-loop))))))
|
|
(th-loop)
|
|
(consume! "bar" nil)))
|
|
;; Body statements
|
|
(set! body (parse-statements "eof"))
|
|
{:type "method"
|
|
:selector sel
|
|
:params params
|
|
:temps temps
|
|
:body body}))))
|
|
|
|
;; Top-level program: statements separated by '.'
|
|
(cond
|
|
((= mode "expr") (parse-expression))
|
|
((= mode "method") (parse-method))
|
|
(else
|
|
{:type "seq" :exprs (parse-statements "eof")}))))))
|