;; Smalltalk parser — produces an AST from the tokenizer's token stream. ;; ;; AST node shapes (dicts): ;; {:type "lit-int" :value N} integer ;; {:type "lit-float" :value F} float ;; {:type "lit-string" :value S} string ;; {:type "lit-char" :value C} character ;; {:type "lit-symbol" :value S} symbol literal (#foo) ;; {:type "lit-array" :elements (list ...)} literal array (#(1 2 #foo)) ;; {:type "lit-byte-array" :elements (...)} byte array (#[1 2 3]) ;; {:type "lit-nil" } / "lit-true" / "lit-false" ;; {:type "ident" :name "x"} variable reference ;; {:type "self"} / "super" / "thisContext" pseudo-variables ;; {:type "assign" :name "x" :expr E} x := E ;; {:type "return" :expr E} ^ E ;; {:type "send" :receiver R :selector S :args (list ...)} ;; {:type "cascade" :receiver R :messages (list {:selector :args} ...)} ;; {:type "block" :params (list "a") :temps (list "t") :body (list expr)} ;; {:type "seq" :exprs (list ...)} statement sequence ;; {:type "method" :selector S :params (list ...) :temps (list ...) :body (list ...) :pragmas (list ...)} ;; ;; A "chunk" / class-definition stream is parsed at a higher level (deferred). ;; ── Chunk-stream reader ──────────────────────────────────────────────── ;; Pharo chunk format: chunks are separated by `!`. A doubled `!!` inside a ;; chunk represents a single literal `!`. Returns list of chunk strings with ;; surrounding whitespace trimmed. (define st-read-chunks (fn (src) (let ((chunks (list)) (buf (list)) (pos 0) (n (len src))) (begin (define flush! (fn () (let ((s (st-trim (join "" buf)))) (begin (append! chunks s) (set! buf (list)))))) (define rc-loop (fn () (when (< pos n) (let ((c (nth src pos))) (cond ((= c "!") (cond ((and (< (+ pos 1) n) (= (nth src (+ pos 1)) "!")) (begin (append! buf "!") (set! pos (+ pos 2)) (rc-loop))) (else (begin (flush!) (set! pos (+ pos 1)) (rc-loop))))) (else (begin (append! buf c) (set! pos (+ pos 1)) (rc-loop)))))))) (rc-loop) ;; trailing text without a closing `!` — preserve as a chunk (when (> (len buf) 0) (flush!)) chunks)))) (define st-trim (fn (s) (let ((n (len s)) (i 0) (j 0)) (begin (set! j n) (define tl-loop (fn () (when (and (< i n) (st-trim-ws? (nth s i))) (begin (set! i (+ i 1)) (tl-loop))))) (tl-loop) (define tr-loop (fn () (when (and (> j i) (st-trim-ws? (nth s (- j 1)))) (begin (set! j (- j 1)) (tr-loop))))) (tr-loop) (slice s i j))))) (define st-trim-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r")))) ;; Parse a chunk stream. Walks chunks and applies the Pharo file-in ;; convention: a chunk that evaluates to "X methodsFor: 'cat'" or ;; "X class methodsFor: 'cat'" enters a methods batch — subsequent chunks ;; are method source until an empty chunk closes the batch. ;; ;; Returns list of entries: ;; {:kind "expr" :ast EXPR-AST} ;; {:kind "method" :class CLS :class-side? BOOL :category CAT :ast METHOD-AST} ;; {:kind "blank"} (empty chunks outside a methods batch) ;; {:kind "end-methods"} (empty chunk closing a methods batch) (define st-parse-chunks (fn (src) (let ((chunks (st-read-chunks src)) (entries (list)) (mode "do-it") (cls-name nil) (class-side? false) (category nil)) (begin (for-each (fn (chunk) (cond ((= chunk "") (cond ((= mode "methods") (begin (append! entries {:kind "end-methods"}) (set! mode "do-it") (set! cls-name nil) (set! class-side? false) (set! category nil))) (else (append! entries {:kind "blank"})))) ((= mode "methods") (append! entries {:kind "method" :class cls-name :class-side? class-side? :category category :ast (st-parse-method chunk)})) (else (let ((ast (st-parse-expr chunk))) (begin (append! entries {:kind "expr" :ast ast}) (let ((mf (st-detect-methods-for ast))) (when (not (= mf nil)) (begin (set! mode "methods") (set! cls-name (get mf :class)) (set! class-side? (get mf :class-side?)) (set! category (get mf :category)))))))))) chunks) entries)))) ;; Recognise `Foo methodsFor: 'cat'` (and related) as starting a methods batch. ;; Returns nil if the AST doesn't look like one of these forms. (define st-detect-methods-for (fn (ast) (cond ((not (= (get ast :type) "send")) nil) ((not (st-is-methods-for-selector? (get ast :selector))) nil) (else (let ((recv (get ast :receiver)) (args (get ast :args))) (let ((cat-arg (if (> (len args) 0) (nth args 0) nil))) (let ((category (cond ((= cat-arg nil) nil) ((= (get cat-arg :type) "lit-string") (get cat-arg :value)) ((= (get cat-arg :type) "lit-symbol") (get cat-arg :value)) (else nil)))) (cond ((= (get recv :type) "ident") {:class (get recv :name) :class-side? false :category category}) ;; `Foo class methodsFor: 'cat'` — recv is a unary send `Foo class` ((and (= (get recv :type) "send") (= (get recv :selector) "class") (= (get (get recv :receiver) :type) "ident")) {:class (get (get recv :receiver) :name) :class-side? true :category category}) (else nil))))))))) (define st-is-methods-for-selector? (fn (sel) (or (= sel "methodsFor:") (= sel "methodsFor:stamp:") (= sel "category:")))) (define st-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) (define st-tok-value (fn (t) (if (= t nil) nil (get t :value)))) ;; Parse a *single* Smalltalk expression from source. (define st-parse-expr (fn (src) (st-parse-with src "expr"))) ;; Parse a sequence of statements separated by '.' Returns a {:type "seq"} node. (define st-parse (fn (src) (st-parse-with src "seq"))) ;; Parse a method body — `selector params | temps | body`. ;; Only the "method header + body" form (no chunk delimiters). (define st-parse-method (fn (src) (st-parse-with src "method"))) (define st-parse-with (fn (src mode) (let ((tokens (st-tokenize src)) (idx 0) (tok-len 0)) (begin (set! tok-len (len tokens)) (define peek-tok (fn () (nth tokens idx))) (define peek-tok-at (fn (n) (if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil))) (define advance-tok! (fn () (set! idx (+ idx 1)))) (define at? (fn (type value) (let ((t (peek-tok))) (and (= (st-tok-type t) type) (or (= value nil) (= (st-tok-value t) value)))))) (define at-type? (fn (type) (= (st-tok-type (peek-tok)) type))) (define consume! (fn (type value) (if (at? type value) (let ((t (peek-tok))) (begin (advance-tok!) t)) (error (str "st-parse: expected " type (if (= value nil) "" (str " '" value "'")) " got " (st-tok-type (peek-tok)) " '" (st-tok-value (peek-tok)) "' at idx " idx))))) ;; ── Primary: atoms, paren'd expr, blocks, literal arrays, byte arrays. (define parse-primary (fn () (let ((t (peek-tok))) (let ((ty (st-tok-type t)) (v (st-tok-value t))) (cond ((= ty "number") (begin (advance-tok!) (cond ((number? v) {:type (if (integer? v) "lit-int" "lit-float") :value v}) (else {:type "lit-int" :value v})))) ((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v})) ((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v})) ((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v})) ((= ty "array-open") (parse-literal-array)) ((= ty "byte-array-open") (parse-byte-array)) ((= ty "lparen") (begin (advance-tok!) (let ((e (parse-expression))) (begin (consume! "rparen" nil) e)))) ((= ty "lbracket") (parse-block)) ((= ty "lbrace") (parse-dynamic-array)) ((= ty "ident") (begin (advance-tok!) (cond ((= v "nil") {:type "lit-nil"}) ((= v "true") {:type "lit-true"}) ((= v "false") {:type "lit-false"}) ((= v "self") {:type "self"}) ((= v "super") {:type "super"}) ((= v "thisContext") {:type "thisContext"}) (else {:type "ident" :name v})))) ((= ty "binary") ;; Negative numeric literal: '-' immediately before a number. (cond ((and (= v "-") (= (st-tok-type (peek-tok-at 1)) "number")) (let ((n (st-tok-value (peek-tok-at 1)))) (begin (advance-tok!) (advance-tok!) (cond ((dict? n) {:type "lit-int" :value n}) ((integer? n) {:type "lit-int" :value (- 0 n)}) (else {:type "lit-float" :value (- 0 n)}))))) (else (error (str "st-parse: unexpected binary '" v "' at idx " idx))))) (else (error (str "st-parse: unexpected " ty " '" v "' at idx " idx)))))))) ;; #(elem elem ...) — elements are atoms or nested parenthesised arrays. (define parse-literal-array (fn () (let ((items (list))) (begin (consume! "array-open" nil) (define arr-loop (fn () (cond ((at? "rparen" nil) (advance-tok!)) (else (begin (append! items (parse-array-element)) (arr-loop)))))) (arr-loop) {:type "lit-array" :elements items})))) ;; { expr. expr. expr } — Pharo dynamic array literal. Each element ;; is a *full expression* evaluated at runtime; the result is a ;; fresh mutable array. Empty `{}` is a 0-length array. (define parse-dynamic-array (fn () (let ((items (list))) (begin (consume! "lbrace" nil) (define da-loop (fn () (cond ((at? "rbrace" nil) (advance-tok!)) (else (begin (append! items (parse-expression)) (define dot-loop (fn () (when (at? "period" nil) (begin (advance-tok!) (dot-loop))))) (dot-loop) (da-loop)))))) (da-loop) {:type "dynamic-array" :elements items})))) ;; #[1 2 3] (define parse-byte-array (fn () (let ((items (list))) (begin (consume! "byte-array-open" nil) (define ba-loop (fn () (cond ((at? "rbracket" nil) (advance-tok!)) (else (let ((t (peek-tok))) (cond ((= (st-tok-type t) "number") (begin (advance-tok!) (append! items (st-tok-value t)) (ba-loop))) (else (error (str "st-parse: byte array expects number, got " (st-tok-type t)))))))))) (ba-loop) {:type "lit-byte-array" :elements items})))) ;; Inside a literal array: bare idents become symbols, nested (...) is a sub-array. (define parse-array-element (fn () (let ((t (peek-tok))) (let ((ty (st-tok-type t)) (v (st-tok-value t))) (cond ((= ty "number") (begin (advance-tok!) {:type "lit-int" :value v})) ((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v})) ((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v})) ((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v})) ((= ty "ident") (begin (advance-tok!) (cond ((= v "nil") {:type "lit-nil"}) ((= v "true") {:type "lit-true"}) ((= v "false") {:type "lit-false"}) (else {:type "lit-symbol" :value v})))) ((= ty "keyword") (begin (advance-tok!) {:type "lit-symbol" :value v})) ((= ty "binary") (begin (advance-tok!) {:type "lit-symbol" :value v})) ((= ty "lparen") (let ((items (list))) (begin (advance-tok!) (define sub-loop (fn () (cond ((at? "rparen" nil) (advance-tok!)) (else (begin (append! items (parse-array-element)) (sub-loop)))))) (sub-loop) {:type "lit-array" :elements items}))) ((= ty "array-open") (parse-literal-array)) ((= ty "byte-array-open") (parse-byte-array)) (else (error (str "st-parse: bad literal-array element " ty " '" v "'")))))))) ;; [:a :b | | t1 t2 | body. body. ...] (define parse-block (fn () (begin (consume! "lbracket" nil) (let ((params (list)) (temps (list))) (begin ;; Block params (define p-loop (fn () (when (at? "colon" nil) (begin (advance-tok!) (let ((t (consume! "ident" nil))) (begin (append! params (st-tok-value t)) (p-loop))))))) (p-loop) (when (> (len params) 0) (consume! "bar" nil)) ;; Block temps: | t1 t2 | (when (and (at? "bar" nil) ;; Not `|` followed immediately by binary content — the only ;; legitimate `|` inside a block here is the temp delimiter. true) (begin (advance-tok!) (define t-loop (fn () (when (at? "ident" nil) (let ((t (peek-tok))) (begin (advance-tok!) (append! temps (st-tok-value t)) (t-loop)))))) (t-loop) (consume! "bar" nil))) ;; Body: statements terminated by `.` or `]` (let ((body (parse-statements "rbracket"))) (begin (consume! "rbracket" nil) {:type "block" :params params :temps temps :body body}))))))) ;; Parse statements up to a closing token (rbracket or eof). Returns list. (define parse-statements (fn (terminator) (let ((stmts (list))) (begin (define s-loop (fn () (cond ((at-type? terminator) nil) ((at-type? "eof") nil) (else (begin (append! stmts (parse-statement)) ;; consume optional period(s) (define dot-loop (fn () (when (at? "period" nil) (begin (advance-tok!) (dot-loop))))) (dot-loop) (s-loop)))))) (s-loop) stmts)))) ;; Statement: ^expr | ident := expr | expr (define parse-statement (fn () (cond ((at? "caret" nil) (begin (advance-tok!) {:type "return" :expr (parse-expression)})) ((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign")) (let ((name-tok (peek-tok))) (begin (advance-tok!) (advance-tok!) {:type "assign" :name (st-tok-value name-tok) :expr (parse-expression)}))) (else (parse-expression))))) ;; Top-level expression. Assignment (right-associative chain) sits at ;; the top; cascade is below. (define parse-expression (fn () (cond ((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign")) (let ((name-tok (peek-tok))) (begin (advance-tok!) (advance-tok!) {:type "assign" :name (st-tok-value name-tok) :expr (parse-expression)}))) (else (parse-cascade))))) (define parse-cascade (fn () (let ((head (parse-keyword-message))) (cond ((at? "semi" nil) (let ((receiver (cascade-receiver head)) (first-msg (cascade-first-message head)) (msgs (list))) (begin (append! msgs first-msg) (define c-loop (fn () (when (at? "semi" nil) (begin (advance-tok!) (append! msgs (parse-cascade-message)) (c-loop))))) (c-loop) {:type "cascade" :receiver receiver :messages msgs}))) (else head))))) ;; Extract the receiver from a head send so cascades share it. (define cascade-receiver (fn (head) (cond ((= (get head :type) "send") (get head :receiver)) (else head)))) (define cascade-first-message (fn (head) (cond ((= (get head :type) "send") {:selector (get head :selector) :args (get head :args)}) (else ;; Shouldn't happen — cascade requires at least one prior message. (error "st-parse: cascade with no prior message"))))) ;; Subsequent cascade message (after the `;`): unary | binary | keyword (define parse-cascade-message (fn () (cond ((at-type? "ident") (let ((t (peek-tok))) (begin (advance-tok!) {:selector (st-tok-value t) :args (list)}))) ((at-type? "binary") (let ((t (peek-tok))) (begin (advance-tok!) (let ((arg (parse-unary-message))) {:selector (st-tok-value t) :args (list arg)})))) ((at-type? "keyword") (let ((sel-parts (list)) (args (list))) (begin (define kw-loop (fn () (when (at-type? "keyword") (let ((t (peek-tok))) (begin (advance-tok!) (append! sel-parts (st-tok-value t)) (append! args (parse-binary-message)) (kw-loop)))))) (kw-loop) {:selector (join "" sel-parts) :args args}))) (else (error (str "st-parse: bad cascade message at idx " idx)))))) ;; Keyword message: (kw )+ (define parse-keyword-message (fn () (let ((receiver (parse-binary-message))) (cond ((at-type? "keyword") (let ((sel-parts (list)) (args (list))) (begin (define kw-loop (fn () (when (at-type? "keyword") (let ((t (peek-tok))) (begin (advance-tok!) (append! sel-parts (st-tok-value t)) (append! args (parse-binary-message)) (kw-loop)))))) (kw-loop) {:type "send" :receiver receiver :selector (join "" sel-parts) :args args}))) (else receiver))))) ;; Binary message: (binop )* ;; A bare `|` is also a legitimate binary selector (logical or in ;; some Smalltalks); the tokenizer emits it as the `bar` type so ;; that block-param / temp-decl delimiters are easy to spot. ;; In expression position, accept it as a binary operator. (define parse-binary-message (fn () (let ((receiver (parse-unary-message))) (begin (define b-loop (fn () (when (or (at-type? "binary") (at-type? "bar")) (let ((t (peek-tok))) (begin (advance-tok!) (let ((arg (parse-unary-message))) (set! receiver {:type "send" :receiver receiver :selector (st-tok-value t) :args (list arg)})) (b-loop)))))) (b-loop) receiver)))) ;; Unary message: ident* (ident NOT followed by ':') (define parse-unary-message (fn () (let ((receiver (parse-primary))) (begin (define u-loop (fn () (when (and (at-type? "ident") (let ((nxt (peek-tok-at 1))) (not (= (st-tok-type nxt) "assign")))) (let ((t (peek-tok))) (begin (advance-tok!) (set! receiver {:type "send" :receiver receiver :selector (st-tok-value t) :args (list)}) (u-loop)))))) (u-loop) receiver)))) ;; Parse a single pragma: `` ;; Returns {:selector "primitive:" :args (list literal-asts)}. (define parse-pragma (fn () (begin (consume! "binary" "<") (let ((sel-parts (list)) (args (list))) (begin (define pr-loop (fn () (when (at-type? "keyword") (let ((t (peek-tok))) (begin (advance-tok!) (append! sel-parts (st-tok-value t)) (append! args (parse-pragma-arg)) (pr-loop)))))) (pr-loop) (consume! "binary" ">") {:selector (join "" sel-parts) :args args}))))) ;; Pragma arguments are literals only. (define parse-pragma-arg (fn () (let ((t (peek-tok))) (let ((ty (st-tok-type t)) (v (st-tok-value t))) (cond ((= ty "number") (begin (advance-tok!) {:type (if (integer? v) "lit-int" "lit-float") :value v})) ((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v})) ((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v})) ((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v})) ((= ty "ident") (begin (advance-tok!) (cond ((= v "nil") {:type "lit-nil"}) ((= v "true") {:type "lit-true"}) ((= v "false") {:type "lit-false"}) (else (error (str "st-parse: pragma arg must be literal, got ident " v)))))) ((and (= ty "binary") (= v "-") (= (st-tok-type (peek-tok-at 1)) "number")) (let ((n (st-tok-value (peek-tok-at 1)))) (begin (advance-tok!) (advance-tok!) {:type (if (integer? n) "lit-int" "lit-float") :value (- 0 n)}))) (else (error (str "st-parse: pragma arg must be literal, got " ty)))))))) ;; Method header: unary | binary arg | (kw arg)+ (define parse-method (fn () (let ((sel "") (params (list)) (temps (list)) (pragmas (list)) (body (list))) (begin (cond ;; Unary header ((at-type? "ident") (let ((t (peek-tok))) (begin (advance-tok!) (set! sel (st-tok-value t))))) ;; Binary header: binop ident ((at-type? "binary") (let ((t (peek-tok))) (begin (advance-tok!) (set! sel (st-tok-value t)) (let ((p (consume! "ident" nil))) (append! params (st-tok-value p)))))) ;; Keyword header: (kw ident)+ ((at-type? "keyword") (let ((sel-parts (list))) (begin (define kh-loop (fn () (when (at-type? "keyword") (let ((t (peek-tok))) (begin (advance-tok!) (append! sel-parts (st-tok-value t)) (let ((p (consume! "ident" nil))) (append! params (st-tok-value p))) (kh-loop)))))) (kh-loop) (set! sel (join "" sel-parts))))) (else (error (str "st-parse-method: expected selector header, got " (st-tok-type (peek-tok)))))) ;; Pragmas and temps may appear in either order. Allow many ;; pragmas; one temps section. (define parse-temps! (fn () (begin (advance-tok!) (define th-loop (fn () (when (at-type? "ident") (let ((t (peek-tok))) (begin (advance-tok!) (append! temps (st-tok-value t)) (th-loop)))))) (th-loop) (consume! "bar" nil)))) (define pt-loop (fn () (cond ((and (at? "binary" "<") (= (st-tok-type (peek-tok-at 1)) "keyword")) (begin (append! pragmas (parse-pragma)) (pt-loop))) ((and (at? "bar" nil) (= (len temps) 0)) (begin (parse-temps!) (pt-loop))) (else nil)))) (pt-loop) ;; Body statements (set! body (parse-statements "eof")) {:type "method" :selector sel :params params :temps temps :pragmas pragmas :body body})))) ;; Top-level program: optional temp declaration, then statements ;; separated by '.'. Pharo workspace-style scripts allow ;; `| temps | body...` at the top level. (cond ((= mode "expr") (parse-expression)) ((= mode "method") (parse-method)) (else (let ((temps (list))) (begin (when (at? "bar" nil) (begin (advance-tok!) (define tt-loop (fn () (when (at-type? "ident") (let ((t (peek-tok))) (begin (advance-tok!) (append! temps (st-tok-value t)) (tt-loop)))))) (tt-loop) (consume! "bar" nil))) {:type "seq" :temps temps :exprs (parse-statements "eof")}))))))))