smalltalk: chunk-stream parser + pragmas + 21 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled

This commit is contained in:
2026-04-25 01:11:44 +00:00
parent 33ce994f23
commit e71154f9c6
4 changed files with 583 additions and 20 deletions

View File

@@ -21,6 +21,185 @@
;;
;; A "chunk" / class-definition stream is parsed at a higher level (deferred).
;; ── Chunk-stream reader ────────────────────────────────────────────────
;; Pharo chunk format: chunks are separated by `!`. A doubled `!!` inside a
;; chunk represents a single literal `!`. Returns list of chunk strings with
;; surrounding whitespace trimmed.
(define
st-read-chunks
(fn
(src)
(let
((chunks (list))
(buf (list))
(pos 0)
(n (len src)))
(begin
(define
flush!
(fn
()
(let
((s (st-trim (join "" buf))))
(begin (append! chunks s) (set! buf (list))))))
(define
rc-loop
(fn
()
(when
(< pos n)
(let
((c (nth src pos)))
(cond
((= c "!")
(cond
((and (< (+ pos 1) n) (= (nth src (+ pos 1)) "!"))
(begin (append! buf "!") (set! pos (+ pos 2)) (rc-loop)))
(else
(begin (flush!) (set! pos (+ pos 1)) (rc-loop)))))
(else
(begin (append! buf c) (set! pos (+ pos 1)) (rc-loop))))))))
(rc-loop)
;; trailing text without a closing `!` — preserve as a chunk
(when (> (len buf) 0) (flush!))
chunks))))
(define
st-trim
(fn
(s)
(let
((n (len s)) (i 0) (j 0))
(begin
(set! j n)
(define
tl-loop
(fn
()
(when
(and (< i n) (st-trim-ws? (nth s i)))
(begin (set! i (+ i 1)) (tl-loop)))))
(tl-loop)
(define
tr-loop
(fn
()
(when
(and (> j i) (st-trim-ws? (nth s (- j 1))))
(begin (set! j (- j 1)) (tr-loop)))))
(tr-loop)
(slice s i j)))))
(define
st-trim-ws?
(fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
;; Parse a chunk stream. Walks chunks and applies the Pharo file-in
;; convention: a chunk that evaluates to "X methodsFor: 'cat'" or
;; "X class methodsFor: 'cat'" enters a methods batch — subsequent chunks
;; are method source until an empty chunk closes the batch.
;;
;; Returns list of entries:
;; {:kind "expr" :ast EXPR-AST}
;; {:kind "method" :class CLS :class-side? BOOL :category CAT :ast METHOD-AST}
;; {:kind "blank"} (empty chunks outside a methods batch)
;; {:kind "end-methods"} (empty chunk closing a methods batch)
(define
st-parse-chunks
(fn
(src)
(let
((chunks (st-read-chunks src))
(entries (list))
(mode "do-it")
(cls-name nil)
(class-side? false)
(category nil))
(begin
(for-each
(fn
(chunk)
(cond
((= chunk "")
(cond
((= mode "methods")
(begin
(append! entries {:kind "end-methods"})
(set! mode "do-it")
(set! cls-name nil)
(set! class-side? false)
(set! category nil)))
(else (append! entries {:kind "blank"}))))
((= mode "methods")
(append!
entries
{:kind "method"
:class cls-name
:class-side? class-side?
:category category
:ast (st-parse-method chunk)}))
(else
(let
((ast (st-parse-expr chunk)))
(begin
(append! entries {:kind "expr" :ast ast})
(let
((mf (st-detect-methods-for ast)))
(when
(not (= mf nil))
(begin
(set! mode "methods")
(set! cls-name (get mf :class))
(set! class-side? (get mf :class-side?))
(set! category (get mf :category))))))))))
chunks)
entries))))
;; Recognise `Foo methodsFor: 'cat'` (and related) as starting a methods batch.
;; Returns nil if the AST doesn't look like one of these forms.
(define
st-detect-methods-for
(fn
(ast)
(cond
((not (= (get ast :type) "send")) nil)
((not (st-is-methods-for-selector? (get ast :selector))) nil)
(else
(let
((recv (get ast :receiver)) (args (get ast :args)))
(let
((cat-arg (if (> (len args) 0) (nth args 0) nil)))
(let
((category
(cond
((= cat-arg nil) nil)
((= (get cat-arg :type) "lit-string") (get cat-arg :value))
((= (get cat-arg :type) "lit-symbol") (get cat-arg :value))
(else nil))))
(cond
((= (get recv :type) "ident")
{:class (get recv :name)
:class-side? false
:category category})
;; `Foo class methodsFor: 'cat'` — recv is a unary send `Foo class`
((and
(= (get recv :type) "send")
(= (get recv :selector) "class")
(= (get (get recv :receiver) :type) "ident"))
{:class (get (get recv :receiver) :name)
:class-side? true
:category category})
(else nil)))))))))
(define
st-is-methods-for-selector?
(fn
(sel)
(or
(= sel "methodsFor:")
(= sel "methodsFor:stamp:")
(= sel "category:"))))
(define st-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
(define st-tok-value (fn (t) (if (= t nil) nil (get t :value))))
@@ -548,13 +727,81 @@
(u-loop)
receiver))))
;; Parse a single pragma: `<keyword: literal (keyword: literal)* >`
;; Returns {:selector "primitive:" :args (list literal-asts)}.
(define
parse-pragma
(fn
()
(begin
(consume! "binary" "<")
(let
((sel-parts (list)) (args (list)))
(begin
(define
pr-loop
(fn
()
(when
(at-type? "keyword")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! sel-parts (st-tok-value t))
(append! args (parse-pragma-arg))
(pr-loop))))))
(pr-loop)
(consume! "binary" ">")
{:selector (join "" sel-parts) :args args})))))
;; Pragma arguments are literals only.
(define
parse-pragma-arg
(fn
()
(let
((t (peek-tok)))
(let
((ty (st-tok-type t)) (v (st-tok-value t)))
(cond
((= ty "number")
(begin
(advance-tok!)
{:type (if (integer? v) "lit-int" "lit-float") :value v}))
((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v}))
((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v}))
((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "ident")
(begin
(advance-tok!)
(cond
((= v "nil") {:type "lit-nil"})
((= v "true") {:type "lit-true"})
((= v "false") {:type "lit-false"})
(else (error (str "st-parse: pragma arg must be literal, got ident " v))))))
((and (= ty "binary") (= v "-")
(= (st-tok-type (peek-tok-at 1)) "number"))
(let ((n (st-tok-value (peek-tok-at 1))))
(begin
(advance-tok!)
(advance-tok!)
{:type (if (integer? n) "lit-int" "lit-float")
:value (- 0 n)})))
(else
(error
(str "st-parse: pragma arg must be literal, got " ty))))))))
;; Method header: unary | binary arg | (kw arg)+
(define
parse-method
(fn
()
(let
((sel "") (params (list)) (temps (list)) (body (list)))
((sel "")
(params (list))
(temps (list))
(pragmas (list))
(body (list)))
(begin
(cond
;; Unary header
@@ -593,30 +840,47 @@
(str
"st-parse-method: expected selector header, got "
(st-tok-type (peek-tok))))))
;; Optional temps: | t1 t2 |
(when
(at? "bar" nil)
(begin
(advance-tok!)
(define
th-loop
(fn
()
(when
(at-type? "ident")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! temps (st-tok-value t))
(th-loop))))))
(th-loop)
(consume! "bar" nil)))
;; Pragmas and temps may appear in either order. Allow many
;; pragmas; one temps section.
(define
parse-temps!
(fn
()
(begin
(advance-tok!)
(define
th-loop
(fn
()
(when
(at-type? "ident")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! temps (st-tok-value t))
(th-loop))))))
(th-loop)
(consume! "bar" nil))))
(define
pt-loop
(fn
()
(cond
((and
(at? "binary" "<")
(= (st-tok-type (peek-tok-at 1)) "keyword"))
(begin (append! pragmas (parse-pragma)) (pt-loop)))
((and (at? "bar" nil) (= (len temps) 0))
(begin (parse-temps!) (pt-loop)))
(else nil))))
(pt-loop)
;; Body statements
(set! body (parse-statements "eof"))
{:type "method"
:selector sel
:params params
:temps temps
:pragmas pragmas
:body body}))))
;; Top-level program: statements separated by '.'