smalltalk: chunk-stream parser + pragmas + 21 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled

This commit is contained in:
2026-04-25 01:11:44 +00:00
parent 33ce994f23
commit e71154f9c6
4 changed files with 583 additions and 20 deletions

View File

@@ -21,6 +21,185 @@
;;
;; A "chunk" / class-definition stream is parsed at a higher level (deferred).
;; ── Chunk-stream reader ────────────────────────────────────────────────
;; Pharo chunk format: chunks are separated by `!`. A doubled `!!` inside a
;; chunk represents a single literal `!`. Returns list of chunk strings with
;; surrounding whitespace trimmed.
(define
st-read-chunks
(fn
(src)
(let
((chunks (list))
(buf (list))
(pos 0)
(n (len src)))
(begin
(define
flush!
(fn
()
(let
((s (st-trim (join "" buf))))
(begin (append! chunks s) (set! buf (list))))))
(define
rc-loop
(fn
()
(when
(< pos n)
(let
((c (nth src pos)))
(cond
((= c "!")
(cond
((and (< (+ pos 1) n) (= (nth src (+ pos 1)) "!"))
(begin (append! buf "!") (set! pos (+ pos 2)) (rc-loop)))
(else
(begin (flush!) (set! pos (+ pos 1)) (rc-loop)))))
(else
(begin (append! buf c) (set! pos (+ pos 1)) (rc-loop))))))))
(rc-loop)
;; trailing text without a closing `!` — preserve as a chunk
(when (> (len buf) 0) (flush!))
chunks))))
(define
st-trim
(fn
(s)
(let
((n (len s)) (i 0) (j 0))
(begin
(set! j n)
(define
tl-loop
(fn
()
(when
(and (< i n) (st-trim-ws? (nth s i)))
(begin (set! i (+ i 1)) (tl-loop)))))
(tl-loop)
(define
tr-loop
(fn
()
(when
(and (> j i) (st-trim-ws? (nth s (- j 1))))
(begin (set! j (- j 1)) (tr-loop)))))
(tr-loop)
(slice s i j)))))
(define
st-trim-ws?
(fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
;; Parse a chunk stream. Walks chunks and applies the Pharo file-in
;; convention: a chunk that evaluates to "X methodsFor: 'cat'" or
;; "X class methodsFor: 'cat'" enters a methods batch — subsequent chunks
;; are method source until an empty chunk closes the batch.
;;
;; Returns list of entries:
;; {:kind "expr" :ast EXPR-AST}
;; {:kind "method" :class CLS :class-side? BOOL :category CAT :ast METHOD-AST}
;; {:kind "blank"} (empty chunks outside a methods batch)
;; {:kind "end-methods"} (empty chunk closing a methods batch)
(define
st-parse-chunks
(fn
(src)
(let
((chunks (st-read-chunks src))
(entries (list))
(mode "do-it")
(cls-name nil)
(class-side? false)
(category nil))
(begin
(for-each
(fn
(chunk)
(cond
((= chunk "")
(cond
((= mode "methods")
(begin
(append! entries {:kind "end-methods"})
(set! mode "do-it")
(set! cls-name nil)
(set! class-side? false)
(set! category nil)))
(else (append! entries {:kind "blank"}))))
((= mode "methods")
(append!
entries
{:kind "method"
:class cls-name
:class-side? class-side?
:category category
:ast (st-parse-method chunk)}))
(else
(let
((ast (st-parse-expr chunk)))
(begin
(append! entries {:kind "expr" :ast ast})
(let
((mf (st-detect-methods-for ast)))
(when
(not (= mf nil))
(begin
(set! mode "methods")
(set! cls-name (get mf :class))
(set! class-side? (get mf :class-side?))
(set! category (get mf :category))))))))))
chunks)
entries))))
;; Recognise `Foo methodsFor: 'cat'` (and related) as starting a methods batch.
;; Returns nil if the AST doesn't look like one of these forms.
(define
st-detect-methods-for
(fn
(ast)
(cond
((not (= (get ast :type) "send")) nil)
((not (st-is-methods-for-selector? (get ast :selector))) nil)
(else
(let
((recv (get ast :receiver)) (args (get ast :args)))
(let
((cat-arg (if (> (len args) 0) (nth args 0) nil)))
(let
((category
(cond
((= cat-arg nil) nil)
((= (get cat-arg :type) "lit-string") (get cat-arg :value))
((= (get cat-arg :type) "lit-symbol") (get cat-arg :value))
(else nil))))
(cond
((= (get recv :type) "ident")
{:class (get recv :name)
:class-side? false
:category category})
;; `Foo class methodsFor: 'cat'` — recv is a unary send `Foo class`
((and
(= (get recv :type) "send")
(= (get recv :selector) "class")
(= (get (get recv :receiver) :type) "ident"))
{:class (get (get recv :receiver) :name)
:class-side? true
:category category})
(else nil)))))))))
(define
st-is-methods-for-selector?
(fn
(sel)
(or
(= sel "methodsFor:")
(= sel "methodsFor:stamp:")
(= sel "category:"))))
(define st-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
(define st-tok-value (fn (t) (if (= t nil) nil (get t :value))))
@@ -548,13 +727,81 @@
(u-loop)
receiver))))
;; Parse a single pragma: `<keyword: literal (keyword: literal)* >`
;; Returns {:selector "primitive:" :args (list literal-asts)}.
(define
parse-pragma
(fn
()
(begin
(consume! "binary" "<")
(let
((sel-parts (list)) (args (list)))
(begin
(define
pr-loop
(fn
()
(when
(at-type? "keyword")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! sel-parts (st-tok-value t))
(append! args (parse-pragma-arg))
(pr-loop))))))
(pr-loop)
(consume! "binary" ">")
{:selector (join "" sel-parts) :args args})))))
;; Pragma arguments are literals only.
(define
parse-pragma-arg
(fn
()
(let
((t (peek-tok)))
(let
((ty (st-tok-type t)) (v (st-tok-value t)))
(cond
((= ty "number")
(begin
(advance-tok!)
{:type (if (integer? v) "lit-int" "lit-float") :value v}))
((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v}))
((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v}))
((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "ident")
(begin
(advance-tok!)
(cond
((= v "nil") {:type "lit-nil"})
((= v "true") {:type "lit-true"})
((= v "false") {:type "lit-false"})
(else (error (str "st-parse: pragma arg must be literal, got ident " v))))))
((and (= ty "binary") (= v "-")
(= (st-tok-type (peek-tok-at 1)) "number"))
(let ((n (st-tok-value (peek-tok-at 1))))
(begin
(advance-tok!)
(advance-tok!)
{:type (if (integer? n) "lit-int" "lit-float")
:value (- 0 n)})))
(else
(error
(str "st-parse: pragma arg must be literal, got " ty))))))))
;; Method header: unary | binary arg | (kw arg)+
(define
parse-method
(fn
()
(let
((sel "") (params (list)) (temps (list)) (body (list)))
((sel "")
(params (list))
(temps (list))
(pragmas (list))
(body (list)))
(begin
(cond
;; Unary header
@@ -593,30 +840,47 @@
(str
"st-parse-method: expected selector header, got "
(st-tok-type (peek-tok))))))
;; Optional temps: | t1 t2 |
(when
(at? "bar" nil)
(begin
(advance-tok!)
(define
th-loop
(fn
()
(when
(at-type? "ident")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! temps (st-tok-value t))
(th-loop))))))
(th-loop)
(consume! "bar" nil)))
;; Pragmas and temps may appear in either order. Allow many
;; pragmas; one temps section.
(define
parse-temps!
(fn
()
(begin
(advance-tok!)
(define
th-loop
(fn
()
(when
(at-type? "ident")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! temps (st-tok-value t))
(th-loop))))))
(th-loop)
(consume! "bar" nil))))
(define
pt-loop
(fn
()
(cond
((and
(at? "binary" "<")
(= (st-tok-type (peek-tok-at 1)) "keyword"))
(begin (append! pragmas (parse-pragma)) (pt-loop)))
((and (at? "bar" nil) (= (len temps) 0))
(begin (parse-temps!) (pt-loop)))
(else nil))))
(pt-loop)
;; Body statements
(set! body (parse-statements "eof"))
{:type "method"
:selector sel
:params params
:temps temps
:pragmas pragmas
:body body}))))
;; Top-level program: statements separated by '.'

View File

@@ -314,6 +314,7 @@
:selector "factorial"
:params (list)
:temps (list)
:pragmas (list)
:body (list
{:type "return"
:expr {:type "send"
@@ -335,6 +336,7 @@
:selector "+"
:params (list "other")
:temps (list)
:pragmas (list)
:body (list {:type "return" :expr {:type "lit-string" :value "plus"}})})
(st-test
@@ -344,6 +346,7 @@
:selector "at:put:"
:params (list "i" "v")
:temps (list)
:pragmas (list)
:body (list {:type "return" :expr {:type "ident" :name "v"}})})
(st-test
@@ -353,6 +356,7 @@
:selector "twice:"
:params (list "x")
:temps (list "t")
:pragmas (list)
:body (list
{:type "assign"
:name "t"

View File

@@ -0,0 +1,294 @@
;; Smalltalk chunk-stream parser + pragma tests.
;;
;; Reuses helpers (st-test, st-deep=?) from tokenize.sx. Counters reset
;; here so this file's summary covers chunk + pragma tests only.
(set! st-test-pass 0)
(set! st-test-fail 0)
(set! st-test-fails (list))
;; ── 1. Raw chunk reader ──
(st-test "empty source" (st-read-chunks "") (list))
(st-test "single chunk" (st-read-chunks "foo!") (list "foo"))
(st-test "two chunks" (st-read-chunks "a! b!") (list "a" "b"))
(st-test "trailing no bang" (st-read-chunks "a! b") (list "a" "b"))
(st-test "empty chunk" (st-read-chunks "a! ! b!") (list "a" "" "b"))
(st-test
"doubled bang escapes"
(st-read-chunks "yes!! no!yes!")
(list "yes! no" "yes"))
(st-test
"whitespace trimmed"
(st-read-chunks " \n hello \n !")
(list "hello"))
;; ── 2. Chunk parser — do-it mode ──
(st-test
"single do-it chunk"
(st-parse-chunks "1 + 2!")
(list
{:kind "expr"
:ast {:type "send"
:receiver {:type "lit-int" :value 1}
:selector "+"
:args (list {:type "lit-int" :value 2})}}))
(st-test
"two do-it chunks"
(st-parse-chunks "x := 1! y := 2!")
(list
{:kind "expr"
:ast {:type "assign" :name "x" :expr {:type "lit-int" :value 1}}}
{:kind "expr"
:ast {:type "assign" :name "y" :expr {:type "lit-int" :value 2}}}))
(st-test
"blank chunk outside methods"
(st-parse-chunks "1! ! 2!")
(list
{:kind "expr" :ast {:type "lit-int" :value 1}}
{:kind "blank"}
{:kind "expr" :ast {:type "lit-int" :value 2}}))
;; ── 3. Methods batch ──
(st-test
"methodsFor opens method batch"
(st-parse-chunks
"Foo methodsFor: 'access'! foo ^ 1! bar ^ 2! !")
(list
{:kind "expr"
:ast {:type "send"
:receiver {:type "ident" :name "Foo"}
:selector "methodsFor:"
:args (list {:type "lit-string" :value "access"})}}
{:kind "method"
:class "Foo"
:class-side? false
:category "access"
:ast {:type "method"
:selector "foo"
:params (list)
:temps (list)
:pragmas (list)
:body (list
{:type "return" :expr {:type "lit-int" :value 1}})}}
{:kind "method"
:class "Foo"
:class-side? false
:category "access"
:ast {:type "method"
:selector "bar"
:params (list)
:temps (list)
:pragmas (list)
:body (list
{:type "return" :expr {:type "lit-int" :value 2}})}}
{:kind "end-methods"}))
(st-test
"class-side methodsFor"
(st-parse-chunks
"Foo class methodsFor: 'creation'! make ^ self new! !")
(list
{:kind "expr"
:ast {:type "send"
:receiver {:type "send"
:receiver {:type "ident" :name "Foo"}
:selector "class"
:args (list)}
:selector "methodsFor:"
:args (list {:type "lit-string" :value "creation"})}}
{:kind "method"
:class "Foo"
:class-side? true
:category "creation"
:ast {:type "method"
:selector "make"
:params (list)
:temps (list)
:pragmas (list)
:body (list
{:type "return"
:expr {:type "send"
:receiver {:type "self"}
:selector "new"
:args (list)}})}}
{:kind "end-methods"}))
(st-test
"method batch returns to do-it after empty chunk"
(st-parse-chunks
"Foo methodsFor: 'a'! m1 ^ 1! ! 99!")
(list
{:kind "expr"
:ast {:type "send"
:receiver {:type "ident" :name "Foo"}
:selector "methodsFor:"
:args (list {:type "lit-string" :value "a"})}}
{:kind "method"
:class "Foo"
:class-side? false
:category "a"
:ast {:type "method"
:selector "m1"
:params (list)
:temps (list)
:pragmas (list)
:body (list
{:type "return" :expr {:type "lit-int" :value 1}})}}
{:kind "end-methods"}
{:kind "expr" :ast {:type "lit-int" :value 99}}))
;; ── 4. Pragmas in method bodies ──
(st-test
"single pragma"
(st-parse-method "primAt: i <primitive: 60> ^ self")
{:type "method"
:selector "primAt:"
:params (list "i")
:temps (list)
:pragmas (list
{:selector "primitive:"
:args (list {:type "lit-int" :value 60})})
:body (list {:type "return" :expr {:type "self"}})})
(st-test
"pragma with two keyword pairs"
(st-parse-method "fft <primitive: 1 module: 'fft'> ^ nil")
{:type "method"
:selector "fft"
:params (list)
:temps (list)
:pragmas (list
{:selector "primitive:module:"
:args (list
{:type "lit-int" :value 1}
{:type "lit-string" :value "fft"})})
:body (list {:type "return" :expr {:type "lit-nil"}})})
(st-test
"pragma with negative number"
(st-parse-method "neg <primitive: -1> ^ nil")
{:type "method"
:selector "neg"
:params (list)
:temps (list)
:pragmas (list
{:selector "primitive:"
:args (list {:type "lit-int" :value -1})})
:body (list {:type "return" :expr {:type "lit-nil"}})})
(st-test
"pragma with symbol arg"
(st-parse-method "tagged <category: #algebra> ^ nil")
{:type "method"
:selector "tagged"
:params (list)
:temps (list)
:pragmas (list
{:selector "category:"
:args (list {:type "lit-symbol" :value "algebra"})})
:body (list {:type "return" :expr {:type "lit-nil"}})})
(st-test
"pragma then temps"
(st-parse-method "calc <primitive: 1> | t | t := 5. ^ t")
{:type "method"
:selector "calc"
:params (list)
:temps (list "t")
:pragmas (list
{:selector "primitive:"
:args (list {:type "lit-int" :value 1})})
:body (list
{:type "assign" :name "t" :expr {:type "lit-int" :value 5}}
{:type "return" :expr {:type "ident" :name "t"}})})
(st-test
"temps then pragma"
(st-parse-method "calc | t | <primitive: 1> t := 5. ^ t")
{:type "method"
:selector "calc"
:params (list)
:temps (list "t")
:pragmas (list
{:selector "primitive:"
:args (list {:type "lit-int" :value 1})})
:body (list
{:type "assign" :name "t" :expr {:type "lit-int" :value 5}}
{:type "return" :expr {:type "ident" :name "t"}})})
(st-test
"two pragmas"
(st-parse-method "m <primitive: 1> <category: 'a'> ^ self")
{:type "method"
:selector "m"
:params (list)
:temps (list)
:pragmas (list
{:selector "primitive:"
:args (list {:type "lit-int" :value 1})}
{:selector "category:"
:args (list {:type "lit-string" :value "a"})})
:body (list {:type "return" :expr {:type "self"}})})
;; ── 5. End-to-end: a small "filed-in" snippet ──
(st-test
"small filed-in class snippet"
(st-parse-chunks
"Object subclass: #Account
instanceVariableNames: 'balance'!
!Account methodsFor: 'access'!
balance
^ balance!
deposit: amount
balance := balance + amount.
^ self! !")
(list
{:kind "expr"
:ast {:type "send"
:receiver {:type "ident" :name "Object"}
:selector "subclass:instanceVariableNames:"
:args (list
{:type "lit-symbol" :value "Account"}
{:type "lit-string" :value "balance"})}}
{:kind "blank"}
{:kind "expr"
:ast {:type "send"
:receiver {:type "ident" :name "Account"}
:selector "methodsFor:"
:args (list {:type "lit-string" :value "access"})}}
{:kind "method"
:class "Account"
:class-side? false
:category "access"
:ast {:type "method"
:selector "balance"
:params (list)
:temps (list)
:pragmas (list)
:body (list
{:type "return"
:expr {:type "ident" :name "balance"}})}}
{:kind "method"
:class "Account"
:class-side? false
:category "access"
:ast {:type "method"
:selector "deposit:"
:params (list "amount")
:temps (list)
:pragmas (list)
:body (list
{:type "assign"
:name "balance"
:expr {:type "send"
:receiver {:type "ident" :name "balance"}
:selector "+"
:args (list {:type "ident" :name "amount"})}}
{:type "return" :expr {:type "self"}})}}
{:kind "end-methods"}))
(list st-test-pass st-test-fail)

View File

@@ -52,7 +52,7 @@ Core mapping:
### Phase 1 — tokenizer + parser
- [x] Tokenizer: identifiers, keywords (`foo:`), binary selectors (`+`, `==`, `,`, `->`, `~=` etc.), numbers (radix `16r1F`; **scaled `1.5s2` deferred**), strings `'…''…'`, characters `$c`, symbols `#foo` `#'foo bar'` `#+`, byte arrays `#[1 2 3]` (open token), literal arrays `#(1 #foo 'x')` (open token), comments `"…"`
- [x] Parser (expression level): blocks `[:a :b | | t1 t2 | …]`, cascades, message precedence (unary > binary > keyword), assignment, return, statement sequences, literal arrays, byte arrays, paren grouping, method headers (`+ other`, `at:put:`, unary, with temps and body). Class-definition keyword messages parse as ordinary keyword sends — no special-case needed.
- [ ] Parser (chunk-stream level): `! !` chunk separators driving a sequence of top-level expressions, pragmas `<primitive: 1>` inside method bodies
- [x] Parser (chunk-stream level): `st-read-chunks` splits source on `!` (with `!!` doubling) and `st-parse-chunks` runs the Pharo file-in state machine — `methodsFor:` / `class methodsFor:` opens a method batch, an empty chunk closes it. Pragmas `<primitive: >` (incl. multiple keyword pairs, before or after temps, multiple per method) parsed into the method AST.
- [x] Unit tests in `lib/smalltalk/tests/parse.sx`
### Phase 2 — object model + sequential eval
@@ -108,6 +108,7 @@ Core mapping:
_Newest first. Agent appends on every commit._
- 2026-04-25: chunk-stream parser + pragmas + 21 chunk/pragma tests (`lib/smalltalk/tests/parse_chunks.sx`). `st-read-chunks` (with `!!` doubling), `st-parse-chunks` state machine for `methodsFor:` batches incl. class-side. Pragmas with multiple keyword pairs, signed numeric / string / symbol args, in either pragma-then-temps or temps-then-pragma order. 131/131 tests pass.
- 2026-04-25: expression-level parser + 47 parse tests (`lib/smalltalk/parser.sx`, `lib/smalltalk/tests/parse.sx`). Full message precedence (unary > binary > keyword), cascades, blocks with params/temps, literal/byte arrays, assignment chain, method headers (unary/binary/keyword). Chunk-format `! !` driver deferred to a follow-up box. 110/110 tests pass.
- 2026-04-25: tokenizer + 63 tests (`lib/smalltalk/tokenizer.sx`, `lib/smalltalk/tests/tokenize.sx`, `lib/smalltalk/test.sh`). All token types covered except scaled decimals `1.5s2` (deferred). `#(` and `#[` emit open tokens; literal-array contents lexed as ordinary tokens for the parser to interpret.