smalltalk: expression parser + 47 parse tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled

This commit is contained in:
2026-04-25 00:46:03 +00:00
parent 4e7d2183ad
commit 33ce994f23
4 changed files with 1036 additions and 8 deletions

627
lib/smalltalk/parser.sx Normal file
View File

@@ -0,0 +1,627 @@
;; Smalltalk parser — produces an AST from the tokenizer's token stream.
;;
;; AST node shapes (dicts):
;; {:type "lit-int" :value N} integer
;; {:type "lit-float" :value F} float
;; {:type "lit-string" :value S} string
;; {:type "lit-char" :value C} character
;; {:type "lit-symbol" :value S} symbol literal (#foo)
;; {:type "lit-array" :elements (list ...)} literal array (#(1 2 #foo))
;; {:type "lit-byte-array" :elements (...)} byte array (#[1 2 3])
;; {:type "lit-nil" } / "lit-true" / "lit-false"
;; {:type "ident" :name "x"} variable reference
;; {:type "self"} / "super" / "thisContext" pseudo-variables
;; {:type "assign" :name "x" :expr E} x := E
;; {:type "return" :expr E} ^ E
;; {:type "send" :receiver R :selector S :args (list ...)}
;; {:type "cascade" :receiver R :messages (list {:selector :args} ...)}
;; {:type "block" :params (list "a") :temps (list "t") :body (list expr)}
;; {:type "seq" :exprs (list ...)} statement sequence
;; {:type "method" :selector S :params (list ...) :temps (list ...) :body (list ...) :pragmas (list ...)}
;;
;; A "chunk" / class-definition stream is parsed at a higher level (deferred).
(define st-tok-type (fn (t) (if (= t nil) "eof" (get t :type))))
(define st-tok-value (fn (t) (if (= t nil) nil (get t :value))))
;; Parse a *single* Smalltalk expression from source.
(define st-parse-expr (fn (src) (st-parse-with src "expr")))
;; Parse a sequence of statements separated by '.' Returns a {:type "seq"} node.
(define st-parse (fn (src) (st-parse-with src "seq")))
;; Parse a method body — `selector params | temps | body`.
;; Only the "method header + body" form (no chunk delimiters).
(define st-parse-method (fn (src) (st-parse-with src "method")))
(define
st-parse-with
(fn
(src mode)
(let
((tokens (st-tokenize src)) (idx 0) (tok-len 0))
(begin
(set! tok-len (len tokens))
(define peek-tok (fn () (nth tokens idx)))
(define
peek-tok-at
(fn (n) (if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil)))
(define advance-tok! (fn () (set! idx (+ idx 1))))
(define
at?
(fn
(type value)
(let
((t (peek-tok)))
(and
(= (st-tok-type t) type)
(or (= value nil) (= (st-tok-value t) value))))))
(define at-type? (fn (type) (= (st-tok-type (peek-tok)) type)))
(define
consume!
(fn
(type value)
(if
(at? type value)
(let ((t (peek-tok))) (begin (advance-tok!) t))
(error
(str
"st-parse: expected "
type
(if (= value nil) "" (str " '" value "'"))
" got "
(st-tok-type (peek-tok))
" '"
(st-tok-value (peek-tok))
"' at idx "
idx)))))
;; ── Primary: atoms, paren'd expr, blocks, literal arrays, byte arrays.
(define
parse-primary
(fn
()
(let
((t (peek-tok)))
(let
((ty (st-tok-type t)) (v (st-tok-value t)))
(cond
((= ty "number")
(begin
(advance-tok!)
(cond
((number? v) {:type (if (integer? v) "lit-int" "lit-float") :value v})
(else {:type "lit-int" :value v}))))
((= ty "string")
(begin (advance-tok!) {:type "lit-string" :value v}))
((= ty "char")
(begin (advance-tok!) {:type "lit-char" :value v}))
((= ty "symbol")
(begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "array-open") (parse-literal-array))
((= ty "byte-array-open") (parse-byte-array))
((= ty "lparen")
(begin
(advance-tok!)
(let
((e (parse-expression)))
(begin (consume! "rparen" nil) e))))
((= ty "lbracket") (parse-block))
((= ty "ident")
(begin
(advance-tok!)
(cond
((= v "nil") {:type "lit-nil"})
((= v "true") {:type "lit-true"})
((= v "false") {:type "lit-false"})
((= v "self") {:type "self"})
((= v "super") {:type "super"})
((= v "thisContext") {:type "thisContext"})
(else {:type "ident" :name v}))))
((= ty "binary")
;; Negative numeric literal: '-' immediately before a number.
(cond
((and (= v "-") (= (st-tok-type (peek-tok-at 1)) "number"))
(let
((n (st-tok-value (peek-tok-at 1))))
(begin
(advance-tok!)
(advance-tok!)
(cond
((dict? n) {:type "lit-int" :value n})
((integer? n) {:type "lit-int" :value (- 0 n)})
(else {:type "lit-float" :value (- 0 n)})))))
(else
(error
(str "st-parse: unexpected binary '" v "' at idx " idx)))))
(else
(error
(str
"st-parse: unexpected "
ty
" '"
v
"' at idx "
idx))))))))
;; #(elem elem ...) — elements are atoms or nested parenthesised arrays.
(define
parse-literal-array
(fn
()
(let
((items (list)))
(begin
(consume! "array-open" nil)
(define
arr-loop
(fn
()
(cond
((at? "rparen" nil) (advance-tok!))
(else
(begin
(append! items (parse-array-element))
(arr-loop))))))
(arr-loop)
{:type "lit-array" :elements items}))))
;; #[1 2 3]
(define
parse-byte-array
(fn
()
(let
((items (list)))
(begin
(consume! "byte-array-open" nil)
(define
ba-loop
(fn
()
(cond
((at? "rbracket" nil) (advance-tok!))
(else
(let
((t (peek-tok)))
(cond
((= (st-tok-type t) "number")
(begin
(advance-tok!)
(append! items (st-tok-value t))
(ba-loop)))
(else
(error
(str
"st-parse: byte array expects number, got "
(st-tok-type t))))))))))
(ba-loop)
{:type "lit-byte-array" :elements items}))))
;; Inside a literal array: bare idents become symbols, nested (...) is a sub-array.
(define
parse-array-element
(fn
()
(let
((t (peek-tok)))
(let
((ty (st-tok-type t)) (v (st-tok-value t)))
(cond
((= ty "number") (begin (advance-tok!) {:type "lit-int" :value v}))
((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v}))
((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v}))
((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "ident")
(begin
(advance-tok!)
(cond
((= v "nil") {:type "lit-nil"})
((= v "true") {:type "lit-true"})
((= v "false") {:type "lit-false"})
(else {:type "lit-symbol" :value v}))))
((= ty "keyword") (begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "binary") (begin (advance-tok!) {:type "lit-symbol" :value v}))
((= ty "lparen")
(let ((items (list)))
(begin
(advance-tok!)
(define
sub-loop
(fn
()
(cond
((at? "rparen" nil) (advance-tok!))
(else
(begin (append! items (parse-array-element)) (sub-loop))))))
(sub-loop)
{:type "lit-array" :elements items})))
((= ty "array-open") (parse-literal-array))
((= ty "byte-array-open") (parse-byte-array))
(else
(error
(str "st-parse: bad literal-array element " ty " '" v "'"))))))))
;; [:a :b | | t1 t2 | body. body. ...]
(define
parse-block
(fn
()
(begin
(consume! "lbracket" nil)
(let
((params (list)) (temps (list)))
(begin
;; Block params
(define
p-loop
(fn
()
(when
(at? "colon" nil)
(begin
(advance-tok!)
(let
((t (consume! "ident" nil)))
(begin
(append! params (st-tok-value t))
(p-loop)))))))
(p-loop)
(when (> (len params) 0) (consume! "bar" nil))
;; Block temps: | t1 t2 |
(when
(and
(at? "bar" nil)
;; Not `|` followed immediately by binary content — the only
;; legitimate `|` inside a block here is the temp delimiter.
true)
(begin
(advance-tok!)
(define
t-loop
(fn
()
(when
(at? "ident" nil)
(let
((t (peek-tok)))
(begin
(advance-tok!)
(append! temps (st-tok-value t))
(t-loop))))))
(t-loop)
(consume! "bar" nil)))
;; Body: statements terminated by `.` or `]`
(let
((body (parse-statements "rbracket")))
(begin
(consume! "rbracket" nil)
{:type "block" :params params :temps temps :body body})))))))
;; Parse statements up to a closing token (rbracket or eof). Returns list.
(define
parse-statements
(fn
(terminator)
(let
((stmts (list)))
(begin
(define
s-loop
(fn
()
(cond
((at-type? terminator) nil)
((at-type? "eof") nil)
(else
(begin
(append! stmts (parse-statement))
;; consume optional period(s)
(define
dot-loop
(fn
()
(when
(at? "period" nil)
(begin (advance-tok!) (dot-loop)))))
(dot-loop)
(s-loop))))))
(s-loop)
stmts))))
;; Statement: ^expr | ident := expr | expr
(define
parse-statement
(fn
()
(cond
((at? "caret" nil)
(begin
(advance-tok!)
{:type "return" :expr (parse-expression)}))
((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign"))
(let
((name-tok (peek-tok)))
(begin
(advance-tok!)
(advance-tok!)
{:type "assign"
:name (st-tok-value name-tok)
:expr (parse-expression)})))
(else (parse-expression)))))
;; Top-level expression. Assignment (right-associative chain) sits at
;; the top; cascade is below.
(define
parse-expression
(fn
()
(cond
((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign"))
(let
((name-tok (peek-tok)))
(begin
(advance-tok!)
(advance-tok!)
{:type "assign"
:name (st-tok-value name-tok)
:expr (parse-expression)})))
(else (parse-cascade)))))
(define
parse-cascade
(fn
()
(let
((head (parse-keyword-message)))
(cond
((at? "semi" nil)
(let
((receiver (cascade-receiver head))
(first-msg (cascade-first-message head))
(msgs (list)))
(begin
(append! msgs first-msg)
(define
c-loop
(fn
()
(when
(at? "semi" nil)
(begin
(advance-tok!)
(append! msgs (parse-cascade-message))
(c-loop)))))
(c-loop)
{:type "cascade" :receiver receiver :messages msgs})))
(else head)))))
;; Extract the receiver from a head send so cascades share it.
(define
cascade-receiver
(fn
(head)
(cond
((= (get head :type) "send") (get head :receiver))
(else head))))
(define
cascade-first-message
(fn
(head)
(cond
((= (get head :type) "send")
{:selector (get head :selector) :args (get head :args)})
(else
;; Shouldn't happen — cascade requires at least one prior message.
(error "st-parse: cascade with no prior message")))))
;; Subsequent cascade message (after the `;`): unary | binary | keyword
(define
parse-cascade-message
(fn
()
(cond
((at-type? "ident")
(let ((t (peek-tok)))
(begin
(advance-tok!)
{:selector (st-tok-value t) :args (list)})))
((at-type? "binary")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(let
((arg (parse-unary-message)))
{:selector (st-tok-value t) :args (list arg)}))))
((at-type? "keyword")
(let
((sel-parts (list)) (args (list)))
(begin
(define
kw-loop
(fn
()
(when
(at-type? "keyword")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! sel-parts (st-tok-value t))
(append! args (parse-binary-message))
(kw-loop))))))
(kw-loop)
{:selector (join "" sel-parts) :args args})))
(else
(error
(str "st-parse: bad cascade message at idx " idx))))))
;; Keyword message: <binary> (kw <binary>)+
(define
parse-keyword-message
(fn
()
(let
((receiver (parse-binary-message)))
(cond
((at-type? "keyword")
(let
((sel-parts (list)) (args (list)))
(begin
(define
kw-loop
(fn
()
(when
(at-type? "keyword")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! sel-parts (st-tok-value t))
(append! args (parse-binary-message))
(kw-loop))))))
(kw-loop)
{:type "send"
:receiver receiver
:selector (join "" sel-parts)
:args args})))
(else receiver)))))
;; Binary message: <unary> (binop <unary>)*
(define
parse-binary-message
(fn
()
(let
((receiver (parse-unary-message)))
(begin
(define
b-loop
(fn
()
(when
(at-type? "binary")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(let
((arg (parse-unary-message)))
(set!
receiver
{:type "send"
:receiver receiver
:selector (st-tok-value t)
:args (list arg)}))
(b-loop))))))
(b-loop)
receiver))))
;; Unary message: <primary> ident* (ident NOT followed by ':')
(define
parse-unary-message
(fn
()
(let
((receiver (parse-primary)))
(begin
(define
u-loop
(fn
()
(when
(and
(at-type? "ident")
(let
((nxt (peek-tok-at 1)))
(not (= (st-tok-type nxt) "assign"))))
(let ((t (peek-tok)))
(begin
(advance-tok!)
(set!
receiver
{:type "send"
:receiver receiver
:selector (st-tok-value t)
:args (list)})
(u-loop))))))
(u-loop)
receiver))))
;; Method header: unary | binary arg | (kw arg)+
(define
parse-method
(fn
()
(let
((sel "") (params (list)) (temps (list)) (body (list)))
(begin
(cond
;; Unary header
((at-type? "ident")
(let ((t (peek-tok)))
(begin (advance-tok!) (set! sel (st-tok-value t)))))
;; Binary header: binop ident
((at-type? "binary")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(set! sel (st-tok-value t))
(let ((p (consume! "ident" nil)))
(append! params (st-tok-value p))))))
;; Keyword header: (kw ident)+
((at-type? "keyword")
(let ((sel-parts (list)))
(begin
(define
kh-loop
(fn
()
(when
(at-type? "keyword")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! sel-parts (st-tok-value t))
(let ((p (consume! "ident" nil)))
(append! params (st-tok-value p)))
(kh-loop))))))
(kh-loop)
(set! sel (join "" sel-parts)))))
(else
(error
(str
"st-parse-method: expected selector header, got "
(st-tok-type (peek-tok))))))
;; Optional temps: | t1 t2 |
(when
(at? "bar" nil)
(begin
(advance-tok!)
(define
th-loop
(fn
()
(when
(at-type? "ident")
(let ((t (peek-tok)))
(begin
(advance-tok!)
(append! temps (st-tok-value t))
(th-loop))))))
(th-loop)
(consume! "bar" nil)))
;; Body statements
(set! body (parse-statements "eof"))
{:type "method"
:selector sel
:params params
:temps temps
:body body}))))
;; Top-level program: statements separated by '.'
(cond
((= mode "expr") (parse-expression))
((= mode "method") (parse-method))
(else
{:type "seq" :exprs (parse-statements "eof")}))))))

View File

@@ -31,6 +31,8 @@ for arg in "$@"; do
done
if [ ${#FILES[@]} -eq 0 ]; then
# tokenize.sx must load first — it defines the st-test helpers reused by
# subsequent test files. Sort enforces this lexicographically.
mapfile -t FILES < <(find lib/smalltalk/tests -maxdepth 2 -name '*.sx' | sort)
fi
@@ -41,7 +43,8 @@ FAILED_FILES=()
for FILE in "${FILES[@]}"; do
[ -f "$FILE" ] || { echo "skip $FILE (not found)"; continue; }
TMPFILE=$(mktemp)
cat > "$TMPFILE" <<EPOCHS
if [ "$(basename "$FILE")" = "tokenize.sx" ]; then
cat > "$TMPFILE" <<EPOCHS
(epoch 1)
(load "lib/smalltalk/tokenizer.sx")
(epoch 2)
@@ -49,14 +52,30 @@ for FILE in "${FILES[@]}"; do
(epoch 3)
(eval "(list st-test-pass st-test-fail)")
EPOCHS
else
cat > "$TMPFILE" <<EPOCHS
(epoch 1)
(load "lib/smalltalk/tokenizer.sx")
(epoch 2)
(load "lib/smalltalk/parser.sx")
(epoch 3)
(load "lib/smalltalk/tests/tokenize.sx")
(epoch 4)
(load "$FILE")
(epoch 5)
(eval "(list st-test-pass st-test-fail)")
EPOCHS
fi
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>&1 || true)
rm -f "$TMPFILE"
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}')
# Final epoch's value: either (ok N (P F)) on one line or
# (ok-len N M)\n(P F) where the value is on the following line.
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len [0-9]+ / {getline; print}' | tail -1)
if [ -z "$LINE" ]; then
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \
| sed -E 's/^\(ok 3 //; s/\)$//')
LINE=$(echo "$OUTPUT" | grep -E '^\(ok [0-9]+ \([0-9]+ [0-9]+\)\)' | tail -1 \
| sed -E 's/^\(ok [0-9]+ //; s/\)$//')
fi
if [ -z "$LINE" ]; then
echo "X $FILE: could not extract summary"
@@ -73,7 +92,8 @@ EPOCHS
FAILED_FILES+=("$FILE")
printf 'X %-40s %d/%d\n' "$FILE" "$P" "$((P+F))"
TMPFILE2=$(mktemp)
cat > "$TMPFILE2" <<EPOCHS
if [ "$(basename "$FILE")" = "tokenize.sx" ]; then
cat > "$TMPFILE2" <<EPOCHS
(epoch 1)
(load "lib/smalltalk/tokenizer.sx")
(epoch 2)
@@ -81,7 +101,21 @@ EPOCHS
(epoch 3)
(eval "(map (fn (f) (get f :name)) st-test-fails)")
EPOCHS
FAILS=$(timeout 60 "$SX_SERVER" < "$TMPFILE2" 2>&1 | grep -E '^\(ok 3 ' || true)
else
cat > "$TMPFILE2" <<EPOCHS
(epoch 1)
(load "lib/smalltalk/tokenizer.sx")
(epoch 2)
(load "lib/smalltalk/parser.sx")
(epoch 3)
(load "lib/smalltalk/tests/tokenize.sx")
(epoch 4)
(load "$FILE")
(epoch 5)
(eval "(map (fn (f) (get f :name)) st-test-fails)")
EPOCHS
fi
FAILS=$(timeout 60 "$SX_SERVER" < "$TMPFILE2" 2>&1 | grep -E '^\(ok [0-9]+ \(' | tail -1 || true)
rm -f "$TMPFILE2"
echo " $FAILS"
elif [ "$VERBOSE" = "1" ]; then

View File

@@ -0,0 +1,365 @@
;; Smalltalk parser tests.
;;
;; Reuses helpers (st-test, st-deep=?) from tokenize.sx. Counters reset
;; here so this file's summary covers parse tests only.
(set! st-test-pass 0)
(set! st-test-fail 0)
(set! st-test-fails (list))
;; ── 1. Atoms ──
(st-test "int" (st-parse-expr "42") {:type "lit-int" :value 42})
(st-test "float" (st-parse-expr "3.14") {:type "lit-float" :value 3.14})
(st-test "string" (st-parse-expr "'hi'") {:type "lit-string" :value "hi"})
(st-test "char" (st-parse-expr "$x") {:type "lit-char" :value "x"})
(st-test "symbol" (st-parse-expr "#foo") {:type "lit-symbol" :value "foo"})
(st-test "binary symbol" (st-parse-expr "#+") {:type "lit-symbol" :value "+"})
(st-test "keyword symbol" (st-parse-expr "#at:put:") {:type "lit-symbol" :value "at:put:"})
(st-test "nil" (st-parse-expr "nil") {:type "lit-nil"})
(st-test "true" (st-parse-expr "true") {:type "lit-true"})
(st-test "false" (st-parse-expr "false") {:type "lit-false"})
(st-test "self" (st-parse-expr "self") {:type "self"})
(st-test "super" (st-parse-expr "super") {:type "super"})
(st-test "ident" (st-parse-expr "x") {:type "ident" :name "x"})
(st-test "negative int" (st-parse-expr "-3") {:type "lit-int" :value -3})
;; ── 2. Literal arrays ──
(st-test
"literal array of ints"
(st-parse-expr "#(1 2 3)")
{:type "lit-array"
:elements (list
{:type "lit-int" :value 1}
{:type "lit-int" :value 2}
{:type "lit-int" :value 3})})
(st-test
"literal array mixed"
(st-parse-expr "#(1 #foo 'x' true)")
{:type "lit-array"
:elements (list
{:type "lit-int" :value 1}
{:type "lit-symbol" :value "foo"}
{:type "lit-string" :value "x"}
{:type "lit-true"})})
(st-test
"literal array bare ident is symbol"
(st-parse-expr "#(foo bar)")
{:type "lit-array"
:elements (list
{:type "lit-symbol" :value "foo"}
{:type "lit-symbol" :value "bar"})})
(st-test
"nested literal array"
(st-parse-expr "#(1 (2 3) 4)")
{:type "lit-array"
:elements (list
{:type "lit-int" :value 1}
{:type "lit-array"
:elements (list
{:type "lit-int" :value 2}
{:type "lit-int" :value 3})}
{:type "lit-int" :value 4})})
(st-test
"byte array"
(st-parse-expr "#[1 2 3]")
{:type "lit-byte-array" :elements (list 1 2 3)})
;; ── 3. Unary messages ──
(st-test
"unary single"
(st-parse-expr "x foo")
{:type "send"
:receiver {:type "ident" :name "x"}
:selector "foo"
:args (list)})
(st-test
"unary chain"
(st-parse-expr "x foo bar baz")
{:type "send"
:receiver {:type "send"
:receiver {:type "send"
:receiver {:type "ident" :name "x"}
:selector "foo"
:args (list)}
:selector "bar"
:args (list)}
:selector "baz"
:args (list)})
(st-test
"unary on literal"
(st-parse-expr "42 printNl")
{:type "send"
:receiver {:type "lit-int" :value 42}
:selector "printNl"
:args (list)})
;; ── 4. Binary messages ──
(st-test
"binary single"
(st-parse-expr "1 + 2")
{:type "send"
:receiver {:type "lit-int" :value 1}
:selector "+"
:args (list {:type "lit-int" :value 2})})
(st-test
"binary left-assoc"
(st-parse-expr "1 + 2 + 3")
{:type "send"
:receiver {:type "send"
:receiver {:type "lit-int" :value 1}
:selector "+"
:args (list {:type "lit-int" :value 2})}
:selector "+"
:args (list {:type "lit-int" :value 3})})
(st-test
"binary same precedence l-to-r"
(st-parse-expr "1 + 2 * 3")
{:type "send"
:receiver {:type "send"
:receiver {:type "lit-int" :value 1}
:selector "+"
:args (list {:type "lit-int" :value 2})}
:selector "*"
:args (list {:type "lit-int" :value 3})})
;; ── 5. Precedence: unary binds tighter than binary ──
(st-test
"unary tighter than binary"
(st-parse-expr "3 + 4 factorial")
{:type "send"
:receiver {:type "lit-int" :value 3}
:selector "+"
:args (list
{:type "send"
:receiver {:type "lit-int" :value 4}
:selector "factorial"
:args (list)})})
;; ── 6. Keyword messages ──
(st-test
"keyword single"
(st-parse-expr "x at: 1")
{:type "send"
:receiver {:type "ident" :name "x"}
:selector "at:"
:args (list {:type "lit-int" :value 1})})
(st-test
"keyword chain"
(st-parse-expr "x at: 1 put: 'a'")
{:type "send"
:receiver {:type "ident" :name "x"}
:selector "at:put:"
:args (list {:type "lit-int" :value 1} {:type "lit-string" :value "a"})})
;; ── 7. Precedence: binary tighter than keyword ──
(st-test
"binary tighter than keyword"
(st-parse-expr "x at: 1 + 2")
{:type "send"
:receiver {:type "ident" :name "x"}
:selector "at:"
:args (list
{:type "send"
:receiver {:type "lit-int" :value 1}
:selector "+"
:args (list {:type "lit-int" :value 2})})})
(st-test
"keyword absorbs trailing unary"
(st-parse-expr "a foo: b bar")
{:type "send"
:receiver {:type "ident" :name "a"}
:selector "foo:"
:args (list
{:type "send"
:receiver {:type "ident" :name "b"}
:selector "bar"
:args (list)})})
;; ── 8. Parens override precedence ──
(st-test
"paren forces grouping"
(st-parse-expr "(1 + 2) * 3")
{:type "send"
:receiver {:type "send"
:receiver {:type "lit-int" :value 1}
:selector "+"
:args (list {:type "lit-int" :value 2})}
:selector "*"
:args (list {:type "lit-int" :value 3})})
;; ── 9. Cascade ──
(st-test
"simple cascade"
(st-parse-expr "x m1; m2")
{:type "cascade"
:receiver {:type "ident" :name "x"}
:messages (list
{:selector "m1" :args (list)}
{:selector "m2" :args (list)})})
(st-test
"cascade with binary and keyword"
(st-parse-expr "Stream new nl; tab; print: 1")
{:type "cascade"
:receiver {:type "send"
:receiver {:type "ident" :name "Stream"}
:selector "new"
:args (list)}
:messages (list
{:selector "nl" :args (list)}
{:selector "tab" :args (list)}
{:selector "print:" :args (list {:type "lit-int" :value 1})})})
;; ── 10. Blocks ──
(st-test
"empty block"
(st-parse-expr "[]")
{:type "block" :params (list) :temps (list) :body (list)})
(st-test
"block one expr"
(st-parse-expr "[1 + 2]")
{:type "block"
:params (list)
:temps (list)
:body (list
{:type "send"
:receiver {:type "lit-int" :value 1}
:selector "+"
:args (list {:type "lit-int" :value 2})})})
(st-test
"block with params"
(st-parse-expr "[:a :b | a + b]")
{:type "block"
:params (list "a" "b")
:temps (list)
:body (list
{:type "send"
:receiver {:type "ident" :name "a"}
:selector "+"
:args (list {:type "ident" :name "b"})})})
(st-test
"block with temps"
(st-parse-expr "[| t | t := 1. t]")
{:type "block"
:params (list)
:temps (list "t")
:body (list
{:type "assign" :name "t" :expr {:type "lit-int" :value 1}}
{:type "ident" :name "t"})})
(st-test
"block with params and temps"
(st-parse-expr "[:x | | t | t := x + 1. t]")
{:type "block"
:params (list "x")
:temps (list "t")
:body (list
{:type "assign"
:name "t"
:expr {:type "send"
:receiver {:type "ident" :name "x"}
:selector "+"
:args (list {:type "lit-int" :value 1})}}
{:type "ident" :name "t"})})
;; ── 11. Assignment / return / statements ──
(st-test
"assignment"
(st-parse-expr "x := 1")
{:type "assign" :name "x" :expr {:type "lit-int" :value 1}})
(st-test
"return"
(st-parse-expr "1")
{:type "lit-int" :value 1})
(st-test
"return statement at top level"
(st-parse "^ 1")
{:type "seq"
:exprs (list {:type "return" :expr {:type "lit-int" :value 1}})})
(st-test
"two statements"
(st-parse "x := 1. y := 2")
{:type "seq"
:exprs (list
{:type "assign" :name "x" :expr {:type "lit-int" :value 1}}
{:type "assign" :name "y" :expr {:type "lit-int" :value 2}})})
(st-test
"trailing dot allowed"
(st-parse "1. 2.")
{:type "seq"
:exprs (list {:type "lit-int" :value 1} {:type "lit-int" :value 2})})
;; ── 12. Method headers ──
(st-test
"unary method"
(st-parse-method "factorial ^ self * (self - 1) factorial")
{:type "method"
:selector "factorial"
:params (list)
:temps (list)
:body (list
{:type "return"
:expr {:type "send"
:receiver {:type "self"}
:selector "*"
:args (list
{:type "send"
:receiver {:type "send"
:receiver {:type "self"}
:selector "-"
:args (list {:type "lit-int" :value 1})}
:selector "factorial"
:args (list)})}})})
(st-test
"binary method"
(st-parse-method "+ other ^ 'plus'")
{:type "method"
:selector "+"
:params (list "other")
:temps (list)
:body (list {:type "return" :expr {:type "lit-string" :value "plus"}})})
(st-test
"keyword method"
(st-parse-method "at: i put: v ^ v")
{:type "method"
:selector "at:put:"
:params (list "i" "v")
:temps (list)
:body (list {:type "return" :expr {:type "ident" :name "v"}})})
(st-test
"method with temps"
(st-parse-method "twice: x | t | t := x + x. ^ t")
{:type "method"
:selector "twice:"
:params (list "x")
:temps (list "t")
:body (list
{:type "assign"
:name "t"
:expr {:type "send"
:receiver {:type "ident" :name "x"}
:selector "+"
:args (list {:type "ident" :name "x"})}}
{:type "return" :expr {:type "ident" :name "t"}})})
(list st-test-pass st-test-fail)

View File

@@ -51,8 +51,9 @@ Core mapping:
### Phase 1 — tokenizer + parser
- [x] Tokenizer: identifiers, keywords (`foo:`), binary selectors (`+`, `==`, `,`, `->`, `~=` etc.), numbers (radix `16r1F`; **scaled `1.5s2` deferred**), strings `'…''…'`, characters `$c`, symbols `#foo` `#'foo bar'` `#+`, byte arrays `#[1 2 3]` (open token), literal arrays `#(1 #foo 'x')` (open token), comments `"…"`
- [ ] Parser: chunk format (`! !` separators), class definitions (`Object subclass: #X instanceVariableNames: '…' classVariableNames: '…' …`), method definitions (`extend: #Foo with: 'bar ^self'`), pragmas `<primitive: 1>`, blocks `[:a :b | | t1 t2 | …]`, cascades, message precedence (unary > binary > keyword)
- [ ] Unit tests in `lib/smalltalk/tests/parse.sx`
- [x] Parser (expression level): blocks `[:a :b | | t1 t2 | …]`, cascades, message precedence (unary > binary > keyword), assignment, return, statement sequences, literal arrays, byte arrays, paren grouping, method headers (`+ other`, `at:put:`, unary, with temps and body). Class-definition keyword messages parse as ordinary keyword sends — no special-case needed.
- [ ] Parser (chunk-stream level): `! !` chunk separators driving a sequence of top-level expressions, pragmas `<primitive: 1>` inside method bodies
- [x] Unit tests in `lib/smalltalk/tests/parse.sx`
### Phase 2 — object model + sequential eval
- [ ] Class table + bootstrap: `Object`, `Behavior`, `Class`, `Metaclass`, `UndefinedObject`, `Boolean`/`True`/`False`, `Number`/`Integer`/`Float`, `String`, `Symbol`, `Array`, `Block`
@@ -107,6 +108,7 @@ Core mapping:
_Newest first. Agent appends on every commit._
- 2026-04-25: expression-level parser + 47 parse tests (`lib/smalltalk/parser.sx`, `lib/smalltalk/tests/parse.sx`). Full message precedence (unary > binary > keyword), cascades, blocks with params/temps, literal/byte arrays, assignment chain, method headers (unary/binary/keyword). Chunk-format `! !` driver deferred to a follow-up box. 110/110 tests pass.
- 2026-04-25: tokenizer + 63 tests (`lib/smalltalk/tokenizer.sx`, `lib/smalltalk/tests/tokenize.sx`, `lib/smalltalk/test.sh`). All token types covered except scaled decimals `1.5s2` (deferred). `#(` and `#[` emit open tokens; literal-array contents lexed as ordinary tokens for the parser to interpret.
## Blockers