Compare commits

...

2 Commits

Author SHA1 Message Date
4e7d2183ad smalltalk: tokenizer + 63 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
2026-04-25 00:19:23 +00:00
6a00df2609 smalltalk: plan + briefing + sx-loops 8th slot
Showcase: blocks with non-local return on captured method-return
continuation. ANSI-ish Smalltalk-80 subset, SUnit + Pharo Kernel-Tests
slice, 7 phases. Worktree: /root/rose-ash-loops/smalltalk on
branch loops/smalltalk.
2026-04-25 00:05:31 +00:00
7 changed files with 1029 additions and 8 deletions

99
lib/smalltalk/test.sh Executable file
View File

@@ -0,0 +1,99 @@
#!/usr/bin/env bash
# Fast Smalltalk-on-SX test runner — pipes directly to sx_server.exe.
# Mirrors lib/haskell/test.sh.
#
# Usage:
# bash lib/smalltalk/test.sh # run all tests
# bash lib/smalltalk/test.sh -v # verbose
# bash lib/smalltalk/test.sh tests/tokenize.sx # run one file
set -uo pipefail
cd "$(git rev-parse --show-toplevel)"
SX_SERVER="hosts/ocaml/_build/default/bin/sx_server.exe"
if [ ! -x "$SX_SERVER" ]; then
MAIN_ROOT=$(git worktree list | head -1 | awk '{print $1}')
if [ -x "$MAIN_ROOT/$SX_SERVER" ]; then
SX_SERVER="$MAIN_ROOT/$SX_SERVER"
else
echo "ERROR: sx_server.exe not found. Run: cd hosts/ocaml && dune build"
exit 1
fi
fi
VERBOSE=""
FILES=()
for arg in "$@"; do
case "$arg" in
-v|--verbose) VERBOSE=1 ;;
*) FILES+=("$arg") ;;
esac
done
if [ ${#FILES[@]} -eq 0 ]; then
mapfile -t FILES < <(find lib/smalltalk/tests -maxdepth 2 -name '*.sx' | sort)
fi
TOTAL_PASS=0
TOTAL_FAIL=0
FAILED_FILES=()
for FILE in "${FILES[@]}"; do
[ -f "$FILE" ] || { echo "skip $FILE (not found)"; continue; }
TMPFILE=$(mktemp)
cat > "$TMPFILE" <<EPOCHS
(epoch 1)
(load "lib/smalltalk/tokenizer.sx")
(epoch 2)
(load "$FILE")
(epoch 3)
(eval "(list st-test-pass st-test-fail)")
EPOCHS
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>&1 || true)
rm -f "$TMPFILE"
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}')
if [ -z "$LINE" ]; then
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \
| sed -E 's/^\(ok 3 //; s/\)$//')
fi
if [ -z "$LINE" ]; then
echo "X $FILE: could not extract summary"
echo "$OUTPUT" | tail -30
TOTAL_FAIL=$((TOTAL_FAIL + 1))
FAILED_FILES+=("$FILE")
continue
fi
P=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\1/')
F=$(echo "$LINE" | sed -E 's/^\(([0-9]+) ([0-9]+)\).*/\2/')
TOTAL_PASS=$((TOTAL_PASS + P))
TOTAL_FAIL=$((TOTAL_FAIL + F))
if [ "$F" -gt 0 ]; then
FAILED_FILES+=("$FILE")
printf 'X %-40s %d/%d\n' "$FILE" "$P" "$((P+F))"
TMPFILE2=$(mktemp)
cat > "$TMPFILE2" <<EPOCHS
(epoch 1)
(load "lib/smalltalk/tokenizer.sx")
(epoch 2)
(load "$FILE")
(epoch 3)
(eval "(map (fn (f) (get f :name)) st-test-fails)")
EPOCHS
FAILS=$(timeout 60 "$SX_SERVER" < "$TMPFILE2" 2>&1 | grep -E '^\(ok 3 ' || true)
rm -f "$TMPFILE2"
echo " $FAILS"
elif [ "$VERBOSE" = "1" ]; then
printf 'OK %-40s %d passed\n' "$FILE" "$P"
fi
done
TOTAL=$((TOTAL_PASS + TOTAL_FAIL))
if [ $TOTAL_FAIL -eq 0 ]; then
echo "OK $TOTAL_PASS/$TOTAL smalltalk-on-sx tests passed"
else
echo "FAIL $TOTAL_PASS/$TOTAL passed, $TOTAL_FAIL failed in: ${FAILED_FILES[*]}"
fi
[ $TOTAL_FAIL -eq 0 ]

View File

@@ -0,0 +1,362 @@
;; Smalltalk tokenizer tests.
;;
;; Lightweight runner: each test checks actual vs expected with structural
;; equality and accumulates pass/fail counters. Final summary read by
;; lib/smalltalk/test.sh.
(define
st-deep=?
(fn
(a b)
(cond
((= a b) true)
((and (dict? a) (dict? b))
(let
((ak (keys a)) (bk (keys b)))
(if
(not (= (len ak) (len bk)))
false
(every?
(fn
(k)
(and (has-key? b k) (st-deep=? (get a k) (get b k))))
ak))))
((and (list? a) (list? b))
(if
(not (= (len a) (len b)))
false
(let
((i 0) (ok true))
(begin
(define
de-loop
(fn
()
(when
(and ok (< i (len a)))
(begin
(when
(not (st-deep=? (nth a i) (nth b i)))
(set! ok false))
(set! i (+ i 1))
(de-loop)))))
(de-loop)
ok))))
(:else false))))
(define st-test-pass 0)
(define st-test-fail 0)
(define st-test-fails (list))
(define
st-test
(fn
(name actual expected)
(if
(st-deep=? actual expected)
(set! st-test-pass (+ st-test-pass 1))
(begin
(set! st-test-fail (+ st-test-fail 1))
(append! st-test-fails {:actual actual :expected expected :name name})))))
;; Strip eof and project to just :type/:value.
(define
st-toks
(fn
(src)
(map
(fn (tok) {:type (get tok :type) :value (get tok :value)})
(filter
(fn (tok) (not (= (get tok :type) "eof")))
(st-tokenize src)))))
;; ── 1. Whitespace / empty ──
(st-test "empty input" (st-toks "") (list))
(st-test "all whitespace" (st-toks " \t\n ") (list))
;; ── 2. Identifiers ──
(st-test
"lowercase ident"
(st-toks "foo")
(list {:type "ident" :value "foo"}))
(st-test
"capitalised ident"
(st-toks "Foo")
(list {:type "ident" :value "Foo"}))
(st-test
"underscore ident"
(st-toks "_x")
(list {:type "ident" :value "_x"}))
(st-test
"digits in ident"
(st-toks "foo123")
(list {:type "ident" :value "foo123"}))
(st-test
"two idents separated"
(st-toks "foo bar")
(list {:type "ident" :value "foo"} {:type "ident" :value "bar"}))
;; ── 3. Keyword selectors ──
(st-test
"keyword selector"
(st-toks "foo:")
(list {:type "keyword" :value "foo:"}))
(st-test
"keyword call"
(st-toks "x at: 1")
(list
{:type "ident" :value "x"}
{:type "keyword" :value "at:"}
{:type "number" :value 1}))
(st-test
"two-keyword chain stays separate"
(st-toks "at: 1 put: 2")
(list
{:type "keyword" :value "at:"}
{:type "number" :value 1}
{:type "keyword" :value "put:"}
{:type "number" :value 2}))
(st-test
"ident then assign — not a keyword"
(st-toks "x := 1")
(list
{:type "ident" :value "x"}
{:type "assign" :value ":="}
{:type "number" :value 1}))
;; ── 4. Numbers ──
(st-test
"integer"
(st-toks "42")
(list {:type "number" :value 42}))
(st-test
"float"
(st-toks "3.14")
(list {:type "number" :value 3.14}))
(st-test
"hex radix"
(st-toks "16rFF")
(list
{:type "number"
:value
{:radix 16 :digits "FF" :value 255 :kind "radix"}}))
(st-test
"binary radix"
(st-toks "2r1011")
(list
{:type "number"
:value
{:radix 2 :digits "1011" :value 11 :kind "radix"}}))
(st-test
"exponent"
(st-toks "1e3")
(list {:type "number" :value 1000}))
(st-test
"negative exponent (parser handles minus)"
(st-toks "1.5e-2")
(list {:type "number" :value 0.015}))
;; ── 5. Strings ──
(st-test
"simple string"
(st-toks "'hi'")
(list {:type "string" :value "hi"}))
(st-test
"empty string"
(st-toks "''")
(list {:type "string" :value ""}))
(st-test
"doubled-quote escape"
(st-toks "'a''b'")
(list {:type "string" :value "a'b"}))
;; ── 6. Characters ──
(st-test
"char literal letter"
(st-toks "$a")
(list {:type "char" :value "a"}))
(st-test
"char literal punct"
(st-toks "$$")
(list {:type "char" :value "$"}))
(st-test
"char literal space"
(st-toks "$ ")
(list {:type "char" :value " "}))
;; ── 7. Symbols ──
(st-test
"symbol ident"
(st-toks "#foo")
(list {:type "symbol" :value "foo"}))
(st-test
"symbol binary"
(st-toks "#+")
(list {:type "symbol" :value "+"}))
(st-test
"symbol arrow"
(st-toks "#->")
(list {:type "symbol" :value "->"}))
(st-test
"symbol keyword chain"
(st-toks "#at:put:")
(list {:type "symbol" :value "at:put:"}))
(st-test
"quoted symbol with spaces"
(st-toks "#'foo bar'")
(list {:type "symbol" :value "foo bar"}))
;; ── 8. Literal arrays / byte arrays ──
(st-test
"literal array open"
(st-toks "#(1 2)")
(list
{:type "array-open" :value "#("}
{:type "number" :value 1}
{:type "number" :value 2}
{:type "rparen" :value ")"}))
(st-test
"byte array open"
(st-toks "#[1 2 3]")
(list
{:type "byte-array-open" :value "#["}
{:type "number" :value 1}
{:type "number" :value 2}
{:type "number" :value 3}
{:type "rbracket" :value "]"}))
;; ── 9. Binary selectors ──
(st-test "plus" (st-toks "+") (list {:type "binary" :value "+"}))
(st-test "minus" (st-toks "-") (list {:type "binary" :value "-"}))
(st-test "star" (st-toks "*") (list {:type "binary" :value "*"}))
(st-test "double-equal" (st-toks "==") (list {:type "binary" :value "=="}))
(st-test "leq" (st-toks "<=") (list {:type "binary" :value "<="}))
(st-test "geq" (st-toks ">=") (list {:type "binary" :value ">="}))
(st-test "neq" (st-toks "~=") (list {:type "binary" :value "~="}))
(st-test "arrow" (st-toks "->") (list {:type "binary" :value "->"}))
(st-test "comma" (st-toks ",") (list {:type "binary" :value ","}))
(st-test
"binary in expression"
(st-toks "a + b")
(list
{:type "ident" :value "a"}
{:type "binary" :value "+"}
{:type "ident" :value "b"}))
;; ── 10. Punctuation ──
(st-test "lparen" (st-toks "(") (list {:type "lparen" :value "("}))
(st-test "rparen" (st-toks ")") (list {:type "rparen" :value ")"}))
(st-test "lbracket" (st-toks "[") (list {:type "lbracket" :value "["}))
(st-test "rbracket" (st-toks "]") (list {:type "rbracket" :value "]"}))
(st-test "lbrace" (st-toks "{") (list {:type "lbrace" :value "{"}))
(st-test "rbrace" (st-toks "}") (list {:type "rbrace" :value "}"}))
(st-test "period" (st-toks ".") (list {:type "period" :value "."}))
(st-test "semi" (st-toks ";") (list {:type "semi" :value ";"}))
(st-test "bar" (st-toks "|") (list {:type "bar" :value "|"}))
(st-test "caret" (st-toks "^") (list {:type "caret" :value "^"}))
(st-test "bang" (st-toks "!") (list {:type "bang" :value "!"}))
(st-test "colon" (st-toks ":") (list {:type "colon" :value ":"}))
(st-test "assign" (st-toks ":=") (list {:type "assign" :value ":="}))
;; ── 11. Comments ──
(st-test "comment skipped" (st-toks "\"hello\"") (list))
(st-test
"comment between tokens"
(st-toks "a \"comment\" b")
(list {:type "ident" :value "a"} {:type "ident" :value "b"}))
(st-test
"multi-line comment"
(st-toks "\"line1\nline2\"42")
(list {:type "number" :value 42}))
;; ── 12. Compound expressions ──
(st-test
"block with params"
(st-toks "[:a :b | a + b]")
(list
{:type "lbracket" :value "["}
{:type "colon" :value ":"}
{:type "ident" :value "a"}
{:type "colon" :value ":"}
{:type "ident" :value "b"}
{:type "bar" :value "|"}
{:type "ident" :value "a"}
{:type "binary" :value "+"}
{:type "ident" :value "b"}
{:type "rbracket" :value "]"}))
(st-test
"cascade"
(st-toks "x m1; m2")
(list
{:type "ident" :value "x"}
{:type "ident" :value "m1"}
{:type "semi" :value ";"}
{:type "ident" :value "m2"}))
(st-test
"method body return"
(st-toks "^ self foo")
(list
{:type "caret" :value "^"}
{:type "ident" :value "self"}
{:type "ident" :value "foo"}))
(st-test
"class declaration head"
(st-toks "Object subclass: #Foo")
(list
{:type "ident" :value "Object"}
{:type "keyword" :value "subclass:"}
{:type "symbol" :value "Foo"}))
(st-test
"temp declaration"
(st-toks "| t1 t2 |")
(list
{:type "bar" :value "|"}
{:type "ident" :value "t1"}
{:type "ident" :value "t2"}
{:type "bar" :value "|"}))
(st-test
"chunk separator"
(st-toks "Foo bar !")
(list
{:type "ident" :value "Foo"}
{:type "ident" :value "bar"}
{:type "bang" :value "!"}))
(st-test
"keyword call with binary precedence"
(st-toks "x foo: 1 + 2")
(list
{:type "ident" :value "x"}
{:type "keyword" :value "foo:"}
{:type "number" :value 1}
{:type "binary" :value "+"}
{:type "number" :value 2}))
(list st-test-pass st-test-fail)

366
lib/smalltalk/tokenizer.sx Normal file
View File

@@ -0,0 +1,366 @@
;; Smalltalk tokenizer.
;;
;; Token types:
;; ident identifier (foo, Foo, _x)
;; keyword selector keyword (foo:) — value is "foo:" with the colon
;; binary binary selector chars run together (+, ==, ->, <=, ~=, ...)
;; number integer or float; radix integers like 16rFF supported
;; string 'hello''world' style
;; char $c
;; symbol #foo, #foo:bar:, #+, #'with spaces'
;; array-open #(
;; byte-array-open #[
;; lparen rparen lbracket rbracket lbrace rbrace
;; period semi bar caret colon assign bang
;; eof
;;
;; Comments "…" are skipped.
(define st-make-token (fn (type value pos) {:type type :value value :pos pos}))
(define st-digit? (fn (c) (and (not (= c nil)) (>= c "0") (<= c "9"))))
(define
st-letter?
(fn
(c)
(and
(not (= c nil))
(or (and (>= c "a") (<= c "z")) (and (>= c "A") (<= c "Z"))))))
(define st-ident-start? (fn (c) (or (st-letter? c) (= c "_"))))
(define st-ident-char? (fn (c) (or (st-ident-start? c) (st-digit? c))))
(define st-ws? (fn (c) (or (= c " ") (= c "\t") (= c "\n") (= c "\r"))))
(define
st-binary-chars
(list "+" "-" "*" "/" "\\" "~" "<" ">" "=" "@" "%" "&" "?" ","))
(define
st-binary-char?
(fn (c) (and (not (= c nil)) (contains? st-binary-chars c))))
(define
st-radix-digit?
(fn
(c)
(and
(not (= c nil))
(or (st-digit? c) (and (>= c "A") (<= c "Z"))))))
(define
st-tokenize
(fn
(src)
(let
((tokens (list)) (pos 0) (src-len (len src)))
(define
pk
(fn
(offset)
(if (< (+ pos offset) src-len) (nth src (+ pos offset)) nil)))
(define cur (fn () (pk 0)))
(define advance! (fn (n) (set! pos (+ pos n))))
(define
push!
(fn
(type value start)
(append! tokens (st-make-token type value start))))
(define
skip-comment!
(fn
()
(cond
((>= pos src-len) nil)
((= (cur) "\"") (advance! 1))
(else (begin (advance! 1) (skip-comment!))))))
(define
skip-ws!
(fn
()
(cond
((>= pos src-len) nil)
((st-ws? (cur)) (begin (advance! 1) (skip-ws!)))
((= (cur) "\"") (begin (advance! 1) (skip-comment!) (skip-ws!)))
(else nil))))
(define
read-ident-chars!
(fn
()
(when
(and (< pos src-len) (st-ident-char? (cur)))
(begin (advance! 1) (read-ident-chars!)))))
(define
read-decimal-digits!
(fn
()
(when
(and (< pos src-len) (st-digit? (cur)))
(begin (advance! 1) (read-decimal-digits!)))))
(define
read-radix-digits!
(fn
()
(when
(and (< pos src-len) (st-radix-digit? (cur)))
(begin (advance! 1) (read-radix-digits!)))))
(define
read-exp-part!
(fn
()
(when
(and
(< pos src-len)
(or (= (cur) "e") (= (cur) "E"))
(let
((p1 (pk 1)) (p2 (pk 2)))
(or
(st-digit? p1)
(and (or (= p1 "+") (= p1 "-")) (st-digit? p2)))))
(begin
(advance! 1)
(when
(and (< pos src-len) (or (= (cur) "+") (= (cur) "-")))
(advance! 1))
(read-decimal-digits!)))))
(define
read-number
(fn
(start)
(begin
(read-decimal-digits!)
(cond
((and (< pos src-len) (= (cur) "r"))
(let
((base-str (slice src start pos)))
(begin
(advance! 1)
(let
((rstart pos))
(begin
(read-radix-digits!)
(let
((digits (slice src rstart pos)))
{:radix (parse-number base-str)
:digits digits
:value (parse-radix base-str digits)
:kind "radix"}))))))
((and
(< pos src-len)
(= (cur) ".")
(st-digit? (pk 1)))
(begin
(advance! 1)
(read-decimal-digits!)
(read-exp-part!)
(parse-number (slice src start pos))))
(else
(begin
(read-exp-part!)
(parse-number (slice src start pos))))))))
(define
parse-radix
(fn
(base-str digits)
(let
((base (parse-number base-str))
(chars digits)
(n-len (len digits))
(idx 0)
(acc 0))
(begin
(define
rd-loop
(fn
()
(when
(< idx n-len)
(let
((c (nth chars idx)))
(let
((d (cond
((and (>= c "0") (<= c "9")) (- (char-code c) 48))
((and (>= c "A") (<= c "Z")) (- (char-code c) 55))
(else 0))))
(begin
(set! acc (+ (* acc base) d))
(set! idx (+ idx 1))
(rd-loop)))))))
(rd-loop)
acc))))
(define
read-string
(fn
()
(let
((chars (list)))
(begin
(advance! 1)
(define
loop
(fn
()
(cond
((>= pos src-len) nil)
((= (cur) "'")
(cond
((= (pk 1) "'")
(begin
(append! chars "'")
(advance! 2)
(loop)))
(else (advance! 1))))
(else
(begin (append! chars (cur)) (advance! 1) (loop))))))
(loop)
(join "" chars)))))
(define
read-binary-run!
(fn
()
(let
((start pos))
(begin
(define
bin-loop
(fn
()
(when
(and (< pos src-len) (st-binary-char? (cur)))
(begin (advance! 1) (bin-loop)))))
(bin-loop)
(slice src start pos)))))
(define
read-symbol
(fn
(start)
(cond
;; Quoted symbol: #'whatever'
((= (cur) "'")
(let ((s (read-string))) (push! "symbol" s start)))
;; Binary-char symbol: #+, #==, #->, #|
((or (st-binary-char? (cur)) (= (cur) "|"))
(let ((b (read-binary-run!)))
(cond
((= b "")
;; lone | wasn't binary; consume it
(begin (advance! 1) (push! "symbol" "|" start)))
(else (push! "symbol" b start)))))
;; Identifier or keyword chain: #foo, #foo:bar:
((st-ident-start? (cur))
(let ((id-start pos))
(begin
(read-ident-chars!)
(define
kw-loop
(fn
()
(when
(and (< pos src-len) (= (cur) ":"))
(begin
(advance! 1)
(when
(and (< pos src-len) (st-ident-start? (cur)))
(begin (read-ident-chars!) (kw-loop)))))))
(kw-loop)
(push! "symbol" (slice src id-start pos) start))))
(else
(error
(str "st-tokenize: bad symbol at " pos))))))
(define
step
(fn
()
(begin
(skip-ws!)
(when
(< pos src-len)
(let
((start pos) (c (cur)))
(cond
;; Identifier or keyword
((st-ident-start? c)
(begin
(read-ident-chars!)
(let
((word (slice src start pos)))
(cond
;; ident immediately followed by ':' (and not ':=') => keyword
((and
(< pos src-len)
(= (cur) ":")
(not (= (pk 1) "=")))
(begin
(advance! 1)
(push!
"keyword"
(str word ":")
start)))
(else (push! "ident" word start))))
(step)))
;; Number
((st-digit? c)
(let
((v (read-number start)))
(begin (push! "number" v start) (step))))
;; String
((= c "'")
(let
((s (read-string)))
(begin (push! "string" s start) (step))))
;; Character literal
((= c "$")
(cond
((>= (+ pos 1) src-len)
(error (str "st-tokenize: $ at end of input")))
(else
(begin
(advance! 1)
(push! "char" (cur) start)
(advance! 1)
(step)))))
;; Symbol or array literal
((= c "#")
(cond
((= (pk 1) "(")
(begin (advance! 2) (push! "array-open" "#(" start) (step)))
((= (pk 1) "[")
(begin (advance! 2) (push! "byte-array-open" "#[" start) (step)))
(else
(begin (advance! 1) (read-symbol start) (step)))))
;; Assignment := or bare colon
((= c ":")
(cond
((= (pk 1) "=")
(begin (advance! 2) (push! "assign" ":=" start) (step)))
(else
(begin (advance! 1) (push! "colon" ":" start) (step)))))
;; Single-char structural punctuation
((= c "(") (begin (advance! 1) (push! "lparen" "(" start) (step)))
((= c ")") (begin (advance! 1) (push! "rparen" ")" start) (step)))
((= c "[") (begin (advance! 1) (push! "lbracket" "[" start) (step)))
((= c "]") (begin (advance! 1) (push! "rbracket" "]" start) (step)))
((= c "{") (begin (advance! 1) (push! "lbrace" "{" start) (step)))
((= c "}") (begin (advance! 1) (push! "rbrace" "}" start) (step)))
((= c ".") (begin (advance! 1) (push! "period" "." start) (step)))
((= c ";") (begin (advance! 1) (push! "semi" ";" start) (step)))
((= c "|") (begin (advance! 1) (push! "bar" "|" start) (step)))
((= c "^") (begin (advance! 1) (push! "caret" "^" start) (step)))
((= c "!") (begin (advance! 1) (push! "bang" "!" start) (step)))
;; Binary selector run
((st-binary-char? c)
(let
((b (read-binary-run!)))
(begin (push! "binary" b start) (step))))
(else
(error
(str
"st-tokenize: unexpected char "
c
" at "
pos)))))))))
(step)
(push! "eof" nil pos)
tokens)))

View File

@@ -0,0 +1,77 @@
# smalltalk-on-sx loop agent (single agent, queue-driven)
Role: iterates `plans/smalltalk-on-sx.md` forever. Message-passing OO + **blocks with non-local return** on delimited continuations. Non-local return is the headline showcase — every other Smalltalk reinvents it on the host stack; on SX it falls out of the captured method-return continuation.
```
description: smalltalk-on-sx queue loop
subagent_type: general-purpose
run_in_background: true
isolation: worktree
```
## Prompt
You are the sole background agent working `/root/rose-ash/plans/smalltalk-on-sx.md`. Isolated worktree, forever, one commit per feature. Never push.
## Restart baseline — check before iterating
1. Read `plans/smalltalk-on-sx.md` — roadmap + Progress log.
2. `ls lib/smalltalk/` — pick up from the most advanced file.
3. If `lib/smalltalk/tests/*.sx` exist, run them. Green before new work.
4. If `lib/smalltalk/scoreboard.md` exists, that's your baseline.
## The queue
Phase order per `plans/smalltalk-on-sx.md`:
- **Phase 1** — tokenizer + parser (chunk format, identifiers, keywords `foo:`, binary selectors, `#sym`, `#(…)`, `$c`, blocks `[:a | …]`, cascades, message precedence)
- **Phase 2** — object model + sequential eval (class table bootstrap, message dispatch, `super`, `doesNotUnderstand:`, instance variables)
- **Phase 3** — **THE SHOWCASE**: blocks with non-local return via captured method-return continuation. `whileTrue:` / `ifTrue:ifFalse:` as block sends. 5 classic programs (eight-queens, quicksort, mandelbrot, life, fibonacci) green.
- **Phase 4** — reflection + MOP: `perform:`, `respondsTo:`, runtime method addition, `becomeForward:`, `Exception` / `on:do:` / `ensure:` on top of `handler-bind`/`raise`
- **Phase 5** — collections + numeric tower + streams
- **Phase 6** — port SUnit, vendor Pharo Kernel-Tests slice, drive corpus to 200+
- **Phase 7** — speed (optional): inline caching, block intrinsification
Within a phase, pick the checkbox that unlocks the most tests per effort.
Every iteration: implement → test → commit → tick `[ ]` → Progress log → next.
## Ground rules (hard)
- **Scope:** only `lib/smalltalk/**` and `plans/smalltalk-on-sx.md`. Do **not** edit `spec/`, `hosts/`, `shared/`, other `lib/<lang>/` dirs, `lib/stdlib.sx`, or `lib/` root. Smalltalk primitives go in `lib/smalltalk/runtime.sx`.
- **NEVER call `sx_build`.** 600s watchdog. If sx_server binary broken → Blockers entry, stop.
- **Shared-file issues** → plan's Blockers with minimal repro.
- **Delimited continuations** are in `lib/callcc.sx` + `spec/evaluator.sx` Step 5. `sx_summarise` spec/evaluator.sx first — 2300+ lines.
- **SX files:** `sx-tree` MCP tools ONLY. `sx_validate` after edits.
- **Worktree:** commit locally. Never push. Never touch `main`.
- **Commit granularity:** one feature per commit.
- **Plan file:** update Progress log + tick boxes every commit.
## Smalltalk-specific gotchas
- **Method invocation captures `^k`** — the return continuation. Bind it as the block's escape token. `^expr` from inside any nested block invokes that captured `^k`. Escape past method return raises `BlockContext>>cannotReturn:`.
- **Blocks are lambdas + escape token**, not bare lambdas. `value`/`value:`/… invoke the lambda; `^` invokes the escape.
- **`ifTrue:` / `ifFalse:` / `whileTrue:` are ordinary block sends** — no special form. The runtime intrinsifies them in the JIT path (Tier 1 of bytecode expansion already covers this pattern).
- **Cascade** `r m1; m2; m3` desugars to `(let ((tmp r)) (st-send tmp 'm1 ()) (st-send tmp 'm2 ()) (st-send tmp 'm3 ()))`. Result is the cascade's last send (or first, depending on parser variant — pick one and document).
- **`super` send** looks up starting from the *defining* class's superclass, not the receiver class. Stash the defining class on the method record.
- **Selectors are interned symbols.** Use SX symbols.
- **Receiver dispatch:** tagged ints / floats / strings / symbols / `nil` / `true` / `false` aren't boxed. Their classes (`SmallInteger`, `Float`, `String`, `Symbol`, `UndefinedObject`, `True`, `False`) are looked up by SX type-of, not by an `:class` field.
- **Method precedence:** unary > binary > keyword. `3 + 4 factorial` is `3 + (4 factorial)`. `a foo: b bar` is `a foo: (b bar)` (keyword absorbs trailing unary).
- **Image / fileIn / become: between sessions** = out of scope. One-way `becomeForward:` only.
- **Test corpus:** ~200 hand-written + a slice of Pharo Kernel-Tests. Place programs in `lib/smalltalk/tests/programs/`.
## General gotchas (all loops)
- SX `do` = R7RS iteration. Use `begin` for multi-expr sequences.
- `cond`/`when`/`let` clauses evaluate only the last expr.
- `type-of` on user fn returns `"lambda"`.
- Shell heredoc `||` gets eaten — escape or use `case`.
## Style
- No comments in `.sx` unless non-obvious.
- No new planning docs — update `plans/smalltalk-on-sx.md` inline.
- Short, factual commit messages (`smalltalk: tokenizer + 56 tests`).
- One feature per iteration. Commit. Log. Next.
Go. Read the plan; find first `[ ]`; implement.

116
plans/smalltalk-on-sx.md Normal file
View File

@@ -0,0 +1,116 @@
# Smalltalk-on-SX: blocks with non-local return on delimited continuations
The headline showcase is **blocks** — Smalltalk's closures with non-local return (`^expr` aborts the enclosing *method*, not the block). Every other Smalltalk on top of a host VM (RSqueak on PyPy, GemStone on C, Maxine on Java) reinvents non-local return on whatever stack discipline the host gives them. On SX it's a one-liner: a block holds a captured continuation; `^` just invokes it. Message-passing OO falls out cheaply on top of the existing component / dispatch machinery.
End-state goal: ANSI-ish Smalltalk-80 subset, SUnit working, ~200 hand-written tests + a vendored slice of the Pharo kernel tests, classic corpus (eight queens, quicksort, mandelbrot, Conway's Life).
## Scope decisions (defaults — override by editing before we spawn)
- **Syntax:** Pharo / Squeak chunk format (`!` separators, `Object subclass: #Foo …`). No fileIn/fileOut images — text source only.
- **Conformance:** ANSI X3J20 *as a target*, not bug-for-bug Squeak. "Reads like Smalltalk, runs like Smalltalk."
- **Test corpus:** SUnit ported to SX-Smalltalk + custom programs + a curated slice of Pharo `Kernel-Tests` / `Collections-Tests`.
- **Image:** out of scope. Source-only. No `become:` between sessions, no snapshotting.
- **Reflection:** `class`, `respondsTo:`, `perform:`, `doesNotUnderstand:` in. `become:` (object-identity swap) **in** — it's a good CEK exercise. Method modification at runtime in.
- **GUI / Morphic / threads:** out entirely.
## Ground rules
- **Scope:** only touch `lib/smalltalk/**` and `plans/smalltalk-on-sx.md`. Don't edit `spec/`, `hosts/`, `shared/`, or any other `lib/<lang>/**`. Smalltalk primitives go in `lib/smalltalk/runtime.sx`.
- **SX files:** use `sx-tree` MCP tools only.
- **Commits:** one feature per commit. Keep `## Progress log` updated and tick roadmap boxes.
## Architecture sketch
```
Smalltalk source
lib/smalltalk/tokenizer.sx — selectors, keywords, literals, $c, #sym, #(…), $'…'
lib/smalltalk/parser.sx — AST: classes, methods, blocks, cascades, sends
lib/smalltalk/transpile.sx — AST → SX AST (entry: smalltalk-eval-ast)
lib/smalltalk/runtime.sx — class table, MOP, dispatch, primitives
```
Core mapping:
- **Class** = SX dict `{:name :superclass :ivars :methods :class-methods :metaclass}`. Class table is a flat dict keyed by class name.
- **Object** = SX dict `{:class :ivars}``ivars` keyed by symbol. Tagged ints / floats / strings / symbols are not boxed; their class is looked up by SX type.
- **Method** = SX lambda closing over a `self` binding + temps. Body wrapped in a delimited continuation so `^` can escape.
- **Message send** = `(st-send receiver selector args)` — does class-table lookup, walks superclass chain, falls back to `doesNotUnderstand:` with a `Message` object.
- **Block** `[:x | … ^v … ]` = lambda + captured `^k` (the method-return continuation). Invoking `^` calls `k`; outer block invocation past method return raises `BlockContext>>cannotReturn:`.
- **Cascade** `r m1; m2; m3` = `(let ((tmp r)) (st-send tmp 'm1 ()) (st-send tmp 'm2 ()) (st-send tmp 'm3 ()))`.
- **`ifTrue:ifFalse:` / `whileTrue:`** = ordinary block sends; the runtime intrinsifies them in the JIT path so they compile to native branches (Tier 1 of bytecode expansion already covers this pattern).
- **`become:`** = swap two object identities everywhere — in SX this is a heap walk, but we restrict to `oneWayBecome:` (cheap: rewrite class field) by default.
## Roadmap
### Phase 1 — tokenizer + parser
- [x] Tokenizer: identifiers, keywords (`foo:`), binary selectors (`+`, `==`, `,`, `->`, `~=` etc.), numbers (radix `16r1F`; **scaled `1.5s2` deferred**), strings `'…''…'`, characters `$c`, symbols `#foo` `#'foo bar'` `#+`, byte arrays `#[1 2 3]` (open token), literal arrays `#(1 #foo 'x')` (open token), comments `"…"`
- [ ] Parser: chunk format (`! !` separators), class definitions (`Object subclass: #X instanceVariableNames: '…' classVariableNames: '…' …`), method definitions (`extend: #Foo with: 'bar ^self'`), pragmas `<primitive: 1>`, blocks `[:a :b | | t1 t2 | …]`, cascades, message precedence (unary > binary > keyword)
- [ ] Unit tests in `lib/smalltalk/tests/parse.sx`
### Phase 2 — object model + sequential eval
- [ ] Class table + bootstrap: `Object`, `Behavior`, `Class`, `Metaclass`, `UndefinedObject`, `Boolean`/`True`/`False`, `Number`/`Integer`/`Float`, `String`, `Symbol`, `Array`, `Block`
- [ ] `smalltalk-eval-ast`: literals, variable reference, assignment, message send, cascade, sequence, return
- [ ] Method lookup: walk class → superclass; cache hit-class on `(class, selector)`
- [ ] `doesNotUnderstand:` fallback constructing `Message` object
- [ ] `super` send (lookup starts at superclass of *defining* class, not receiver class)
- [ ] 30+ tests in `lib/smalltalk/tests/eval.sx`
### Phase 3 — blocks + non-local return (THE SHOWCASE)
- [ ] Method invocation captures a `^k` (the return continuation) and binds it as the block's escape
- [ ] `^expr` from inside a block invokes that captured `^k`
- [ ] `BlockContext>>value`, `value:`, `value:value:`, …, `valueWithArguments:`
- [ ] `whileTrue:` / `whileTrue` / `whileFalse:` / `whileFalse` as ordinary block sends — runtime intrinsifies the loop in the bytecode JIT
- [ ] `ifTrue:` / `ifFalse:` / `ifTrue:ifFalse:` as block sends, similarly intrinsified
- [ ] Escape past returned-from method raises `BlockContext>>cannotReturn:`
- [ ] Classic programs in `lib/smalltalk/tests/programs/`:
- [ ] `eight-queens.st`
- [ ] `quicksort.st`
- [ ] `mandelbrot.st`
- [ ] `life.st` (Conway's Life, glider gun)
- [ ] `fibonacci.st` (recursive + memoised)
- [ ] `lib/smalltalk/conformance.sh` + runner, `scoreboard.json` + `scoreboard.md`
### Phase 4 — reflection + MOP
- [ ] `Object>>class`, `class>>name`, `class>>superclass`, `class>>methodDict`, `class>>selectors`
- [ ] `Object>>perform:` / `perform:with:` / `perform:withArguments:`
- [ ] `Object>>respondsTo:`, `Object>>isKindOf:`, `Object>>isMemberOf:`
- [ ] `Behavior>>compile:` — runtime method addition
- [ ] `Object>>becomeForward:` (one-way become; rewrites the class field of `aReceiver`)
- [ ] Exceptions: `Exception`, `Error`, `signal`, `signal:`, `on:do:`, `ensure:`, `ifCurtailed:` — built on top of SX `handler-bind`/`raise`
### Phase 5 — collections + numeric tower
- [ ] `SequenceableCollection`/`OrderedCollection`/`Array`/`String`/`Symbol`
- [ ] `HashedCollection`/`Set`/`Dictionary`/`IdentityDictionary`
- [ ] `Stream` hierarchy: `ReadStream`/`WriteStream`/`ReadWriteStream`
- [ ] `Number` tower: `SmallInteger`/`LargePositiveInteger`/`Float`/`Fraction`
- [ ] `String>>format:`, `printOn:` for everything
### Phase 6 — SUnit + corpus to 200+
- [ ] Port SUnit (TestCase, TestSuite, TestResult) — written in SX-Smalltalk, runs in itself
- [ ] Vendor a slice of Pharo `Kernel-Tests` and `Collections-Tests`
- [ ] Drive the scoreboard up: aim for 200+ green tests
- [ ] Stretch: ANSI Smalltalk validator subset
### Phase 7 — speed (optional)
- [ ] Method-dictionary inline caching (already in CEK as a primitive; just wire selector cache)
- [ ] Block intrinsification beyond `whileTrue:` / `ifTrue:`
- [ ] Compare against GNU Smalltalk on the corpus
## Progress log
_Newest first. Agent appends on every commit._
- 2026-04-25: tokenizer + 63 tests (`lib/smalltalk/tokenizer.sx`, `lib/smalltalk/tests/tokenize.sx`, `lib/smalltalk/test.sh`). All token types covered except scaled decimals `1.5s2` (deferred). `#(` and `#[` emit open tokens; literal-array contents lexed as ordinary tokens for the parser to interpret.
## Blockers
_Shared-file issues that need someone else to fix. Minimal repro only._
- _(none yet)_

View File

@@ -30,7 +30,7 @@ fi
if [ "$CLEAN" = "1" ]; then
cd "$(dirname "$0")/.."
for lang in lua prolog forth erlang haskell js hs; do
for lang in lua prolog forth erlang haskell js hs smalltalk; do
wt="$WORKTREE_BASE/$lang"
if [ -d "$wt" ]; then
git worktree remove --force "$wt" 2>/dev/null || rm -rf "$wt"
@@ -39,5 +39,5 @@ if [ "$CLEAN" = "1" ]; then
done
git worktree prune
echo "Worktree branches (loops/<lang>) are preserved. Delete manually if desired:"
echo " git branch -D loops/lua loops/prolog loops/forth loops/erlang loops/haskell loops/js loops/hs"
echo " git branch -D loops/lua loops/prolog loops/forth loops/erlang loops/haskell loops/js loops/hs loops/smalltalk"
fi

View File

@@ -1,5 +1,5 @@
#!/usr/bin/env bash
# Spawn 7 claude sessions in tmux, one per language loop.
# Spawn 8 claude sessions in tmux, one per language loop.
# Each runs in its own git worktree rooted at /root/rose-ash-loops/<lang>,
# on branch loops/<lang>. No two loops share a working tree, so there's
# zero risk of file collisions between languages.
@@ -9,7 +9,7 @@
#
# After the script prints done:
# tmux a -t sx-loops
# Ctrl-B + <window-number> to switch (0=lua ... 6=hs)
# Ctrl-B + <window-number> to switch (0=lua ... 7=smalltalk)
# Ctrl-B + d to detach (loops keep running, SSH-safe)
#
# Stop: ./scripts/sx-loops-down.sh
@@ -38,8 +38,9 @@ declare -A BRIEFING=(
[haskell]=haskell-loop.md
[js]=loop.md
[hs]=hs-loop.md
[smalltalk]=smalltalk-loop.md
)
ORDER=(lua prolog forth erlang haskell js hs)
ORDER=(lua prolog forth erlang haskell js hs smalltalk)
mkdir -p "$WORKTREE_BASE"
@@ -66,7 +67,7 @@ for lang in "${ORDER[@]:1}"; do
tmux new-window -t "$SESSION" -n "$lang" -c "$WORKTREE_BASE/$lang"
done
echo "Starting 7 claude sessions..."
echo "Starting 8 claude sessions..."
for lang in "${ORDER[@]}"; do
tmux send-keys -t "$SESSION:$lang" "claude" C-m
done
@@ -89,10 +90,10 @@ for lang in "${ORDER[@]}"; do
done
echo ""
echo "Done. 7 loops started in tmux session '$SESSION', each in its own worktree."
echo "Done. 8 loops started in tmux session '$SESSION', each in its own worktree."
echo ""
echo " Attach: tmux a -t $SESSION"
echo " Switch: Ctrl-B <0..6> (0=lua 1=prolog 2=forth 3=erlang 4=haskell 5=js 6=hs)"
echo " Switch: Ctrl-B <0..7> (0=lua 1=prolog 2=forth 3=erlang 4=haskell 5=js 6=hs 7=smalltalk)"
echo " List: Ctrl-B w"
echo " Detach: Ctrl-B d"
echo " Stop: ./scripts/sx-loops-down.sh"