From 33ce994f234325c14214735aca2adb3f94ca4826 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 25 Apr 2026 00:46:03 +0000 Subject: [PATCH] smalltalk: expression parser + 47 parse tests --- lib/smalltalk/parser.sx | 627 +++++++++++++++++++++++++++++++++++ lib/smalltalk/test.sh | 46 ++- lib/smalltalk/tests/parse.sx | 365 ++++++++++++++++++++ plans/smalltalk-on-sx.md | 6 +- 4 files changed, 1036 insertions(+), 8 deletions(-) create mode 100644 lib/smalltalk/parser.sx create mode 100644 lib/smalltalk/tests/parse.sx diff --git a/lib/smalltalk/parser.sx b/lib/smalltalk/parser.sx new file mode 100644 index 00000000..657a854e --- /dev/null +++ b/lib/smalltalk/parser.sx @@ -0,0 +1,627 @@ +;; Smalltalk parser — produces an AST from the tokenizer's token stream. +;; +;; AST node shapes (dicts): +;; {:type "lit-int" :value N} integer +;; {:type "lit-float" :value F} float +;; {:type "lit-string" :value S} string +;; {:type "lit-char" :value C} character +;; {:type "lit-symbol" :value S} symbol literal (#foo) +;; {:type "lit-array" :elements (list ...)} literal array (#(1 2 #foo)) +;; {:type "lit-byte-array" :elements (...)} byte array (#[1 2 3]) +;; {:type "lit-nil" } / "lit-true" / "lit-false" +;; {:type "ident" :name "x"} variable reference +;; {:type "self"} / "super" / "thisContext" pseudo-variables +;; {:type "assign" :name "x" :expr E} x := E +;; {:type "return" :expr E} ^ E +;; {:type "send" :receiver R :selector S :args (list ...)} +;; {:type "cascade" :receiver R :messages (list {:selector :args} ...)} +;; {:type "block" :params (list "a") :temps (list "t") :body (list expr)} +;; {:type "seq" :exprs (list ...)} statement sequence +;; {:type "method" :selector S :params (list ...) :temps (list ...) :body (list ...) :pragmas (list ...)} +;; +;; A "chunk" / class-definition stream is parsed at a higher level (deferred); pragma parsing is deferred too, so method nodes currently carry no :pragmas key. 
+ +(define st-tok-type (fn (t) (if (= t nil) "eof" (get t :type)))) + +(define st-tok-value (fn (t) (if (= t nil) nil (get t :value)))) + +;; Parse a *single* Smalltalk expression from source. +(define st-parse-expr (fn (src) (st-parse-with src "expr"))) + +;; Parse a sequence of statements separated by '.' Returns a {:type "seq"} node. +(define st-parse (fn (src) (st-parse-with src "seq"))) + +;; Parse a method body — `selector params | temps | body`. +;; Only the "method header + body" form (no chunk delimiters). +(define st-parse-method (fn (src) (st-parse-with src "method"))) + +(define + st-parse-with + (fn + (src mode) + (let + ((tokens (st-tokenize src)) (idx 0) (tok-len 0)) + (begin + (set! tok-len (len tokens)) + (define peek-tok (fn () (nth tokens idx))) + (define + peek-tok-at + (fn (n) (if (< (+ idx n) tok-len) (nth tokens (+ idx n)) nil))) + (define advance-tok! (fn () (set! idx (+ idx 1)))) + (define + at? + (fn + (type value) + (let + ((t (peek-tok))) + (and + (= (st-tok-type t) type) + (or (= value nil) (= (st-tok-value t) value)))))) + (define at-type? (fn (type) (= (st-tok-type (peek-tok)) type))) + (define + consume! + (fn + (type value) + (if + (at? type value) + (let ((t (peek-tok))) (begin (advance-tok!) t)) + (error + (str + "st-parse: expected " + type + (if (= value nil) "" (str " '" value "'")) + " got " + (st-tok-type (peek-tok)) + " '" + (st-tok-value (peek-tok)) + "' at idx " + idx))))) + + ;; ── Primary: atoms, paren'd expr, blocks, literal arrays, byte arrays. + (define + parse-primary + (fn + () + (let + ((t (peek-tok))) + (let + ((ty (st-tok-type t)) (v (st-tok-value t))) + (cond + ((= ty "number") + (begin + (advance-tok!) + (cond + ((number? v) {:type (if (integer? v) "lit-int" "lit-float") :value v}) + (else {:type "lit-int" :value v})))) + ((= ty "string") + (begin (advance-tok!) {:type "lit-string" :value v})) + ((= ty "char") + (begin (advance-tok!) {:type "lit-char" :value v})) + ((= ty "symbol") + (begin (advance-tok!) 
{:type "lit-symbol" :value v})) + ((= ty "array-open") (parse-literal-array)) + ((= ty "byte-array-open") (parse-byte-array)) + ((= ty "lparen") + (begin + (advance-tok!) + (let + ((e (parse-expression))) + (begin (consume! "rparen" nil) e)))) + ((= ty "lbracket") (parse-block)) + ((= ty "ident") + (begin + (advance-tok!) + (cond + ((= v "nil") {:type "lit-nil"}) + ((= v "true") {:type "lit-true"}) + ((= v "false") {:type "lit-false"}) + ((= v "self") {:type "self"}) + ((= v "super") {:type "super"}) + ((= v "thisContext") {:type "thisContext"}) + (else {:type "ident" :name v})))) + ((= ty "binary") + ;; Negative numeric literal: '-' immediately before a number. + (cond + ((and (= v "-") (= (st-tok-type (peek-tok-at 1)) "number")) + (let + ((n (st-tok-value (peek-tok-at 1)))) + (begin + (advance-tok!) + (advance-tok!) + (cond + ((dict? n) {:type "lit-int" :value n}) + ((integer? n) {:type "lit-int" :value (- 0 n)}) + (else {:type "lit-float" :value (- 0 n)}))))) + (else + (error + (str "st-parse: unexpected binary '" v "' at idx " idx))))) + (else + (error + (str + "st-parse: unexpected " + ty + " '" + v + "' at idx " + idx)))))))) + + ;; #(elem elem ...) — elements are atoms or nested parenthesised arrays. + (define + parse-literal-array + (fn + () + (let + ((items (list))) + (begin + (consume! "array-open" nil) + (define + arr-loop + (fn + () + (cond + ((at? "rparen" nil) (advance-tok!)) + (else + (begin + (append! items (parse-array-element)) + (arr-loop)))))) + (arr-loop) + {:type "lit-array" :elements items})))) + + ;; #[1 2 3] + (define + parse-byte-array + (fn + () + (let + ((items (list))) + (begin + (consume! "byte-array-open" nil) + (define + ba-loop + (fn + () + (cond + ((at? "rbracket" nil) (advance-tok!)) + (else + (let + ((t (peek-tok))) + (cond + ((= (st-tok-type t) "number") + (begin + (advance-tok!) + (append! 
items (st-tok-value t)) + (ba-loop))) + (else + (error + (str + "st-parse: byte array expects number, got " + (st-tok-type t)))))))))) + (ba-loop) + {:type "lit-byte-array" :elements items})))) + + ;; Inside a literal array: bare idents become symbols, nested (...) is a sub-array. + (define + parse-array-element + (fn + () + (let + ((t (peek-tok))) + (let + ((ty (st-tok-type t)) (v (st-tok-value t))) + (cond + ((= ty "number") (begin (advance-tok!) {:type "lit-int" :value v})) + ((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v})) + ((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v})) + ((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v})) + ((= ty "ident") + (begin + (advance-tok!) + (cond + ((= v "nil") {:type "lit-nil"}) + ((= v "true") {:type "lit-true"}) + ((= v "false") {:type "lit-false"}) + (else {:type "lit-symbol" :value v})))) + ((= ty "keyword") (begin (advance-tok!) {:type "lit-symbol" :value v})) + ((= ty "binary") (begin (advance-tok!) {:type "lit-symbol" :value v})) + ((= ty "lparen") + (let ((items (list))) + (begin + (advance-tok!) + (define + sub-loop + (fn + () + (cond + ((at? "rparen" nil) (advance-tok!)) + (else + (begin (append! items (parse-array-element)) (sub-loop)))))) + (sub-loop) + {:type "lit-array" :elements items}))) + ((= ty "array-open") (parse-literal-array)) + ((= ty "byte-array-open") (parse-byte-array)) + (else + (error + (str "st-parse: bad literal-array element " ty " '" v "'")))))))) + + ;; [:a :b | | t1 t2 | body. body. ...] + (define + parse-block + (fn + () + (begin + (consume! "lbracket" nil) + (let + ((params (list)) (temps (list))) + (begin + ;; Block params + (define + p-loop + (fn + () + (when + (at? "colon" nil) + (begin + (advance-tok!) + (let + ((t (consume! "ident" nil))) + (begin + (append! params (st-tok-value t)) + (p-loop))))))) + (p-loop) + (when (> (len params) 0) (consume! "bar" nil)) + ;; Block temps: | t1 t2 | + (when + (and + (at? 
"bar" nil) + ;; Not `|` followed immediately by binary content — the only + ;; legitimate `|` inside a block here is the temp delimiter. + true) + (begin + (advance-tok!) + (define + t-loop + (fn + () + (when + (at? "ident" nil) + (let + ((t (peek-tok))) + (begin + (advance-tok!) + (append! temps (st-tok-value t)) + (t-loop)))))) + (t-loop) + (consume! "bar" nil))) + ;; Body: statements terminated by `.` or `]` + (let + ((body (parse-statements "rbracket"))) + (begin + (consume! "rbracket" nil) + {:type "block" :params params :temps temps :body body}))))))) + + ;; Parse statements up to a closing token (rbracket or eof). Returns list. + (define + parse-statements + (fn + (terminator) + (let + ((stmts (list))) + (begin + (define + s-loop + (fn + () + (cond + ((at-type? terminator) nil) + ((at-type? "eof") nil) + (else + (begin + (append! stmts (parse-statement)) + ;; consume optional period(s) + (define + dot-loop + (fn + () + (when + (at? "period" nil) + (begin (advance-tok!) (dot-loop))))) + (dot-loop) + (s-loop)))))) + (s-loop) + stmts)))) + + ;; Statement: ^expr | ident := expr | expr + (define + parse-statement + (fn + () + (cond + ((at? "caret" nil) + (begin + (advance-tok!) + {:type "return" :expr (parse-expression)})) + ((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign")) + (let + ((name-tok (peek-tok))) + (begin + (advance-tok!) + (advance-tok!) + {:type "assign" + :name (st-tok-value name-tok) + :expr (parse-expression)}))) + (else (parse-expression))))) + + ;; Top-level expression. Assignment (right-associative chain) sits at + ;; the top; cascade is below. + (define + parse-expression + (fn + () + (cond + ((and (at-type? "ident") (= (st-tok-type (peek-tok-at 1)) "assign")) + (let + ((name-tok (peek-tok))) + (begin + (advance-tok!) + (advance-tok!) 
+ {:type "assign" + :name (st-tok-value name-tok) + :expr (parse-expression)}))) + (else (parse-cascade))))) + + (define + parse-cascade + (fn + () + (let + ((head (parse-keyword-message))) + (cond + ((at? "semi" nil) + (let + ((receiver (cascade-receiver head)) + (first-msg (cascade-first-message head)) + (msgs (list))) + (begin + (append! msgs first-msg) + (define + c-loop + (fn + () + (when + (at? "semi" nil) + (begin + (advance-tok!) + (append! msgs (parse-cascade-message)) + (c-loop))))) + (c-loop) + {:type "cascade" :receiver receiver :messages msgs}))) + (else head))))) + + ;; Extract the receiver from a head send so cascades share it. + (define + cascade-receiver + (fn + (head) + (cond + ((= (get head :type) "send") (get head :receiver)) + (else head)))) + + (define + cascade-first-message + (fn + (head) + (cond + ((= (get head :type) "send") + {:selector (get head :selector) :args (get head :args)}) + (else + ;; Shouldn't happen — cascade requires at least one prior message. + (error "st-parse: cascade with no prior message"))))) + + ;; Subsequent cascade message (after the `;`): unary | binary | keyword + (define + parse-cascade-message + (fn + () + (cond + ((at-type? "ident") + (let ((t (peek-tok))) + (begin + (advance-tok!) + {:selector (st-tok-value t) :args (list)}))) + ((at-type? "binary") + (let ((t (peek-tok))) + (begin + (advance-tok!) + (let + ((arg (parse-unary-message))) + {:selector (st-tok-value t) :args (list arg)})))) + ((at-type? "keyword") + (let + ((sel-parts (list)) (args (list))) + (begin + (define + kw-loop + (fn + () + (when + (at-type? "keyword") + (let ((t (peek-tok))) + (begin + (advance-tok!) + (append! sel-parts (st-tok-value t)) + (append! 
args (parse-binary-message)) + (kw-loop)))))) + (kw-loop) + {:selector (join "" sel-parts) :args args}))) + (else + (error + (str "st-parse: bad cascade message at idx " idx)))))) + + ;; Keyword message: (kw )+ + (define + parse-keyword-message + (fn + () + (let + ((receiver (parse-binary-message))) + (cond + ((at-type? "keyword") + (let + ((sel-parts (list)) (args (list))) + (begin + (define + kw-loop + (fn + () + (when + (at-type? "keyword") + (let ((t (peek-tok))) + (begin + (advance-tok!) + (append! sel-parts (st-tok-value t)) + (append! args (parse-binary-message)) + (kw-loop)))))) + (kw-loop) + {:type "send" + :receiver receiver + :selector (join "" sel-parts) + :args args}))) + (else receiver))))) + + ;; Binary message: (binop )* + (define + parse-binary-message + (fn + () + (let + ((receiver (parse-unary-message))) + (begin + (define + b-loop + (fn + () + (when + (at-type? "binary") + (let ((t (peek-tok))) + (begin + (advance-tok!) + (let + ((arg (parse-unary-message))) + (set! + receiver + {:type "send" + :receiver receiver + :selector (st-tok-value t) + :args (list arg)})) + (b-loop)))))) + (b-loop) + receiver)))) + + ;; Unary message: ident* (ident NOT followed by ':') + (define + parse-unary-message + (fn + () + (let + ((receiver (parse-primary))) + (begin + (define + u-loop + (fn + () + (when + (and + (at-type? "ident") + (let + ((nxt (peek-tok-at 1))) + (not (= (st-tok-type nxt) "assign")))) + (let ((t (peek-tok))) + (begin + (advance-tok!) + (set! + receiver + {:type "send" + :receiver receiver + :selector (st-tok-value t) + :args (list)}) + (u-loop)))))) + (u-loop) + receiver)))) + + ;; Method header: unary | binary arg | (kw arg)+ + (define + parse-method + (fn + () + (let + ((sel "") (params (list)) (temps (list)) (body (list))) + (begin + (cond + ;; Unary header + ((at-type? "ident") + (let ((t (peek-tok))) + (begin (advance-tok!) (set! sel (st-tok-value t))))) + ;; Binary header: binop ident + ((at-type? 
"binary") + (let ((t (peek-tok))) + (begin + (advance-tok!) + (set! sel (st-tok-value t)) + (let ((p (consume! "ident" nil))) + (append! params (st-tok-value p)))))) + ;; Keyword header: (kw ident)+ + ((at-type? "keyword") + (let ((sel-parts (list))) + (begin + (define + kh-loop + (fn + () + (when + (at-type? "keyword") + (let ((t (peek-tok))) + (begin + (advance-tok!) + (append! sel-parts (st-tok-value t)) + (let ((p (consume! "ident" nil))) + (append! params (st-tok-value p))) + (kh-loop)))))) + (kh-loop) + (set! sel (join "" sel-parts))))) + (else + (error + (str + "st-parse-method: expected selector header, got " + (st-tok-type (peek-tok)))))) + ;; Optional temps: | t1 t2 | + (when + (at? "bar" nil) + (begin + (advance-tok!) + (define + th-loop + (fn + () + (when + (at-type? "ident") + (let ((t (peek-tok))) + (begin + (advance-tok!) + (append! temps (st-tok-value t)) + (th-loop)))))) + (th-loop) + (consume! "bar" nil))) + ;; Body statements + (set! body (parse-statements "eof")) + {:type "method" + :selector sel + :params params + :temps temps + :body body})))) + + ;; Top-level program: statements separated by '.' + (cond + ((= mode "expr") (parse-expression)) + ((= mode "method") (parse-method)) + (else + {:type "seq" :exprs (parse-statements "eof")})))))) diff --git a/lib/smalltalk/test.sh b/lib/smalltalk/test.sh index 3f6bf531..90c01854 100755 --- a/lib/smalltalk/test.sh +++ b/lib/smalltalk/test.sh @@ -31,6 +31,8 @@ for arg in "$@"; do done if [ ${#FILES[@]} -eq 0 ]; then + # tokenize.sx must load first — it defines the st-test helpers reused by + # subsequent test files. Sort enforces this lexicographically. 
mapfile -t FILES < <(find lib/smalltalk/tests -maxdepth 2 -name '*.sx' | sort) fi @@ -41,7 +43,8 @@ FAILED_FILES=() for FILE in "${FILES[@]}"; do [ -f "$FILE" ] || { echo "skip $FILE (not found)"; continue; } TMPFILE=$(mktemp) - cat > "$TMPFILE" < "$TMPFILE" < "$TMPFILE" <&1 || true) rm -f "$TMPFILE" - LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}') + # Final epoch's value: either (ok N (P F)) on one line or + # (ok-len N M)\n(P F) where the value is on the following line. + LINE=$(echo "$OUTPUT" | awk '/^\(ok-len [0-9]+ / {getline; print}' | tail -1) if [ -z "$LINE" ]; then - LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \ - | sed -E 's/^\(ok 3 //; s/\)$//') + LINE=$(echo "$OUTPUT" | grep -E '^\(ok [0-9]+ \([0-9]+ [0-9]+\)\)' | tail -1 \ + | sed -E 's/^\(ok [0-9]+ //; s/\)$//') fi if [ -z "$LINE" ]; then echo "X $FILE: could not extract summary" @@ -73,7 +92,8 @@ EPOCHS FAILED_FILES+=("$FILE") printf 'X %-40s %d/%d\n' "$FILE" "$P" "$((P+F))" TMPFILE2=$(mktemp) - cat > "$TMPFILE2" < "$TMPFILE2" <&1 | grep -E '^\(ok 3 ' || true) + else + cat > "$TMPFILE2" <&1 | grep -E '^\(ok [0-9]+ \(' | tail -1 || true) rm -f "$TMPFILE2" echo " $FAILS" elif [ "$VERBOSE" = "1" ]; then diff --git a/lib/smalltalk/tests/parse.sx b/lib/smalltalk/tests/parse.sx new file mode 100644 index 00000000..edf3419a --- /dev/null +++ b/lib/smalltalk/tests/parse.sx @@ -0,0 +1,365 @@ +;; Smalltalk parser tests. +;; +;; Reuses helpers (st-test, st-deep=?) from tokenize.sx. Counters reset +;; here so this file's summary covers parse tests only. + +(set! st-test-pass 0) +(set! st-test-fail 0) +(set! st-test-fails (list)) + +;; ── 1. 
Atoms ── +(st-test "int" (st-parse-expr "42") {:type "lit-int" :value 42}) +(st-test "float" (st-parse-expr "3.14") {:type "lit-float" :value 3.14}) +(st-test "string" (st-parse-expr "'hi'") {:type "lit-string" :value "hi"}) +(st-test "char" (st-parse-expr "$x") {:type "lit-char" :value "x"}) +(st-test "symbol" (st-parse-expr "#foo") {:type "lit-symbol" :value "foo"}) +(st-test "binary symbol" (st-parse-expr "#+") {:type "lit-symbol" :value "+"}) +(st-test "keyword symbol" (st-parse-expr "#at:put:") {:type "lit-symbol" :value "at:put:"}) +(st-test "nil" (st-parse-expr "nil") {:type "lit-nil"}) +(st-test "true" (st-parse-expr "true") {:type "lit-true"}) +(st-test "false" (st-parse-expr "false") {:type "lit-false"}) +(st-test "self" (st-parse-expr "self") {:type "self"}) +(st-test "super" (st-parse-expr "super") {:type "super"}) +(st-test "ident" (st-parse-expr "x") {:type "ident" :name "x"}) +(st-test "negative int" (st-parse-expr "-3") {:type "lit-int" :value -3}) + +;; ── 2. Literal arrays ── +(st-test + "literal array of ints" + (st-parse-expr "#(1 2 3)") + {:type "lit-array" + :elements (list + {:type "lit-int" :value 1} + {:type "lit-int" :value 2} + {:type "lit-int" :value 3})}) + +(st-test + "literal array mixed" + (st-parse-expr "#(1 #foo 'x' true)") + {:type "lit-array" + :elements (list + {:type "lit-int" :value 1} + {:type "lit-symbol" :value "foo"} + {:type "lit-string" :value "x"} + {:type "lit-true"})}) + +(st-test + "literal array bare ident is symbol" + (st-parse-expr "#(foo bar)") + {:type "lit-array" + :elements (list + {:type "lit-symbol" :value "foo"} + {:type "lit-symbol" :value "bar"})}) + +(st-test + "nested literal array" + (st-parse-expr "#(1 (2 3) 4)") + {:type "lit-array" + :elements (list + {:type "lit-int" :value 1} + {:type "lit-array" + :elements (list + {:type "lit-int" :value 2} + {:type "lit-int" :value 3})} + {:type "lit-int" :value 4})}) + +(st-test + "byte array" + (st-parse-expr "#[1 2 3]") + {:type "lit-byte-array" :elements 
(list 1 2 3)}) + +;; ── 3. Unary messages ── +(st-test + "unary single" + (st-parse-expr "x foo") + {:type "send" + :receiver {:type "ident" :name "x"} + :selector "foo" + :args (list)}) + +(st-test + "unary chain" + (st-parse-expr "x foo bar baz") + {:type "send" + :receiver {:type "send" + :receiver {:type "send" + :receiver {:type "ident" :name "x"} + :selector "foo" + :args (list)} + :selector "bar" + :args (list)} + :selector "baz" + :args (list)}) + +(st-test + "unary on literal" + (st-parse-expr "42 printNl") + {:type "send" + :receiver {:type "lit-int" :value 42} + :selector "printNl" + :args (list)}) + +;; ── 4. Binary messages ── +(st-test + "binary single" + (st-parse-expr "1 + 2") + {:type "send" + :receiver {:type "lit-int" :value 1} + :selector "+" + :args (list {:type "lit-int" :value 2})}) + +(st-test + "binary left-assoc" + (st-parse-expr "1 + 2 + 3") + {:type "send" + :receiver {:type "send" + :receiver {:type "lit-int" :value 1} + :selector "+" + :args (list {:type "lit-int" :value 2})} + :selector "+" + :args (list {:type "lit-int" :value 3})}) + +(st-test + "binary same precedence l-to-r" + (st-parse-expr "1 + 2 * 3") + {:type "send" + :receiver {:type "send" + :receiver {:type "lit-int" :value 1} + :selector "+" + :args (list {:type "lit-int" :value 2})} + :selector "*" + :args (list {:type "lit-int" :value 3})}) + +;; ── 5. Precedence: unary binds tighter than binary ── +(st-test + "unary tighter than binary" + (st-parse-expr "3 + 4 factorial") + {:type "send" + :receiver {:type "lit-int" :value 3} + :selector "+" + :args (list + {:type "send" + :receiver {:type "lit-int" :value 4} + :selector "factorial" + :args (list)})}) + +;; ── 6. 
Keyword messages ── +(st-test + "keyword single" + (st-parse-expr "x at: 1") + {:type "send" + :receiver {:type "ident" :name "x"} + :selector "at:" + :args (list {:type "lit-int" :value 1})}) + +(st-test + "keyword chain" + (st-parse-expr "x at: 1 put: 'a'") + {:type "send" + :receiver {:type "ident" :name "x"} + :selector "at:put:" + :args (list {:type "lit-int" :value 1} {:type "lit-string" :value "a"})}) + +;; ── 7. Precedence: binary tighter than keyword ── +(st-test + "binary tighter than keyword" + (st-parse-expr "x at: 1 + 2") + {:type "send" + :receiver {:type "ident" :name "x"} + :selector "at:" + :args (list + {:type "send" + :receiver {:type "lit-int" :value 1} + :selector "+" + :args (list {:type "lit-int" :value 2})})}) + +(st-test + "keyword absorbs trailing unary" + (st-parse-expr "a foo: b bar") + {:type "send" + :receiver {:type "ident" :name "a"} + :selector "foo:" + :args (list + {:type "send" + :receiver {:type "ident" :name "b"} + :selector "bar" + :args (list)})}) + +;; ── 8. Parens override precedence ── +(st-test + "paren forces grouping" + (st-parse-expr "(1 + 2) * 3") + {:type "send" + :receiver {:type "send" + :receiver {:type "lit-int" :value 1} + :selector "+" + :args (list {:type "lit-int" :value 2})} + :selector "*" + :args (list {:type "lit-int" :value 3})}) + +;; ── 9. Cascade ── +(st-test + "simple cascade" + (st-parse-expr "x m1; m2") + {:type "cascade" + :receiver {:type "ident" :name "x"} + :messages (list + {:selector "m1" :args (list)} + {:selector "m2" :args (list)})}) + +(st-test + "cascade with binary and keyword" + (st-parse-expr "Stream new nl; tab; print: 1") + {:type "cascade" + :receiver {:type "send" + :receiver {:type "ident" :name "Stream"} + :selector "new" + :args (list)} + :messages (list + {:selector "nl" :args (list)} + {:selector "tab" :args (list)} + {:selector "print:" :args (list {:type "lit-int" :value 1})})}) + +;; ── 10. 
Blocks ── +(st-test + "empty block" + (st-parse-expr "[]") + {:type "block" :params (list) :temps (list) :body (list)}) + +(st-test + "block one expr" + (st-parse-expr "[1 + 2]") + {:type "block" + :params (list) + :temps (list) + :body (list + {:type "send" + :receiver {:type "lit-int" :value 1} + :selector "+" + :args (list {:type "lit-int" :value 2})})}) + +(st-test + "block with params" + (st-parse-expr "[:a :b | a + b]") + {:type "block" + :params (list "a" "b") + :temps (list) + :body (list + {:type "send" + :receiver {:type "ident" :name "a"} + :selector "+" + :args (list {:type "ident" :name "b"})})}) + +(st-test + "block with temps" + (st-parse-expr "[| t | t := 1. t]") + {:type "block" + :params (list) + :temps (list "t") + :body (list + {:type "assign" :name "t" :expr {:type "lit-int" :value 1}} + {:type "ident" :name "t"})}) + +(st-test + "block with params and temps" + (st-parse-expr "[:x | | t | t := x + 1. t]") + {:type "block" + :params (list "x") + :temps (list "t") + :body (list + {:type "assign" + :name "t" + :expr {:type "send" + :receiver {:type "ident" :name "x"} + :selector "+" + :args (list {:type "lit-int" :value 1})}} + {:type "ident" :name "t"})}) + +;; ── 11. Assignment / return / statements ── +(st-test + "assignment" + (st-parse-expr "x := 1") + {:type "assign" :name "x" :expr {:type "lit-int" :value 1}}) + +(st-test + "return" + (st-parse-expr "1") + {:type "lit-int" :value 1}) + +(st-test + "return statement at top level" + (st-parse "^ 1") + {:type "seq" + :exprs (list {:type "return" :expr {:type "lit-int" :value 1}})}) + +(st-test + "two statements" + (st-parse "x := 1. y := 2") + {:type "seq" + :exprs (list + {:type "assign" :name "x" :expr {:type "lit-int" :value 1}} + {:type "assign" :name "y" :expr {:type "lit-int" :value 2}})}) + +(st-test + "trailing dot allowed" + (st-parse "1. 2.") + {:type "seq" + :exprs (list {:type "lit-int" :value 1} {:type "lit-int" :value 2})}) + +;; ── 12. 
Method headers ── +(st-test + "unary method" + (st-parse-method "factorial ^ self * (self - 1) factorial") + {:type "method" + :selector "factorial" + :params (list) + :temps (list) + :body (list + {:type "return" + :expr {:type "send" + :receiver {:type "self"} + :selector "*" + :args (list + {:type "send" + :receiver {:type "send" + :receiver {:type "self"} + :selector "-" + :args (list {:type "lit-int" :value 1})} + :selector "factorial" + :args (list)})}})}) + +(st-test + "binary method" + (st-parse-method "+ other ^ 'plus'") + {:type "method" + :selector "+" + :params (list "other") + :temps (list) + :body (list {:type "return" :expr {:type "lit-string" :value "plus"}})}) + +(st-test + "keyword method" + (st-parse-method "at: i put: v ^ v") + {:type "method" + :selector "at:put:" + :params (list "i" "v") + :temps (list) + :body (list {:type "return" :expr {:type "ident" :name "v"}})}) + +(st-test + "method with temps" + (st-parse-method "twice: x | t | t := x + x. ^ t") + {:type "method" + :selector "twice:" + :params (list "x") + :temps (list "t") + :body (list + {:type "assign" + :name "t" + :expr {:type "send" + :receiver {:type "ident" :name "x"} + :selector "+" + :args (list {:type "ident" :name "x"})}} + {:type "return" :expr {:type "ident" :name "t"}})}) + +(list st-test-pass st-test-fail) diff --git a/plans/smalltalk-on-sx.md b/plans/smalltalk-on-sx.md index 51412d67..481ef9b0 100644 --- a/plans/smalltalk-on-sx.md +++ b/plans/smalltalk-on-sx.md @@ -51,8 +51,9 @@ Core mapping: ### Phase 1 — tokenizer + parser - [x] Tokenizer: identifiers, keywords (`foo:`), binary selectors (`+`, `==`, `,`, `->`, `~=` etc.), numbers (radix `16r1F`; **scaled `1.5s2` deferred**), strings `'…''…'`, characters `$c`, symbols `#foo` `#'foo bar'` `#+`, byte arrays `#[1 2 3]` (open token), literal arrays `#(1 #foo 'x')` (open token), comments `"…"` -- [ ] Parser: chunk format (`! 
!` separators), class definitions (`Object subclass: #X instanceVariableNames: '…' classVariableNames: '…' …`), method definitions (`extend: #Foo with: 'bar ^self'`), pragmas `<primitive: N>`, blocks `[:a :b | | t1 t2 | …]`, cascades, message precedence (unary > binary > keyword) -- [ ] Unit tests in `lib/smalltalk/tests/parse.sx` +- [x] Parser (expression level): blocks `[:a :b | | t1 t2 | …]`, cascades, message precedence (unary > binary > keyword), assignment, return, statement sequences, literal arrays, byte arrays, paren grouping, method headers (`+ other`, `at:put:`, unary, with temps and body). Class-definition keyword messages parse as ordinary keyword sends — no special-case needed. +- [ ] Parser (chunk-stream level): `! !` chunk separators driving a sequence of top-level expressions, pragmas `<primitive: N>` inside method bodies +- [x] Unit tests in `lib/smalltalk/tests/parse.sx` ### Phase 2 — object model + sequential eval - [ ] Class table + bootstrap: `Object`, `Behavior`, `Class`, `Metaclass`, `UndefinedObject`, `Boolean`/`True`/`False`, `Number`/`Integer`/`Float`, `String`, `Symbol`, `Array`, `Block` @@ -107,6 +108,7 @@ Core mapping: _Newest first. Agent appends on every commit._ +- 2026-04-25: expression-level parser + 47 parse tests (`lib/smalltalk/parser.sx`, `lib/smalltalk/tests/parse.sx`). Full message precedence (unary > binary > keyword), cascades, blocks with params/temps, literal/byte arrays, assignment chain, method headers (unary/binary/keyword). Chunk-format `! !` driver deferred to a follow-up box. 110/110 tests pass. - 2026-04-25: tokenizer + 63 tests (`lib/smalltalk/tokenizer.sx`, `lib/smalltalk/tests/tokenize.sx`, `lib/smalltalk/test.sh`). All token types covered except scaled decimals `1.5s2` (deferred). `#(` and `#[` emit open tokens; literal-array contents lexed as ordinary tokens for the parser to interpret. ## Blockers