smalltalk: expression parser + 47 parse tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Has been cancelled
This commit is contained in:
627
lib/smalltalk/parser.sx
Normal file
627
lib/smalltalk/parser.sx
Normal file
@@ -0,0 +1,627 @@
|
||||
;; Smalltalk parser — produces an AST from the tokenizer's token stream.
|
||||
;;
|
||||
;; AST node shapes (dicts):
|
||||
;; {:type "lit-int" :value N} integer
|
||||
;; {:type "lit-float" :value F} float
|
||||
;; {:type "lit-string" :value S} string
|
||||
;; {:type "lit-char" :value C} character
|
||||
;; {:type "lit-symbol" :value S} symbol literal (#foo)
|
||||
;; {:type "lit-array" :elements (list ...)} literal array (#(1 2 #foo))
|
||||
;; {:type "lit-byte-array" :elements (...)} byte array (#[1 2 3])
|
||||
;; {:type "lit-nil" } / "lit-true" / "lit-false"
|
||||
;; {:type "ident" :name "x"} variable reference
|
||||
;; {:type "self"} / "super" / "thisContext" pseudo-variables
|
||||
;; {:type "assign" :name "x" :expr E} x := E
|
||||
;; {:type "return" :expr E} ^ E
|
||||
;; {:type "send" :receiver R :selector S :args (list ...)}
|
||||
;; {:type "cascade" :receiver R :messages (list {:selector :args} ...)}
|
||||
;; {:type "block" :params (list "a") :temps (list "t") :body (list expr)}
|
||||
;; {:type "seq" :exprs (list ...)} statement sequence
|
||||
;; {:type "method" :selector S :params (list ...) :temps (list ...) :body (list ...)}   (:pragmas is planned but not emitted yet)
|
||||
;;
|
||||
;; A "chunk" / class-definition stream is parsed at a higher level (deferred).
|
||||
|
||||
;; Type tag of token `t`; a nil token (nothing left to read) counts as "eof".
(define st-tok-type
  (fn (t)
    (cond
      ((= t nil) "eof")
      (else (get t :type)))))
|
||||
|
||||
;; Value payload of token `t`, or nil when there is no token.
(define st-tok-value
  (fn (t)
    (cond
      ((= t nil) nil)
      (else (get t :value)))))
|
||||
|
||||
;; Entry point: parse `src` as one Smalltalk expression and return its AST node.
(define st-parse-expr
  (fn (src)
    (st-parse-with src "expr")))
|
||||
|
||||
;; Entry point: parse `src` as '.'-separated statements.
;; Returns a {:type "seq" :exprs (list ...)} node.
(define st-parse
  (fn (src)
    (st-parse-with src "seq")))
|
||||
|
||||
;; Entry point: parse a method — `selector params | temps | body`.
;; Handles only the bare "header + body" text; chunk delimiters are not
;; recognised here.
(define st-parse-method
  (fn (src)
    (st-parse-with src "method")))
|
||||
|
||||
(define
|
||||
st-parse-with
|
||||
(fn
|
||||
(src mode)
|
||||
(let
|
||||
((tokens (st-tokenize src)) (idx 0) (tok-len 0))
|
||||
(begin
|
||||
(set! tok-len (len tokens))
|
||||
;; Current token, or nil once idx has moved past the last token.
;; Bounds-checked the same way as peek-tok-at, so end of input always shows up
;; as nil (which st-tok-type reads as "eof") instead of depending on what
;; `nth` does with an out-of-range index.
(define peek-tok
  (fn () (if (< idx tok-len) (nth tokens idx) nil)))
|
||||
;; Look `n` tokens ahead of the cursor without consuming anything.
;; Returns nil when that position lies beyond the token list.
(define peek-tok-at
  (fn (n)
    (let ((target (+ idx n)))
      (cond
        ((< target tok-len) (nth tokens target))
        (else nil)))))
|
||||
;; Move the cursor one token forward.
(define advance-tok!
  (fn ()
    (set! idx (+ 1 idx))))
|
||||
;; True when the current token has type `type` and — if `value` is non-nil —
;; also carries exactly that value. Does not consume the token.
(define at?
  (fn (type value)
    (let ((cur (peek-tok)))
      (let ((same-type (= (st-tok-type cur) type)))
        (and
          same-type
          (or (= value nil) (= (st-tok-value cur) value)))))))
|
||||
;; True when the current token's type is `type`, whatever its value.
(define at-type?
  (fn (type)
    (at? type nil)))
|
||||
;; Consume and return the current token when it matches `type` / `value`
;; (a nil `value` matches any value). Otherwise raise a parse error that
;; names what was expected, what was found, and the token index.
(define consume!
  (fn (type value)
    (let ((cur (peek-tok)))
      (cond
        ((at? type value)
          (begin (advance-tok!) cur))
        (else
          (let ((wanted (if (= value nil) "" (str " '" value "'"))))
            (error
              (str
                "st-parse: expected " type wanted
                " got " (st-tok-type cur)
                " '" (st-tok-value cur)
                "' at idx " idx))))))))
|
||||
|
||||
;; ── Primary: literal atoms, pseudo-variables, identifiers, (expr), [block],
;; #( ... ) literal arrays and #[ ... ] byte arrays.
(define parse-primary
  (fn ()
    (let ((tok (peek-tok)))
      (let ((kind (st-tok-type tok)) (val (st-tok-value tok)))
        (cond
          ((= kind "number")
            (begin
              (advance-tok!)
              ;; Only a non-integer number is a float; everything else
              ;; (including a non-number payload) is tagged lit-int.
              {:type (if (and (number? val) (not (integer? val))) "lit-float" "lit-int")
               :value val}))
          ((= kind "string")
            (begin (advance-tok!) {:type "lit-string" :value val}))
          ((= kind "char")
            (begin (advance-tok!) {:type "lit-char" :value val}))
          ((= kind "symbol")
            (begin (advance-tok!) {:type "lit-symbol" :value val}))
          ((= kind "array-open") (parse-literal-array))
          ((= kind "byte-array-open") (parse-byte-array))
          ((= kind "lbracket") (parse-block))
          ((= kind "lparen")
            (begin
              (advance-tok!)
              (let ((inner (parse-expression)))
                (begin (consume! "rparen" nil) inner))))
          ((= kind "ident")
            (begin
              (advance-tok!)
              (cond
                ((= val "nil") {:type "lit-nil"})
                ((= val "true") {:type "lit-true"})
                ((= val "false") {:type "lit-false"})
                ;; Pseudo-variables use their own name as the node type.
                ((or (= val "self") (= val "super") (= val "thisContext"))
                  {:type val})
                (else {:type "ident" :name val}))))
          ;; A '-' token directly followed by a number token is folded into a
          ;; negative numeric literal.
          ((and
             (= kind "binary")
             (= val "-")
             (= (st-tok-type (peek-tok-at 1)) "number"))
            (let ((mag (st-tok-value (peek-tok-at 1))))
              (begin
                (advance-tok!)
                (advance-tok!)
                (cond
                  ;; NOTE(review): a dict-valued number is passed through
                  ;; without negation — confirm what the tokenizer stores in
                  ;; dict payloads and whether the sign should be applied.
                  ((dict? mag) {:type "lit-int" :value mag})
                  ((integer? mag) {:type "lit-int" :value (- 0 mag)})
                  (else {:type "lit-float" :value (- 0 mag)})))))
          ;; Nothing else — including any other binary operator — can start
          ;; a primary.
          (else
            (error
              (str "st-parse: unexpected " kind " '" val "' at idx " idx))))))))
|
||||
|
||||
;; #( elem elem ... ) — consumes the opener, gathers elements with
;; parse-array-element until the closing ')', and returns a lit-array node.
(define parse-literal-array
  (fn ()
    (let ((elems (list)))
      (begin
        (consume! "array-open" nil)
        (define collect
          (fn ()
            (if (at? "rparen" nil)
              (advance-tok!)
              (begin
                (append! elems (parse-array-element))
                (collect)))))
        (collect)
        {:type "lit-array" :elements elems}))))
|
||||
|
||||
;; #[1 2 3] — byte array literal.
;; Consumes the opener and then number tokens up to the closing ']'.
;; Each element must be an integer in 0..255; anything else (a non-number
;; token, a float, an out-of-range value) raises a parse error instead of
;; being stored as a "byte".
;; Returns {:type "lit-byte-array" :elements (list n ...)} with raw numbers.
(define parse-byte-array
  (fn ()
    (let ((items (list)))
      (begin
        (consume! "byte-array-open" nil)
        (define ba-loop
          (fn ()
            (cond
              ((at? "rbracket" nil) (advance-tok!))
              (else
                (let ((t (peek-tok)))
                  (let ((ty (st-tok-type t)) (v (st-tok-value t)))
                    (cond
                      ((not (= ty "number"))
                        (error
                          (str "st-parse: byte array expects number, got " ty)))
                      ;; `or` short-circuits, so the comparisons only run on
                      ;; values that passed integer?.
                      ((or (not (integer? v)) (< v 0) (> v 255))
                        (error
                          (str
                            "st-parse: byte array element must be an integer 0-255, got '"
                            v
                            "'")))
                      (else
                        (begin
                          (advance-tok!)
                          (append! items v)
                          (ba-loop))))))))))
        (ba-loop)
        {:type "lit-byte-array" :elements items}))))
|
||||
|
||||
;; One element inside #( ... ).
;; Literals parse as usual; bare identifiers, keywords and binary operators
;; become symbols (except nil / true / false); a nested ( ... ) is a
;; sub-array; #( ... ) and #[ ... ] may also nest.
;; Numbers are tagged lit-int / lit-float with the same rule parse-primary
;; uses, rather than labelling every number lit-int.
;; NOTE(review): '-' followed by a number is read here as the symbol #- and
;; then a positive number, whereas parse-primary folds that pair into a
;; negative literal — confirm which reading is intended for #(-3).
(define parse-array-element
  (fn ()
    (let ((t (peek-tok)))
      (let ((ty (st-tok-type t)) (v (st-tok-value t)))
        (cond
          ((= ty "number")
            (begin
              (advance-tok!)
              {:type (if (and (number? v) (not (integer? v))) "lit-float" "lit-int")
               :value v}))
          ((= ty "string") (begin (advance-tok!) {:type "lit-string" :value v}))
          ((= ty "char") (begin (advance-tok!) {:type "lit-char" :value v}))
          ((= ty "symbol") (begin (advance-tok!) {:type "lit-symbol" :value v}))
          ((= ty "ident")
            (begin
              (advance-tok!)
              (cond
                ((= v "nil") {:type "lit-nil"})
                ((= v "true") {:type "lit-true"})
                ((= v "false") {:type "lit-false"})
                (else {:type "lit-symbol" :value v}))))
          ((= ty "keyword") (begin (advance-tok!) {:type "lit-symbol" :value v}))
          ((= ty "binary") (begin (advance-tok!) {:type "lit-symbol" :value v}))
          ;; Nested array written without the leading '#'.
          ((= ty "lparen")
            (let ((items (list)))
              (begin
                (advance-tok!)
                (define sub-loop
                  (fn ()
                    (cond
                      ((at? "rparen" nil) (advance-tok!))
                      (else
                        (begin
                          (append! items (parse-array-element))
                          (sub-loop))))))
                (sub-loop)
                {:type "lit-array" :elements items})))
          ((= ty "array-open") (parse-literal-array))
          ((= ty "byte-array-open") (parse-byte-array))
          (else
            (error
              (str "st-parse: bad literal-array element " ty " '" v "'"))))))))
|
||||
|
||||
;; Block literal: [ :p1 :p2 | | t1 t2 | stmt. stmt. ... ]
;; Returns {:type "block" :params (names) :temps (names) :body (statements)}.
(define parse-block
  (fn ()
    (begin
      (consume! "lbracket" nil)
      (let ((params (list)) (temps (list)))
        (begin
          ;; Parameters: each is a ':' followed by an identifier.
          (define read-params
            (fn ()
              (when (at? "colon" nil)
                (begin
                  (advance-tok!)
                  (append! params (st-tok-value (consume! "ident" nil)))
                  (read-params)))))
          (read-params)
          ;; A non-empty parameter list must be closed by '|'.
          (when (> (len params) 0) (consume! "bar" nil))
          ;; Any '|' seen at this point opens the temporaries section.
          (when (at? "bar" nil)
            (begin
              (advance-tok!)
              (define read-temps
                (fn ()
                  (when (at? "ident" nil)
                    (begin
                      (append! temps (st-tok-value (peek-tok)))
                      (advance-tok!)
                      (read-temps)))))
              (read-temps)
              (consume! "bar" nil)))
          ;; Body runs up to the closing ']'.
          (let ((stmts (parse-statements "rbracket")))
            (begin
              (consume! "rbracket" nil)
              {:type "block" :params params :temps temps :body stmts})))))))
|
||||
|
||||
;; Collect statements until a token of type `terminator` or end of input.
;; Periods after a statement are optional and may repeat. The terminator
;; itself is not consumed. Returns the list of statement nodes.
(define parse-statements
  (fn (terminator)
    (let ((out (list)))
      (begin
        (define skip-periods
          (fn ()
            (when (at? "period" nil)
              (begin (advance-tok!) (skip-periods)))))
        (define next-stmt
          (fn ()
            (when (not (or (at-type? terminator) (at-type? "eof")))
              (begin
                (append! out (parse-statement))
                (skip-periods)
                (next-stmt)))))
        (next-stmt)
        out))))
|
||||
|
||||
;; Statement: ^expr | ident := expr | expr
;; Only the return form is handled here; assignment is recognised by
;; parse-expression, which performs the same ident-then-':=' lookahead.
(define parse-statement
  (fn ()
    (if (at? "caret" nil)
      (begin
        (advance-tok!)
        {:type "return" :expr (parse-expression)})
      (parse-expression))))
|
||||
|
||||
;; Expression: `name := expr`, or a cascade. The right-hand side of an
;; assignment is parsed by recursing, so `a := b := c` nests to the right.
(define parse-expression
  (fn ()
    (let ((starts-assign
            (and
              (at-type? "ident")
              (= (st-tok-type (peek-tok-at 1)) "assign"))))
      (if starts-assign
        (let ((target (st-tok-value (peek-tok))))
          (begin
            ;; skip the name and the ':=' token
            (advance-tok!)
            (advance-tok!)
            {:type "assign" :name target :expr (parse-expression)}))
        (parse-cascade)))))
|
||||
|
||||
;; Cascade: `recv msg ; msg ; ...`.
;; Parses one keyword-level expression; if a ';' follows, that expression's
;; outermost send is split into the shared receiver plus the first message
;; and the remaining messages are collected. Without a ';' the expression is
;; returned unchanged.
(define parse-cascade
  (fn ()
    (let ((head (parse-keyword-message)))
      (if (at? "semi" nil)
        (let ((target (cascade-receiver head))
              (opener (cascade-first-message head))
              (msgs (list)))
          (begin
            (append! msgs opener)
            (define more
              (fn ()
                (when (at? "semi" nil)
                  (begin
                    (advance-tok!)
                    (append! msgs (parse-cascade-message))
                    (more)))))
            (more)
            {:type "cascade" :receiver target :messages msgs}))
        head))))
|
||||
|
||||
;; Receiver shared by every message of a cascade: the receiver of `head`
;; when `head` is a send node, otherwise `head` itself.
(define cascade-receiver
  (fn (head)
    (if (= (get head :type) "send")
      (get head :receiver)
      head)))
|
||||
|
||||
;; First message of a cascade, taken from the head send as a
;; {:selector :args} dict. A head that is not a send has no message to
;; cascade from, which is reported as a parse error.
(define cascade-first-message
  (fn (head)
    (if (= (get head :type) "send")
      {:selector (get head :selector) :args (get head :args)}
      (error "st-parse: cascade with no prior message"))))
|
||||
|
||||
;; One message after a ';' in a cascade: unary | binary | keyword.
;; Returns a receiver-less {:selector :args} dict.
(define parse-cascade-message
  (fn ()
    (let ((kind (st-tok-type (peek-tok))))
      (cond
        ((= kind "ident")
          (let ((sel (st-tok-value (peek-tok))))
            (begin
              (advance-tok!)
              {:selector sel :args (list)})))
        ((= kind "binary")
          (let ((op (st-tok-value (peek-tok))))
            (begin
              (advance-tok!)
              {:selector op :args (list (parse-unary-message))})))
        ((= kind "keyword")
          (let ((parts (list)) (operands (list)))
            (begin
              ;; Each keyword part is followed by one binary-level argument.
              (define gather
                (fn ()
                  (when (at-type? "keyword")
                    (begin
                      (append! parts (st-tok-value (peek-tok)))
                      (advance-tok!)
                      (append! operands (parse-binary-message))
                      (gather)))))
              (gather)
              {:selector (join "" parts) :args operands})))
        (else
          (error
            (str "st-parse: bad cascade message at idx " idx)))))))
|
||||
|
||||
;; Keyword message: <binary> (kw <binary>)*
;; All keyword parts that follow are joined into a single selector
;; (at: + put: -> "at:put:"). With no keyword, the binary-level expression
;; is returned as is.
(define parse-keyword-message
  (fn ()
    (let ((target (parse-binary-message)))
      (if (at-type? "keyword")
        (let ((parts (list)) (operands (list)))
          (begin
            (define gather
              (fn ()
                (when (at-type? "keyword")
                  (begin
                    (append! parts (st-tok-value (peek-tok)))
                    (advance-tok!)
                    (append! operands (parse-binary-message))
                    (gather)))))
            (gather)
            {:type "send"
             :receiver target
             :selector (join "" parts)
             :args operands}))
        target))))
|
||||
|
||||
;; Binary message: <unary> (binop <unary>)*
;; Operators have no relative precedence; the chain folds left to right, so
;; `1 + 2 * 3` is a `*` send whose receiver is the `+` send.
(define parse-binary-message
  (fn ()
    (begin
      (define fold-binary
        (fn (left)
          (if (at-type? "binary")
            (let ((op (st-tok-value (peek-tok))))
              (begin
                (advance-tok!)
                (fold-binary
                  {:type "send"
                   :receiver left
                   :selector op
                   :args (list (parse-unary-message))})))
            left)))
      (fold-binary (parse-unary-message)))))
|
||||
|
||||
;; Unary message chain: <primary> ident*
;; An identifier that is directly followed by ':=' is not taken as a
;; selector; the chain stops in front of it.
(define parse-unary-message
  (fn ()
    (begin
      (define chain
        (fn (target)
          (if (and
                (at-type? "ident")
                (not (= (st-tok-type (peek-tok-at 1)) "assign")))
            (let ((sel (st-tok-value (peek-tok))))
              (begin
                (advance-tok!)
                (chain
                  {:type "send"
                   :receiver target
                   :selector sel
                   :args (list)})))
            target)))
      (chain (parse-primary)))))
|
||||
|
||||
;; Method: header (unary | binop arg | (kw arg)+), optional `| t1 t2 |`
;; temporaries, then body statements up to end of input.
;; Returns {:type "method" :selector :params :temps :body}.
(define parse-method
  (fn ()
    (let ((sel "") (params (list)) (temps (list)) (body (list)))
      (begin
        (cond
          ;; unary header: just the selector
          ((at-type? "ident")
            (begin
              (set! sel (st-tok-value (peek-tok)))
              (advance-tok!)))
          ;; binary header: operator followed by one parameter name
          ((at-type? "binary")
            (begin
              (set! sel (st-tok-value (peek-tok)))
              (advance-tok!)
              (append! params (st-tok-value (consume! "ident" nil)))))
          ;; keyword header: one parameter name per keyword part
          ((at-type? "keyword")
            (let ((parts (list)))
              (begin
                (define header-loop
                  (fn ()
                    (when (at-type? "keyword")
                      (begin
                        (append! parts (st-tok-value (peek-tok)))
                        (advance-tok!)
                        (append! params (st-tok-value (consume! "ident" nil)))
                        (header-loop)))))
                (header-loop)
                (set! sel (join "" parts)))))
          (else
            (error
              (str
                "st-parse-method: expected selector header, got "
                (st-tok-type (peek-tok))))))
        ;; optional temporaries between a pair of '|'
        (when (at? "bar" nil)
          (begin
            (advance-tok!)
            (define temp-loop
              (fn ()
                (when (at-type? "ident")
                  (begin
                    (append! temps (st-tok-value (peek-tok)))
                    (advance-tok!)
                    (temp-loop)))))
            (temp-loop)
            (consume! "bar" nil)))
        (set! body (parse-statements "eof"))
        {:type "method"
         :selector sel
         :params params
         :temps temps
         :body body}))))
|
||||
|
||||
;; Entry dispatch on `mode`:
;;   "expr"   — exactly one expression. Trailing periods are skipped, and any
;;              other leftover token is reported as an error rather than
;;              being silently dropped (e.g. the stray ')' in "1 + 2 )").
;;   "method" — method header + temporaries + body.
;;   other    — statements separated by '.', wrapped in a {:type "seq"} node.
(cond
  ((= mode "expr")
    (let ((node (parse-expression)))
      (begin
        (define skip-trailing-periods
          (fn ()
            (when (at? "period" nil)
              (begin (advance-tok!) (skip-trailing-periods)))))
        (skip-trailing-periods)
        (if (at-type? "eof")
          node
          (error
            (str
              "st-parse: unexpected "
              (st-tok-type (peek-tok))
              " '"
              (st-tok-value (peek-tok))
              "' after expression at idx "
              idx))))))
  ((= mode "method") (parse-method))
  (else
    {:type "seq" :exprs (parse-statements "eof")}))))))
|
||||
@@ -31,6 +31,8 @@ for arg in "$@"; do
|
||||
done
|
||||
|
||||
if [ ${#FILES[@]} -eq 0 ]; then
|
||||
# tokenize.sx defines the st-test helpers reused by the other test files; each
|
||||
# of those loads it explicitly in its own session, so sort only makes the run order deterministic.
|
||||
mapfile -t FILES < <(find lib/smalltalk/tests -maxdepth 2 -name '*.sx' | sort)
|
||||
fi
|
||||
|
||||
@@ -41,7 +43,8 @@ FAILED_FILES=()
|
||||
for FILE in "${FILES[@]}"; do
|
||||
[ -f "$FILE" ] || { echo "skip $FILE (not found)"; continue; }
|
||||
TMPFILE=$(mktemp)
|
||||
cat > "$TMPFILE" <<EPOCHS
|
||||
if [ "$(basename "$FILE")" = "tokenize.sx" ]; then
|
||||
cat > "$TMPFILE" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/smalltalk/tokenizer.sx")
|
||||
(epoch 2)
|
||||
@@ -49,14 +52,30 @@ for FILE in "${FILES[@]}"; do
|
||||
(epoch 3)
|
||||
(eval "(list st-test-pass st-test-fail)")
|
||||
EPOCHS
|
||||
else
|
||||
cat > "$TMPFILE" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/smalltalk/tokenizer.sx")
|
||||
(epoch 2)
|
||||
(load "lib/smalltalk/parser.sx")
|
||||
(epoch 3)
|
||||
(load "lib/smalltalk/tests/tokenize.sx")
|
||||
(epoch 4)
|
||||
(load "$FILE")
|
||||
(epoch 5)
|
||||
(eval "(list st-test-pass st-test-fail)")
|
||||
EPOCHS
|
||||
fi
|
||||
|
||||
OUTPUT=$(timeout 60 "$SX_SERVER" < "$TMPFILE" 2>&1 || true)
|
||||
rm -f "$TMPFILE"
|
||||
|
||||
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len 3 / {getline; print; exit}')
|
||||
# Final epoch's value: either (ok N (P F)) on one line or
|
||||
# (ok-len N M)\n(P F) where the value is on the following line.
|
||||
LINE=$(echo "$OUTPUT" | awk '/^\(ok-len [0-9]+ / {getline; print}' | tail -1)
|
||||
if [ -z "$LINE" ]; then
|
||||
LINE=$(echo "$OUTPUT" | grep -E '^\(ok 3 \([0-9]+ [0-9]+\)\)' | tail -1 \
|
||||
| sed -E 's/^\(ok 3 //; s/\)$//')
|
||||
LINE=$(echo "$OUTPUT" | grep -E '^\(ok [0-9]+ \([0-9]+ [0-9]+\)\)' | tail -1 \
|
||||
| sed -E 's/^\(ok [0-9]+ //; s/\)$//')
|
||||
fi
|
||||
if [ -z "$LINE" ]; then
|
||||
echo "X $FILE: could not extract summary"
|
||||
@@ -73,7 +92,8 @@ EPOCHS
|
||||
FAILED_FILES+=("$FILE")
|
||||
printf 'X %-40s %d/%d\n' "$FILE" "$P" "$((P+F))"
|
||||
TMPFILE2=$(mktemp)
|
||||
cat > "$TMPFILE2" <<EPOCHS
|
||||
if [ "$(basename "$FILE")" = "tokenize.sx" ]; then
|
||||
cat > "$TMPFILE2" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/smalltalk/tokenizer.sx")
|
||||
(epoch 2)
|
||||
@@ -81,7 +101,21 @@ EPOCHS
|
||||
(epoch 3)
|
||||
(eval "(map (fn (f) (get f :name)) st-test-fails)")
|
||||
EPOCHS
|
||||
FAILS=$(timeout 60 "$SX_SERVER" < "$TMPFILE2" 2>&1 | grep -E '^\(ok 3 ' || true)
|
||||
else
|
||||
cat > "$TMPFILE2" <<EPOCHS
|
||||
(epoch 1)
|
||||
(load "lib/smalltalk/tokenizer.sx")
|
||||
(epoch 2)
|
||||
(load "lib/smalltalk/parser.sx")
|
||||
(epoch 3)
|
||||
(load "lib/smalltalk/tests/tokenize.sx")
|
||||
(epoch 4)
|
||||
(load "$FILE")
|
||||
(epoch 5)
|
||||
(eval "(map (fn (f) (get f :name)) st-test-fails)")
|
||||
EPOCHS
|
||||
fi
|
||||
FAILS=$(timeout 60 "$SX_SERVER" < "$TMPFILE2" 2>&1 | grep -E '^\(ok [0-9]+ \(' | tail -1 || true)
|
||||
rm -f "$TMPFILE2"
|
||||
echo " $FAILS"
|
||||
elif [ "$VERBOSE" = "1" ]; then
|
||||
|
||||
365
lib/smalltalk/tests/parse.sx
Normal file
365
lib/smalltalk/tests/parse.sx
Normal file
@@ -0,0 +1,365 @@
|
||||
;; Smalltalk parser tests.
|
||||
;;
|
||||
;; Reuses helpers (st-test, st-deep=?) from tokenize.sx. Counters reset
|
||||
;; here so this file's summary covers parse tests only.
|
||||
|
||||
(set! st-test-pass 0)
|
||||
(set! st-test-fail 0)
|
||||
(set! st-test-fails (list))
|
||||
|
||||
;; ── 1. Atoms ──
|
||||
(st-test "int" (st-parse-expr "42") {:type "lit-int" :value 42})
|
||||
(st-test "float" (st-parse-expr "3.14") {:type "lit-float" :value 3.14})
|
||||
(st-test "string" (st-parse-expr "'hi'") {:type "lit-string" :value "hi"})
|
||||
(st-test "char" (st-parse-expr "$x") {:type "lit-char" :value "x"})
|
||||
(st-test "symbol" (st-parse-expr "#foo") {:type "lit-symbol" :value "foo"})
|
||||
(st-test "binary symbol" (st-parse-expr "#+") {:type "lit-symbol" :value "+"})
|
||||
(st-test "keyword symbol" (st-parse-expr "#at:put:") {:type "lit-symbol" :value "at:put:"})
|
||||
(st-test "nil" (st-parse-expr "nil") {:type "lit-nil"})
|
||||
(st-test "true" (st-parse-expr "true") {:type "lit-true"})
|
||||
(st-test "false" (st-parse-expr "false") {:type "lit-false"})
|
||||
(st-test "self" (st-parse-expr "self") {:type "self"})
|
||||
(st-test "super" (st-parse-expr "super") {:type "super"})
|
||||
(st-test "ident" (st-parse-expr "x") {:type "ident" :name "x"})
|
||||
(st-test "negative int" (st-parse-expr "-3") {:type "lit-int" :value -3})
|
||||
|
||||
;; ── 2. Literal arrays ──
|
||||
(st-test
|
||||
"literal array of ints"
|
||||
(st-parse-expr "#(1 2 3)")
|
||||
{:type "lit-array"
|
||||
:elements (list
|
||||
{:type "lit-int" :value 1}
|
||||
{:type "lit-int" :value 2}
|
||||
{:type "lit-int" :value 3})})
|
||||
|
||||
(st-test
|
||||
"literal array mixed"
|
||||
(st-parse-expr "#(1 #foo 'x' true)")
|
||||
{:type "lit-array"
|
||||
:elements (list
|
||||
{:type "lit-int" :value 1}
|
||||
{:type "lit-symbol" :value "foo"}
|
||||
{:type "lit-string" :value "x"}
|
||||
{:type "lit-true"})})
|
||||
|
||||
(st-test
|
||||
"literal array bare ident is symbol"
|
||||
(st-parse-expr "#(foo bar)")
|
||||
{:type "lit-array"
|
||||
:elements (list
|
||||
{:type "lit-symbol" :value "foo"}
|
||||
{:type "lit-symbol" :value "bar"})})
|
||||
|
||||
(st-test
|
||||
"nested literal array"
|
||||
(st-parse-expr "#(1 (2 3) 4)")
|
||||
{:type "lit-array"
|
||||
:elements (list
|
||||
{:type "lit-int" :value 1}
|
||||
{:type "lit-array"
|
||||
:elements (list
|
||||
{:type "lit-int" :value 2}
|
||||
{:type "lit-int" :value 3})}
|
||||
{:type "lit-int" :value 4})})
|
||||
|
||||
(st-test
|
||||
"byte array"
|
||||
(st-parse-expr "#[1 2 3]")
|
||||
{:type "lit-byte-array" :elements (list 1 2 3)})
|
||||
|
||||
;; ── 3. Unary messages ──
|
||||
(st-test
|
||||
"unary single"
|
||||
(st-parse-expr "x foo")
|
||||
{:type "send"
|
||||
:receiver {:type "ident" :name "x"}
|
||||
:selector "foo"
|
||||
:args (list)})
|
||||
|
||||
(st-test
|
||||
"unary chain"
|
||||
(st-parse-expr "x foo bar baz")
|
||||
{:type "send"
|
||||
:receiver {:type "send"
|
||||
:receiver {:type "send"
|
||||
:receiver {:type "ident" :name "x"}
|
||||
:selector "foo"
|
||||
:args (list)}
|
||||
:selector "bar"
|
||||
:args (list)}
|
||||
:selector "baz"
|
||||
:args (list)})
|
||||
|
||||
(st-test
|
||||
"unary on literal"
|
||||
(st-parse-expr "42 printNl")
|
||||
{:type "send"
|
||||
:receiver {:type "lit-int" :value 42}
|
||||
:selector "printNl"
|
||||
:args (list)})
|
||||
|
||||
;; ── 4. Binary messages ──
|
||||
(st-test
|
||||
"binary single"
|
||||
(st-parse-expr "1 + 2")
|
||||
{:type "send"
|
||||
:receiver {:type "lit-int" :value 1}
|
||||
:selector "+"
|
||||
:args (list {:type "lit-int" :value 2})})
|
||||
|
||||
(st-test
|
||||
"binary left-assoc"
|
||||
(st-parse-expr "1 + 2 + 3")
|
||||
{:type "send"
|
||||
:receiver {:type "send"
|
||||
:receiver {:type "lit-int" :value 1}
|
||||
:selector "+"
|
||||
:args (list {:type "lit-int" :value 2})}
|
||||
:selector "+"
|
||||
:args (list {:type "lit-int" :value 3})})
|
||||
|
||||
(st-test
|
||||
"binary same precedence l-to-r"
|
||||
(st-parse-expr "1 + 2 * 3")
|
||||
{:type "send"
|
||||
:receiver {:type "send"
|
||||
:receiver {:type "lit-int" :value 1}
|
||||
:selector "+"
|
||||
:args (list {:type "lit-int" :value 2})}
|
||||
:selector "*"
|
||||
:args (list {:type "lit-int" :value 3})})
|
||||
|
||||
;; ── 5. Precedence: unary binds tighter than binary ──
|
||||
(st-test
|
||||
"unary tighter than binary"
|
||||
(st-parse-expr "3 + 4 factorial")
|
||||
{:type "send"
|
||||
:receiver {:type "lit-int" :value 3}
|
||||
:selector "+"
|
||||
:args (list
|
||||
{:type "send"
|
||||
:receiver {:type "lit-int" :value 4}
|
||||
:selector "factorial"
|
||||
:args (list)})})
|
||||
|
||||
;; ── 6. Keyword messages ──
|
||||
(st-test
|
||||
"keyword single"
|
||||
(st-parse-expr "x at: 1")
|
||||
{:type "send"
|
||||
:receiver {:type "ident" :name "x"}
|
||||
:selector "at:"
|
||||
:args (list {:type "lit-int" :value 1})})
|
||||
|
||||
(st-test
|
||||
"keyword chain"
|
||||
(st-parse-expr "x at: 1 put: 'a'")
|
||||
{:type "send"
|
||||
:receiver {:type "ident" :name "x"}
|
||||
:selector "at:put:"
|
||||
:args (list {:type "lit-int" :value 1} {:type "lit-string" :value "a"})})
|
||||
|
||||
;; ── 7. Precedence: binary tighter than keyword ──
|
||||
(st-test
|
||||
"binary tighter than keyword"
|
||||
(st-parse-expr "x at: 1 + 2")
|
||||
{:type "send"
|
||||
:receiver {:type "ident" :name "x"}
|
||||
:selector "at:"
|
||||
:args (list
|
||||
{:type "send"
|
||||
:receiver {:type "lit-int" :value 1}
|
||||
:selector "+"
|
||||
:args (list {:type "lit-int" :value 2})})})
|
||||
|
||||
(st-test
|
||||
"keyword absorbs trailing unary"
|
||||
(st-parse-expr "a foo: b bar")
|
||||
{:type "send"
|
||||
:receiver {:type "ident" :name "a"}
|
||||
:selector "foo:"
|
||||
:args (list
|
||||
{:type "send"
|
||||
:receiver {:type "ident" :name "b"}
|
||||
:selector "bar"
|
||||
:args (list)})})
|
||||
|
||||
;; ── 8. Parens override precedence ──
|
||||
(st-test
|
||||
"paren forces grouping"
|
||||
(st-parse-expr "(1 + 2) * 3")
|
||||
{:type "send"
|
||||
:receiver {:type "send"
|
||||
:receiver {:type "lit-int" :value 1}
|
||||
:selector "+"
|
||||
:args (list {:type "lit-int" :value 2})}
|
||||
:selector "*"
|
||||
:args (list {:type "lit-int" :value 3})})
|
||||
|
||||
;; ── 9. Cascade ──
|
||||
(st-test
|
||||
"simple cascade"
|
||||
(st-parse-expr "x m1; m2")
|
||||
{:type "cascade"
|
||||
:receiver {:type "ident" :name "x"}
|
||||
:messages (list
|
||||
{:selector "m1" :args (list)}
|
||||
{:selector "m2" :args (list)})})
|
||||
|
||||
;; Cascade on a unary-send receiver, mixing unary and keyword messages.
;; (The input has no binary message, so the test name says "unary".)
(st-test
  "cascade with unary and keyword"
  (st-parse-expr "Stream new nl; tab; print: 1")
  {:type "cascade"
   :receiver {:type "send"
              :receiver {:type "ident" :name "Stream"}
              :selector "new"
              :args (list)}
   :messages (list
     {:selector "nl" :args (list)}
     {:selector "tab" :args (list)}
     {:selector "print:" :args (list {:type "lit-int" :value 1})})})
|
||||
|
||||
;; ── 10. Blocks ──
|
||||
(st-test
|
||||
"empty block"
|
||||
(st-parse-expr "[]")
|
||||
{:type "block" :params (list) :temps (list) :body (list)})
|
||||
|
||||
(st-test
|
||||
"block one expr"
|
||||
(st-parse-expr "[1 + 2]")
|
||||
{:type "block"
|
||||
:params (list)
|
||||
:temps (list)
|
||||
:body (list
|
||||
{:type "send"
|
||||
:receiver {:type "lit-int" :value 1}
|
||||
:selector "+"
|
||||
:args (list {:type "lit-int" :value 2})})})
|
||||
|
||||
(st-test
|
||||
"block with params"
|
||||
(st-parse-expr "[:a :b | a + b]")
|
||||
{:type "block"
|
||||
:params (list "a" "b")
|
||||
:temps (list)
|
||||
:body (list
|
||||
{:type "send"
|
||||
:receiver {:type "ident" :name "a"}
|
||||
:selector "+"
|
||||
:args (list {:type "ident" :name "b"})})})
|
||||
|
||||
(st-test
|
||||
"block with temps"
|
||||
(st-parse-expr "[| t | t := 1. t]")
|
||||
{:type "block"
|
||||
:params (list)
|
||||
:temps (list "t")
|
||||
:body (list
|
||||
{:type "assign" :name "t" :expr {:type "lit-int" :value 1}}
|
||||
{:type "ident" :name "t"})})
|
||||
|
||||
(st-test
|
||||
"block with params and temps"
|
||||
(st-parse-expr "[:x | | t | t := x + 1. t]")
|
||||
{:type "block"
|
||||
:params (list "x")
|
||||
:temps (list "t")
|
||||
:body (list
|
||||
{:type "assign"
|
||||
:name "t"
|
||||
:expr {:type "send"
|
||||
:receiver {:type "ident" :name "x"}
|
||||
:selector "+"
|
||||
:args (list {:type "lit-int" :value 1})}}
|
||||
{:type "ident" :name "t"})})
|
||||
|
||||
;; ── 11. Assignment / return / statements ──
|
||||
(st-test
|
||||
"assignment"
|
||||
(st-parse-expr "x := 1")
|
||||
{:type "assign" :name "x" :expr {:type "lit-int" :value 1}})
|
||||
|
||||
;; st-parse-expr goes through parse-expression, which has no `^` form, so a
;; bare literal comes back as the literal node itself. Return statements are
;; covered by the st-parse test that follows.
(st-test
  "expr mode: bare literal, no return wrapper"
  (st-parse-expr "1")
  {:type "lit-int" :value 1})
|
||||
|
||||
(st-test
|
||||
"return statement at top level"
|
||||
(st-parse "^ 1")
|
||||
{:type "seq"
|
||||
:exprs (list {:type "return" :expr {:type "lit-int" :value 1}})})
|
||||
|
||||
(st-test
|
||||
"two statements"
|
||||
(st-parse "x := 1. y := 2")
|
||||
{:type "seq"
|
||||
:exprs (list
|
||||
{:type "assign" :name "x" :expr {:type "lit-int" :value 1}}
|
||||
{:type "assign" :name "y" :expr {:type "lit-int" :value 2}})})
|
||||
|
||||
(st-test
|
||||
"trailing dot allowed"
|
||||
(st-parse "1. 2.")
|
||||
{:type "seq"
|
||||
:exprs (list {:type "lit-int" :value 1} {:type "lit-int" :value 2})})
|
||||
|
||||
;; ── 12. Method headers ──
|
||||
(st-test
|
||||
"unary method"
|
||||
(st-parse-method "factorial ^ self * (self - 1) factorial")
|
||||
{:type "method"
|
||||
:selector "factorial"
|
||||
:params (list)
|
||||
:temps (list)
|
||||
:body (list
|
||||
{:type "return"
|
||||
:expr {:type "send"
|
||||
:receiver {:type "self"}
|
||||
:selector "*"
|
||||
:args (list
|
||||
{:type "send"
|
||||
:receiver {:type "send"
|
||||
:receiver {:type "self"}
|
||||
:selector "-"
|
||||
:args (list {:type "lit-int" :value 1})}
|
||||
:selector "factorial"
|
||||
:args (list)})}})})
|
||||
|
||||
(st-test
|
||||
"binary method"
|
||||
(st-parse-method "+ other ^ 'plus'")
|
||||
{:type "method"
|
||||
:selector "+"
|
||||
:params (list "other")
|
||||
:temps (list)
|
||||
:body (list {:type "return" :expr {:type "lit-string" :value "plus"}})})
|
||||
|
||||
(st-test
|
||||
"keyword method"
|
||||
(st-parse-method "at: i put: v ^ v")
|
||||
{:type "method"
|
||||
:selector "at:put:"
|
||||
:params (list "i" "v")
|
||||
:temps (list)
|
||||
:body (list {:type "return" :expr {:type "ident" :name "v"}})})
|
||||
|
||||
(st-test
|
||||
"method with temps"
|
||||
(st-parse-method "twice: x | t | t := x + x. ^ t")
|
||||
{:type "method"
|
||||
:selector "twice:"
|
||||
:params (list "x")
|
||||
:temps (list "t")
|
||||
:body (list
|
||||
{:type "assign"
|
||||
:name "t"
|
||||
:expr {:type "send"
|
||||
:receiver {:type "ident" :name "x"}
|
||||
:selector "+"
|
||||
:args (list {:type "ident" :name "x"})}}
|
||||
{:type "return" :expr {:type "ident" :name "t"}})})
|
||||
|
||||
(list st-test-pass st-test-fail)
|
||||
@@ -51,8 +51,9 @@ Core mapping:
|
||||
|
||||
### Phase 1 — tokenizer + parser
|
||||
- [x] Tokenizer: identifiers, keywords (`foo:`), binary selectors (`+`, `==`, `,`, `->`, `~=` etc.), numbers (radix `16r1F`; **scaled `1.5s2` deferred**), strings `'…''…'`, characters `$c`, symbols `#foo` `#'foo bar'` `#+`, byte arrays `#[1 2 3]` (open token), literal arrays `#(1 #foo 'x')` (open token), comments `"…"`
|
||||
- [ ] Parser: chunk format (`! !` separators), class definitions (`Object subclass: #X instanceVariableNames: '…' classVariableNames: '…' …`), method definitions (`extend: #Foo with: 'bar ^self'`), pragmas `<primitive: 1>`, blocks `[:a :b | | t1 t2 | …]`, cascades, message precedence (unary > binary > keyword)
|
||||
- [ ] Unit tests in `lib/smalltalk/tests/parse.sx`
|
||||
- [x] Parser (expression level): blocks `[:a :b | | t1 t2 | …]`, cascades, message precedence (unary > binary > keyword), assignment, return, statement sequences, literal arrays, byte arrays, paren grouping, method headers (`+ other`, `at:put:`, unary, with temps and body). Class-definition keyword messages parse as ordinary keyword sends — no special-case needed.
|
||||
- [ ] Parser (chunk-stream level): `! !` chunk separators driving a sequence of top-level expressions, pragmas `<primitive: 1>` inside method bodies
|
||||
- [x] Unit tests in `lib/smalltalk/tests/parse.sx`
|
||||
|
||||
### Phase 2 — object model + sequential eval
|
||||
- [ ] Class table + bootstrap: `Object`, `Behavior`, `Class`, `Metaclass`, `UndefinedObject`, `Boolean`/`True`/`False`, `Number`/`Integer`/`Float`, `String`, `Symbol`, `Array`, `Block`
|
||||
@@ -107,6 +108,7 @@ Core mapping:
|
||||
|
||||
_Newest first. Agent appends on every commit._
|
||||
|
||||
- 2026-04-25: expression-level parser + 47 parse tests (`lib/smalltalk/parser.sx`, `lib/smalltalk/tests/parse.sx`). Full message precedence (unary > binary > keyword), cascades, blocks with params/temps, literal/byte arrays, assignment chain, method headers (unary/binary/keyword). Chunk-format `! !` driver deferred to a follow-up box. 110/110 tests pass.
|
||||
- 2026-04-25: tokenizer + 63 tests (`lib/smalltalk/tokenizer.sx`, `lib/smalltalk/tests/tokenize.sx`, `lib/smalltalk/test.sh`). All token types covered except scaled decimals `1.5s2` (deferred). `#(` and `#[` emit open tokens; literal-array contents lexed as ordinary tokens for the parser to interpret.
|
||||
|
||||
## Blockers
|
||||
|
||||
Reference in New Issue
Block a user