From df62c02a21b1a3d38afd3b5dde3094fa9f082b8a Mon Sep 17 00:00:00 2001
From: giles <giles.bradshaw@rose-ash.com>
Date: Sat, 25 Apr 2026 15:30:36 +0000
Subject: [PATCH] smalltalk: per-call-site inline cache + 10 IC tests

---
 lib/smalltalk/eval.sx               | 47 ++++++++++++++++-
 lib/smalltalk/runtime.sx            | 11 ++++
 lib/smalltalk/tests/inline_cache.sx | 78 +++++++++++++++++++++++++++++
 plans/smalltalk-on-sx.md            |  3 +-
 4 files changed, 137 insertions(+), 2 deletions(-)
 create mode 100644 lib/smalltalk/tests/inline_cache.sx

diff --git a/lib/smalltalk/eval.sx b/lib/smalltalk/eval.sx
index 7b3f32c2..a25f6b14 100644
--- a/lib/smalltalk/eval.sx
+++ b/lib/smalltalk/eval.sx
@@ -212,6 +212,23 @@
           exprs)
         result))))
 
+;; Per-call-site monomorphic inline cache: each `send` AST node stores
+;; the receiver class and method record from the last dispatch. When the
+;; next dispatch sees the same class AND the runtime's IC generation
+;; hasn't changed, we skip the global method-lookup. Mutations to the
+;; class table bump `st-ic-generation` (defined in runtime.sx) so stale
+;; method records can't fire.
+(define st-ic-hits 0)
+(define st-ic-misses 0)
+
+(define
+  st-ic-reset-stats!
+  (fn () (begin (set! st-ic-hits 0) (set! st-ic-misses 0))))
+
+(define
+  st-ic-stats
+  (fn () {:hits st-ic-hits :misses st-ic-misses :gen st-ic-generation}))
+
 (define
   st-eval-send
   (fn
@@ -223,7 +240,35 @@
       (cond
         (super?
           (st-super-send (get frame :self) selector args (get frame :method-class)))
-        (else (st-send receiver selector args))))))
+        (else
+          (let ((cls (st-class-of-for-send receiver)))
+            (cond
+              ;; Inline-cache hit: same receiver class, same generation.
+              ((and (has-key? ast :ic-class)
+                    (= (get ast :ic-class) cls)
+                    (has-key? ast :ic-gen)
+                    (= (get ast :ic-gen) st-ic-generation)
+                    (has-key? ast :ic-method))
+                (begin
+                  (set! st-ic-hits (+ st-ic-hits 1))
+                  (st-invoke (get ast :ic-method) receiver args)))
+              (else
+                (begin
+                  (set! st-ic-misses (+ st-ic-misses 1))
+                  (let
+                    ((class-side? (st-class-ref? receiver))
+                      (recv-class (if (st-class-ref? receiver)
+                                      (get receiver :name)
+                                      cls)))
+                    (let ((method (st-method-lookup recv-class selector class-side?)))
+                      (cond
+                        ((not (= method nil))
+                          (begin
+                            (dict-set! ast :ic-class cls)
+                            (dict-set! ast :ic-method method)
+                            (dict-set! ast :ic-gen st-ic-generation)
+                            (st-invoke method receiver args)))
+                        (else (st-send receiver selector args))))))))))))))
 
 (define
   st-eval-cascade
diff --git a/lib/smalltalk/runtime.sx b/lib/smalltalk/runtime.sx
index 1aeb774f..19198f22 100644
--- a/lib/smalltalk/runtime.sx
+++ b/lib/smalltalk/runtime.sx
@@ -29,6 +29,14 @@
   st-method-cache-clear!
   (fn () (set! st-method-cache {})))
 
+;; Inline-cache generation. Eval-time IC slots check this; bumping it
+;; invalidates every cached call-site method record across the program.
+(define st-ic-generation 0)
+
+(define
+  st-ic-bump-generation!
+  (fn () (set! st-ic-generation (+ st-ic-generation 1))))
+
 (define
   st-method-cache-key
   (fn (cls sel class-side?) (str cls "|" sel "|" (if class-side? "c" "i"))))
@@ -154,6 +162,7 @@
                     :methods
                     (assoc (get cls :methods) selector m))))
               (st-method-cache-clear!)
+              (st-ic-bump-generation!)
               selector)))))))
 
 (define
@@ -178,6 +187,7 @@
                     :class-methods
                     (assoc (get cls :class-methods) selector m))))
               (st-method-cache-clear!)
+              (st-ic-bump-generation!)
               selector)))))))
 
 ;; Remove a method from a class (instance side). Mostly for tests; runtime
@@ -208,6 +218,7 @@
                         cls-name
                         (assoc cls :methods new-md)))
                     (st-method-cache-clear!)
+                    (st-ic-bump-generation!)
                     true))))))))))
 
 ;; Walk-only lookup. Returns the method record (with :defining-class) or nil.
diff --git a/lib/smalltalk/tests/inline_cache.sx b/lib/smalltalk/tests/inline_cache.sx
new file mode 100644
index 00000000..77b2de17
--- /dev/null
+++ b/lib/smalltalk/tests/inline_cache.sx
@@ -0,0 +1,78 @@
+;; Inline-cache tests — verify the per-call-site IC slot fires on hot
+;; sends and is invalidated by class-table mutations.
+
+(set! st-test-pass 0)
+(set! st-test-fail 0)
+(set! st-test-fails (list))
+
+(st-bootstrap-classes!)
+(define ev (fn (src) (smalltalk-eval src)))
+(define evp (fn (src) (smalltalk-eval-program src)))
+
+;; ── 1. Counters exist ──
+(st-test "stats has :hits"   (has-key? (st-ic-stats) :hits) true)
+(st-test "stats has :misses" (has-key? (st-ic-stats) :misses) true)
+(st-test "stats has :gen"    (has-key? (st-ic-stats) :gen) true)
+
+;; ── 2. Repeated send to user method hits the IC ──
+(st-class-define! "Pinger" "Object" (list))
+(st-class-add-method! "Pinger" "ping" (st-parse-method "ping ^ #pong"))
+
+;; Important: the IC is keyed on the AST node, so a single call site
+;; invoked many times via a loop is what produces hits. Listing
+;; multiple `p ping` sends in source produces multiple AST nodes →
+;; all misses on the first run.
+(st-ic-reset-stats!)
+(evp "| p | p := Pinger new.
+      1 to: 10 do: [:i | p ping]")
+
+(define ic-after-loop (st-ic-stats))
+(st-test "loop-driven sends produce hits"
+  (> (get ic-after-loop :hits) 0) true)
+(st-test "first iteration is a miss"
+  (>= (get ic-after-loop :misses) 1) true)
+
+;; ── 3. Different receiver class causes a miss ──
+(st-class-define! "Cooer" "Object" (list))
+(st-class-add-method! "Cooer" "ping" (st-parse-method "ping ^ #coo"))
+
+(st-ic-reset-stats!)
+(evp "| p c |
+      p := Pinger new.
+      c := Cooer new.
+      ^ {p ping. c ping. p ping. c ping}")
+;; First p ping → miss. c ping with same call site → miss (class changed).
+;; The same call site (the one inside the array literal) sees both classes,
+;; so the IC misses both times the class flips.
+(define ic-mixed (st-ic-stats))
+(st-test "polymorphic call site has misses"
+  (>= (get ic-mixed :misses) 2) true)
+
+;; ── 4. Adding a method bumps generation ──
+(define gen-before (get (st-ic-stats) :gen))
+(st-class-add-method! "Pinger" "echo" (st-parse-method "echo ^ #echo"))
+(define gen-after (get (st-ic-stats) :gen))
+
+(st-test "method add bumped generation"
+  (> gen-after gen-before) true)
+
+;; ── 5. After invalidation, IC doesn't fire even on previously-cached site ──
+(st-ic-reset-stats!)
+(evp "| p | p := Pinger new. ^ p ping")  ;; warm
+(evp "| p | p := Pinger new. ^ p ping")  ;; should hit
+(st-class-add-method! "Pinger" "ping" (st-parse-method "ping ^ #newPong"))
+(evp "| p | p := Pinger new. ^ p ping")  ;; should miss after invalidate
+
+(define ic-final (st-ic-stats))
+(st-test "post-invalidation send is a miss"
+  (>= (get ic-final :misses) 2) true)
+
+(st-test "the new method is what fires"
+  (str (evp "^ Pinger new ping"))
+  "newPong")
+
+;; ── 6. Default IC generation starts at >= 0 ──
+(st-test "generation is non-negative"
+  (>= (get (st-ic-stats) :gen) 0) true)
+
+(list st-test-pass st-test-fail)
diff --git a/plans/smalltalk-on-sx.md b/plans/smalltalk-on-sx.md
index f97e4792..dfa7701a 100644
--- a/plans/smalltalk-on-sx.md
+++ b/plans/smalltalk-on-sx.md
@@ -100,7 +100,7 @@ Core mapping:
 - [x] Stretch: ANSI Smalltalk validator subset (`lib/smalltalk/tests/ansi.sx`). 62 tests organised by ANSI X3J20 §6.10 Object, §6.11 Boolean, §6.12 Number, §6.13 Integer, §6.16 Symbol, §6.17 String, §6.18 Array, §6.19 BlockContext. Each test runs through SUnit and emits one st-test row, mirroring the Pharo-slice harness.
 
 ### Phase 7 — speed (optional)
-- [ ] Method-dictionary inline caching (already in CEK as a primitive; just wire selector cache)
+- [x] Method-dictionary inline caching. Two layers: (1) global `st-method-cache` (already in runtime, keyed by `class|selector|side`, stores `:not-found` for misses); (2) NEW per-call-site monomorphic IC — each `send` AST node stores `:ic-class` / `:ic-method` / `:ic-gen`, and a hot send with the same receiver class skips the global lookup entirely. `st-ic-generation` (in runtime.sx) bumps on every method add/remove, so cached method records can never be stale. `st-ic-stats` / `st-ic-reset-stats!` for tests + later debugging. 10 dedicated IC tests in `lib/smalltalk/tests/inline_cache.sx`.
 - [ ] Block intrinsification beyond `whileTrue:` / `ifTrue:`
 - [ ] Compare against GNU Smalltalk on the corpus
 
@@ -108,6 +108,7 @@ Core mapping:
 
 _Newest first. Agent appends on every commit._
 
+- 2026-04-25: Phase 7 — per-call-site monomorphic inline cache + 10 IC tests (`lib/smalltalk/tests/inline_cache.sx`). `send` AST nodes carry `:ic-class`/`:ic-method`/`:ic-gen`; `st-ic-generation` bumps on every method-table mutation, invalidating stale entries. 823/823 total.
 - 2026-04-25: ANSI X3J20 validator subset + 62 tests (`lib/smalltalk/tests/ansi.sx`). One TestCase subclass per ANSI §6.x protocol; runs through SUnit. **Phase 6 complete.** 813/813 total.
 - 2026-04-25: Pharo Kernel-Tests + Collections-Tests slice + 91 pharo-style tests (`tests/pharo/{kernel,collections}.st` + `tests/pharo.sx`). Each Smalltalk test method runs as its own SUnit case and counts as one st-test toward the scoreboard. 751/751 total — past the Phase 6 "200+ green tests" target.
 - 2026-04-25: SUnit port (`lib/smalltalk/sunit.sx`, `lib/smalltalk/tests/sunit.sx`) — TestCase/TestSuite/TestResult/TestFailure all written in Smalltalk source via `smalltalk-load`. Full assert family + should:raise: + setUp/tearDown threading. 19 tests verify the framework. test.sh now bootstraps SUnit alongside runtime/eval. 660/660 total.