Files
rose-ash/lib/smalltalk/tests/tokenize.sx
giles 952ff2289c vm-ext: enable JIT in epoch serving mode (Smalltalk 847/847, Datalog 356/356)
register_jit_hook is now installed in the persistent (epoch) serving-mode
branch of sx_server.ml, not just --http/cli/site. Smalltalk-on-SX conformance
under JIT is 847/847 — identical to the no-JIT baseline; Datalog 356/356.
run_tests --jit/no-jit are byte-identical before/after (no regression).

Five distinct root causes fixed (not one "miscompile"):

1. Serving mode never loaded lib/compiler.sx, so JIT used the native
   Sx_compiler.compile stub (arity-0 bytecode, params as GLOBAL_GET →
   "VM undefined: <param>"). Server-mode branch now loads compiler.sx
   before registering the hook, matching http/cli/site.

2. compile-cond / compile-case-clauses / compile-guard-clauses only treated
   keyword :else and true as the catch-all, not the bare symbol `else` that
   the CEK's is-else-clause? accepts → GLOBAL_GET "else". (lib/compiler.sx)

3. OP_DIV produced a float for non-divisible Integer/Integer (1/2 → 0.5)
   instead of the exact Rational the "/" primitive returns. Now delegates to
   the primitive, matching CEK. (sx_vm.ml)

4. OP_EQ / _fast_eq lacked Rational/ListRef cases that the "=" primitive's
   safe_eq has → (= 1/2 1/2) false under JIT. OP_EQ now delegates non-scalars
   to the "=" primitive; _fast_eq gained rational + ListRef. (sx_vm.ml,
   sx_runtime.ml)

5. Continuation-based control flow (Smalltalk ^expr non-local return, block
   escape, exceptions via call/cc) can't run in the stack VM. New data-driven
   exclusion set Sx_types.jit_excluded + `jit-exclude!` primitive, consulted in
   jit_compile_lambda (covers both the CEK hook and vm_call's tiered path).
   lib/smalltalk/eval.sx self-declares its continuation dispatch core
   interpret-only; pure helpers still JIT. The SUnit suite-runner test helper
   pharo-test-class miscompiles mid-loop and is excluded in tests/tokenize.sx.

Also adds SX_JIT_DENY / SX_JIT_ONLY env-var bisection filters to the serving
hook. Known residual documented in plans/jit-bytecode-correctness.md: the hook
re-runs a failed VM execution via CEK (correct result, possible duplicate side
effects); adopting run_tests' propagate-don't-rerun semantics is deferred to
avoid changing shared VM/CEK behavior under this loop.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-19 20:36:30 +00:00

370 lines
9.2 KiB
Plaintext

;; Smalltalk tokenizer tests.
;;
;; Lightweight runner: each test checks actual vs expected with structural
;; equality and accumulates pass/fail counters. Final summary read by
;; lib/smalltalk/test.sh.
(define
st-deep=?
(fn
(a b)
(cond
((= a b) true)
((and (dict? a) (dict? b))
(let
((ak (keys a)) (bk (keys b)))
(if
(not (= (len ak) (len bk)))
false
(every?
(fn
(k)
(and (has-key? b k) (st-deep=? (get a k) (get b k))))
ak))))
((and (list? a) (list? b))
(if
(not (= (len a) (len b)))
false
(let
((i 0) (ok true))
(begin
(define
de-loop
(fn
()
(when
(and ok (< i (len a)))
(begin
(when
(not (st-deep=? (nth a i) (nth b i)))
(set! ok false))
(set! i (+ i 1))
(de-loop)))))
(de-loop)
ok))))
(:else false))))
(define st-test-pass 0)
(define st-test-fail 0)
(define st-test-fails (list))
(define
st-test
(fn
(name actual expected)
(if
(st-deep=? actual expected)
(set! st-test-pass (+ st-test-pass 1))
(begin
(set! st-test-fail (+ st-test-fail 1))
(append! st-test-fails {:actual actual :expected expected :name name})))))
;; Strip eof and project to just :type/:value.
(define
st-toks
(fn
(src)
(map
(fn (tok) {:type (get tok :type) :value (get tok :value)})
(filter
(fn (tok) (not (= (get tok :type) "eof")))
(st-tokenize src)))))
;; ── 1. Whitespace / empty ──
(st-test "empty input" (st-toks "") (list))
(st-test "all whitespace" (st-toks " \t\n ") (list))
;; ── 2. Identifiers ──
(st-test
"lowercase ident"
(st-toks "foo")
(list {:type "ident" :value "foo"}))
(st-test
"capitalised ident"
(st-toks "Foo")
(list {:type "ident" :value "Foo"}))
(st-test
"underscore ident"
(st-toks "_x")
(list {:type "ident" :value "_x"}))
(st-test
"digits in ident"
(st-toks "foo123")
(list {:type "ident" :value "foo123"}))
(st-test
"two idents separated"
(st-toks "foo bar")
(list {:type "ident" :value "foo"} {:type "ident" :value "bar"}))
;; ── 3. Keyword selectors ──
(st-test
"keyword selector"
(st-toks "foo:")
(list {:type "keyword" :value "foo:"}))
(st-test
"keyword call"
(st-toks "x at: 1")
(list
{:type "ident" :value "x"}
{:type "keyword" :value "at:"}
{:type "number" :value 1}))
(st-test
"two-keyword chain stays separate"
(st-toks "at: 1 put: 2")
(list
{:type "keyword" :value "at:"}
{:type "number" :value 1}
{:type "keyword" :value "put:"}
{:type "number" :value 2}))
(st-test
"ident then assign — not a keyword"
(st-toks "x := 1")
(list
{:type "ident" :value "x"}
{:type "assign" :value ":="}
{:type "number" :value 1}))
;; ── 4. Numbers ──
(st-test
"integer"
(st-toks "42")
(list {:type "number" :value 42}))
(st-test
"float"
(st-toks "3.14")
(list {:type "number" :value 3.14}))
(st-test
"hex radix"
(st-toks "16rFF")
(list
{:type "number"
:value
{:radix 16 :digits "FF" :value 255 :kind "radix"}}))
(st-test
"binary radix"
(st-toks "2r1011")
(list
{:type "number"
:value
{:radix 2 :digits "1011" :value 11 :kind "radix"}}))
(st-test
"exponent"
(st-toks "1e3")
(list {:type "number" :value 1000}))
(st-test
"negative exponent (parser handles minus)"
(st-toks "1.5e-2")
(list {:type "number" :value 0.015}))
;; ── 5. Strings ──
(st-test
"simple string"
(st-toks "'hi'")
(list {:type "string" :value "hi"}))
(st-test
"empty string"
(st-toks "''")
(list {:type "string" :value ""}))
(st-test
"doubled-quote escape"
(st-toks "'a''b'")
(list {:type "string" :value "a'b"}))
;; ── 6. Characters ──
(st-test
"char literal letter"
(st-toks "$a")
(list {:type "char" :value "a"}))
(st-test
"char literal punct"
(st-toks "$$")
(list {:type "char" :value "$"}))
(st-test
"char literal space"
(st-toks "$ ")
(list {:type "char" :value " "}))
;; ── 7. Symbols ──
(st-test
"symbol ident"
(st-toks "#foo")
(list {:type "symbol" :value "foo"}))
(st-test
"symbol binary"
(st-toks "#+")
(list {:type "symbol" :value "+"}))
(st-test
"symbol arrow"
(st-toks "#->")
(list {:type "symbol" :value "->"}))
(st-test
"symbol keyword chain"
(st-toks "#at:put:")
(list {:type "symbol" :value "at:put:"}))
(st-test
"quoted symbol with spaces"
(st-toks "#'foo bar'")
(list {:type "symbol" :value "foo bar"}))
;; ── 8. Literal arrays / byte arrays ──
(st-test
"literal array open"
(st-toks "#(1 2)")
(list
{:type "array-open" :value "#("}
{:type "number" :value 1}
{:type "number" :value 2}
{:type "rparen" :value ")"}))
(st-test
"byte array open"
(st-toks "#[1 2 3]")
(list
{:type "byte-array-open" :value "#["}
{:type "number" :value 1}
{:type "number" :value 2}
{:type "number" :value 3}
{:type "rbracket" :value "]"}))
;; ── 9. Binary selectors ──
(st-test "plus" (st-toks "+") (list {:type "binary" :value "+"}))
(st-test "minus" (st-toks "-") (list {:type "binary" :value "-"}))
(st-test "star" (st-toks "*") (list {:type "binary" :value "*"}))
(st-test "double-equal" (st-toks "==") (list {:type "binary" :value "=="}))
(st-test "leq" (st-toks "<=") (list {:type "binary" :value "<="}))
(st-test "geq" (st-toks ">=") (list {:type "binary" :value ">="}))
(st-test "neq" (st-toks "~=") (list {:type "binary" :value "~="}))
(st-test "arrow" (st-toks "->") (list {:type "binary" :value "->"}))
(st-test "comma" (st-toks ",") (list {:type "binary" :value ","}))
(st-test
"binary in expression"
(st-toks "a + b")
(list
{:type "ident" :value "a"}
{:type "binary" :value "+"}
{:type "ident" :value "b"}))
;; ── 10. Punctuation ──
(st-test "lparen" (st-toks "(") (list {:type "lparen" :value "("}))
(st-test "rparen" (st-toks ")") (list {:type "rparen" :value ")"}))
(st-test "lbracket" (st-toks "[") (list {:type "lbracket" :value "["}))
(st-test "rbracket" (st-toks "]") (list {:type "rbracket" :value "]"}))
(st-test "lbrace" (st-toks "{") (list {:type "lbrace" :value "{"}))
(st-test "rbrace" (st-toks "}") (list {:type "rbrace" :value "}"}))
(st-test "period" (st-toks ".") (list {:type "period" :value "."}))
(st-test "semi" (st-toks ";") (list {:type "semi" :value ";"}))
(st-test "bar" (st-toks "|") (list {:type "bar" :value "|"}))
(st-test "caret" (st-toks "^") (list {:type "caret" :value "^"}))
(st-test "bang" (st-toks "!") (list {:type "bang" :value "!"}))
(st-test "colon" (st-toks ":") (list {:type "colon" :value ":"}))
(st-test "assign" (st-toks ":=") (list {:type "assign" :value ":="}))
;; ── 11. Comments ──
(st-test "comment skipped" (st-toks "\"hello\"") (list))
(st-test
"comment between tokens"
(st-toks "a \"comment\" b")
(list {:type "ident" :value "a"} {:type "ident" :value "b"}))
(st-test
"multi-line comment"
(st-toks "\"line1\nline2\"42")
(list {:type "number" :value 42}))
;; ── 12. Compound expressions ──
(st-test
"block with params"
(st-toks "[:a :b | a + b]")
(list
{:type "lbracket" :value "["}
{:type "colon" :value ":"}
{:type "ident" :value "a"}
{:type "colon" :value ":"}
{:type "ident" :value "b"}
{:type "bar" :value "|"}
{:type "ident" :value "a"}
{:type "binary" :value "+"}
{:type "ident" :value "b"}
{:type "rbracket" :value "]"}))
(st-test
"cascade"
(st-toks "x m1; m2")
(list
{:type "ident" :value "x"}
{:type "ident" :value "m1"}
{:type "semi" :value ";"}
{:type "ident" :value "m2"}))
(st-test
"method body return"
(st-toks "^ self foo")
(list
{:type "caret" :value "^"}
{:type "ident" :value "self"}
{:type "ident" :value "foo"}))
(st-test
"class declaration head"
(st-toks "Object subclass: #Foo")
(list
{:type "ident" :value "Object"}
{:type "keyword" :value "subclass:"}
{:type "symbol" :value "Foo"}))
(st-test
"temp declaration"
(st-toks "| t1 t2 |")
(list
{:type "bar" :value "|"}
{:type "ident" :value "t1"}
{:type "ident" :value "t2"}
{:type "bar" :value "|"}))
(st-test
"chunk separator"
(st-toks "Foo bar !")
(list
{:type "ident" :value "Foo"}
{:type "ident" :value "bar"}
{:type "bang" :value "!"}))
(st-test
"keyword call with binary precedence"
(st-toks "x foo: 1 + 2")
(list
{:type "ident" :value "x"}
{:type "keyword" :value "foo:"}
{:type "number" :value 1}
{:type "binary" :value "+"}
{:type "number" :value 2}))
(list st-test-pass st-test-fail)
;; The SUnit suite-runner `pharo-test-class` (defined in tests/pharo.sx and
;; tests/ansi.sx) drives the interpret-only Smalltalk evaluator through
;; smalltalk-eval-program in a loop and accumulates results via st-test
;; (a side-effecting accumulator). Under JIT it can fail mid-loop and re-run
;; via CEK, double-counting already-emitted rows. Keep it interpret-only.
(jit-exclude! "pharo-test-class")