From 8e1466032adbf2098e1f03666069837e7c4b040d Mon Sep 17 00:00:00 2001 From: giles Date: Fri, 24 Apr 2026 22:26:58 +0000 Subject: [PATCH] =?UTF-8?q?forth:=20LSHIFT/RSHIFT=20+=2032-bit=20arith=20t?= =?UTF-8?q?runcation=20+=20early=20binding=20(Hayes=20174=E2=86=92268)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lib/forth/compiler.sx | 29 +++++++++++++- lib/forth/conformance.sh | 17 ++++----- lib/forth/runtime.sx | 70 +++++++++++++++++++++++++++------- lib/forth/scoreboard.json | 14 +++---- lib/forth/scoreboard.md | 22 +++++------ lib/forth/tests/test-phase4.sx | 17 +++++++++ plans/forth-on-sx.md | 19 ++++++++- 7 files changed, 143 insertions(+), 45 deletions(-) diff --git a/lib/forth/compiler.sx b/lib/forth/compiler.sx index 6eb00545..79cbba8e 100644 --- a/lib/forth/compiler.sx +++ b/lib/forth/compiler.sx @@ -30,12 +30,37 @@ (forth-compile-lit state n) (forth-error state (str tok " ?")))))))) +;; Resolve the word NOW (early binding) so that `: X X ;` compiles a call +;; to the prior X — matching standard Forth redefinition semantics. +;; RECURSE is the one exception: it stays late-bound against the not-yet- +;; installed current definition. (define forth-compile-call (fn (state name) (let - ((op (fn (s) (let ((w (forth-lookup s name))) (if (nil? w) (forth-error s (str name " ? (compiled)")) (forth-execute-word s w)))))) + ((w (forth-lookup state name))) + (if + (nil? w) + (forth-error state (str name " ?")) + (let + ((op (fn (s) (forth-execute-word s w)))) + (forth-def-append! state op)))))) + +(define + forth-compile-recurse + (fn + (state name) + (let + ((op + (fn + (s) + (let + ((w (forth-lookup s name))) + (if + (nil? w) + (forth-error s (str "RECURSE: " name " not yet installed")) + (forth-execute-word s w)))))) (forth-def-append! state op)))) (define @@ -287,7 +312,7 @@ (forth-error s "RECURSE only in definition")) (let ((name (get (get s "current-def") "name"))) - (forth-compile-call s name)))) + (forth-compile-recurse s name)))) (forth-def-prim-imm! state "IF" diff --git a/lib/forth/conformance.sh b/lib/forth/conformance.sh index 3ddf257b..6a8c5d04 100755 --- a/lib/forth/conformance.sh +++ b/lib/forth/conformance.sh @@ -38,11 +38,10 @@ awk ' # 2 + 3: split into chunks at each `}T` and emit as a SX file # -# Cap chunks via MAX_CHUNKS env (default 590) — a small number of later -# tests enter infinite runtime loops (e.g. COUNT-BITS with unsigned wrap) -# that our bignum-based interpreter can't terminate. Raise the cap as -# those tests unblock. -MAX_CHUNKS="${MAX_CHUNKS:-590}" +# Cap chunks via MAX_CHUNKS env (default 638 = full Hayes Core). Lower +# it temporarily if later tests regress into an infinite loop while you +# are iterating on primitives. +MAX_CHUNKS="${MAX_CHUNKS:-638}" MAX_CHUNKS="$MAX_CHUNKS" python3 - "$PREPROC" "$CHUNKS_SX" <<'PY' import os, re, sys @@ -153,11 +152,9 @@ covers tests whose \`->\` / \`}T\` comparison mismatched. ### Chunk cap \`conformance.sh\` processes the first \`\$MAX_CHUNKS\` chunks (default -**590**). Past that, \`core.fr\` ships tests that rely on unsigned -integer wrap-around (e.g. \`COUNT-BITS\` using \`BEGIN DUP WHILE … 2* -REPEAT\`), which never terminates on our bignum-based interpreter. The -cap should rise as those tests unblock — run with \`MAX_CHUNKS=639 -./conformance.sh\` once they do. +**638**, i.e. the whole Hayes Core file). Lower the cap temporarily +while iterating on primitives if a regression re-opens an infinite +loop in later tests. MD echo "$SUMMARY" diff --git a/lib/forth/runtime.sx b/lib/forth/runtime.sx index 4ab5b6f8..d918cf3d 100644 --- a/lib/forth/runtime.sx +++ b/lib/forth/runtime.sx @@ -274,6 +274,17 @@ (define forth-bits-width 32) +;; Truncate a number to the Forth 32-bit signed range (two's-complement). +;; Used by arithmetic primitives so wrap-around matches ANS semantics and +;; loop idioms that rely on MSB becoming 0 after enough shifts terminate. +(define + forth-clip + (fn + (n) + (forth-from-unsigned + (forth-to-unsigned n forth-bits-width) + forth-bits-width))) + (define forth-to-unsigned (fn (n w) (let ((m (pow 2 w))) (mod (+ (mod n m) m) m)))) @@ -475,11 +486,17 @@ (forth-push s d) (forth-push s a) (forth-push s b)))) - (forth-def-prim! state "+" (forth-binop (fn (a b) (+ a b)))) - (forth-def-prim! state "-" (forth-binop (fn (a b) (- a b)))) - (forth-def-prim! state "*" (forth-binop (fn (a b) (* a b)))) - (forth-def-prim! state "/" (forth-binop forth-div)) - (forth-def-prim! state "MOD" (forth-binop forth-mod)) + (forth-def-prim! state "+" (forth-binop (fn (a b) (forth-clip (+ a b))))) + (forth-def-prim! state "-" (forth-binop (fn (a b) (forth-clip (- a b))))) + (forth-def-prim! state "*" (forth-binop (fn (a b) (forth-clip (* a b))))) + (forth-def-prim! + state + "/" + (forth-binop (fn (a b) (forth-clip (forth-div a b))))) + (forth-def-prim! + state + "MOD" + (forth-binop (fn (a b) (forth-clip (forth-mod a b))))) (forth-def-prim! state "/MOD" @@ -489,8 +506,8 @@ ((b (forth-pop s)) (a (forth-pop s))) (forth-push s (forth-mod a b)) (forth-push s (forth-div a b))))) - (forth-def-prim! state "NEGATE" (forth-unop (fn (a) (- 0 a)))) - (forth-def-prim! state "ABS" (forth-unop abs)) + (forth-def-prim! state "NEGATE" (forth-unop (fn (a) (forth-clip (- 0 a))))) + (forth-def-prim! state "ABS" (forth-unop (fn (a) (forth-clip (abs a))))) (forth-def-prim! state "MIN" @@ -499,12 +516,15 @@ state "MAX" (forth-binop (fn (a b) (if (> a b) a b)))) - (forth-def-prim! state "1+" (forth-unop (fn (a) (+ a 1)))) - (forth-def-prim! state "1-" (forth-unop (fn (a) (- a 1)))) - (forth-def-prim! state "2+" (forth-unop (fn (a) (+ a 2)))) - (forth-def-prim! state "2-" (forth-unop (fn (a) (- a 2)))) - (forth-def-prim! state "2*" (forth-unop (fn (a) (* a 2)))) - (forth-def-prim! state "2/" (forth-unop (fn (a) (floor (/ a 2))))) + (forth-def-prim! state "1+" (forth-unop (fn (a) (forth-clip (+ a 1))))) + (forth-def-prim! state "1-" (forth-unop (fn (a) (forth-clip (- a 1))))) + (forth-def-prim! state "2+" (forth-unop (fn (a) (forth-clip (+ a 2))))) + (forth-def-prim! state "2-" (forth-unop (fn (a) (forth-clip (- a 2))))) + (forth-def-prim! state "2*" (forth-unop (fn (a) (forth-clip (* a 2))))) + (forth-def-prim! + state + "2/" + (forth-unop (fn (a) (forth-clip (floor (/ a 2)))))) (forth-def-prim! state "=" (forth-cmp (fn (a b) (= a b)))) (forth-def-prim! state "<>" (forth-cmp (fn (a b) (not (= a b))))) (forth-def-prim! state "<" (forth-cmp (fn (a b) (< a b)))) @@ -519,6 +539,30 @@ (forth-def-prim! state "OR" (forth-binop forth-bit-or)) (forth-def-prim! state "XOR" (forth-binop forth-bit-xor)) (forth-def-prim! state "INVERT" (forth-unop forth-bit-invert)) + (forth-def-prim! + state + "LSHIFT" + (fn + (s) + (let + ((u (forth-pop s)) (x (forth-pop s))) + (let + ((ux (forth-to-unsigned x forth-bits-width))) + (let + ((res (mod (* ux (pow 2 u)) (pow 2 forth-bits-width)))) + (forth-push s (forth-from-unsigned res forth-bits-width))))))) + (forth-def-prim! + state + "RSHIFT" + (fn + (s) + (let + ((u (forth-pop s)) (x (forth-pop s))) + (let + ((ux (forth-to-unsigned x forth-bits-width))) + (let + ((res (floor (/ ux (pow 2 u))))) + (forth-push s (forth-from-unsigned res forth-bits-width))))))) (forth-def-prim! state "." diff --git a/lib/forth/scoreboard.json b/lib/forth/scoreboard.json index 4cdbcb0d..532be043 100644 --- a/lib/forth/scoreboard.json +++ b/lib/forth/scoreboard.json @@ -1,12 +1,12 @@ { "source": "gerryjackson/forth2012-test-suite src/core.fr", - "generated_at": "2026-04-24T21:06:54Z", + "generated_at": "2026-04-24T22:26:31Z", "chunks_available": 638, - "chunks_fed": 590, - "total": 590, - "pass": 174, - "fail": 0, - "error": 416, - "percent": 29, + "chunks_fed": 638, + "total": 638, + "pass": 268, + "fail": 2, + "error": 368, + "percent": 42, "note": "completed" } diff --git a/lib/forth/scoreboard.md b/lib/forth/scoreboard.md index 9ccebb0a..1ffdc4ca 100644 --- a/lib/forth/scoreboard.md +++ b/lib/forth/scoreboard.md @@ -3,15 +3,15 @@ | metric | value | | ----------------- | ----: | | chunks available | 638 | -| chunks fed | 590 | -| total | 590 | -| pass | 174 | -| fail | 0 | -| error | 416 | -| percent | 29% | +| chunks fed | 638 | +| total | 638 | +| pass | 268 | +| fail | 2 | +| error | 368 | +| percent | 42% | - **Source**: `gerryjackson/forth2012-test-suite` `src/core.fr` -- **Generated**: 2026-04-24T21:06:54Z +- **Generated**: 2026-04-24T22:26:31Z - **Note**: completed A "chunk" is any preprocessed segment ending at a `}T` (every Hayes test @@ -23,8 +23,6 @@ covers tests whose `->` / `}T` comparison mismatched. ### Chunk cap `conformance.sh` processes the first `$MAX_CHUNKS` chunks (default -**590**). Past that, `core.fr` ships tests that rely on unsigned -integer wrap-around (e.g. `COUNT-BITS` using `BEGIN DUP WHILE … 2* -REPEAT`), which never terminates on our bignum-based interpreter. The -cap should rise as those tests unblock — run with `MAX_CHUNKS=639 -./conformance.sh` once they do. +**638**, i.e. the whole Hayes Core file). Lower the cap temporarily +while iterating on primitives if a regression re-opens an infinite +loop in later tests. diff --git a/lib/forth/tests/test-phase4.sx b/lib/forth/tests/test-phase4.sx index 1e24785a..406f5bf1 100644 --- a/lib/forth/tests/test-phase4.sx +++ b/lib/forth/tests/test-phase4.sx @@ -171,6 +171,22 @@ ((r (forth-run "1000 2 ACCEPT"))) (let ((stk (nth r 2))) (forth-p4-assert "ACCEPT empty buf -> 0" (list 0) stk))))) +(define + forth-p4-shift-tests + (fn + () + (forth-p4-check-top "1 0 LSHIFT" "1 0 LSHIFT" 1) + (forth-p4-check-top "1 1 LSHIFT" "1 1 LSHIFT" 2) + (forth-p4-check-top "1 2 LSHIFT" "1 2 LSHIFT" 4) + (forth-p4-check-top "1 15 LSHIFT" "1 15 LSHIFT" 32768) + (forth-p4-check-top "1 31 LSHIFT" "1 31 LSHIFT" -2147483648) + (forth-p4-check-top "1 0 RSHIFT" "1 0 RSHIFT" 1) + (forth-p4-check-top "1 1 RSHIFT" "1 1 RSHIFT" 0) + (forth-p4-check-top "2 1 RSHIFT" "2 1 RSHIFT" 1) + (forth-p4-check-top "4 2 RSHIFT" "4 2 RSHIFT" 1) + (forth-p4-check-top "-1 1 RSHIFT (logical, not arithmetic)" "-1 1 RSHIFT" 2147483647) + (forth-p4-check-top "MSB via 1S 1 RSHIFT INVERT" "0 INVERT 1 RSHIFT INVERT" -2147483648))) + (define forth-p4-sp-tests (fn @@ -245,6 +261,7 @@ (forth-p4-char-tests) (forth-p4-key-accept-tests) (forth-p4-base-tests) + (forth-p4-shift-tests) (forth-p4-sp-tests) (dict "passed" diff --git a/plans/forth-on-sx.md b/plans/forth-on-sx.md index a67dd360..da4908fd 100644 --- a/plans/forth-on-sx.md +++ b/plans/forth-on-sx.md @@ -82,7 +82,7 @@ Representation: - [x] `CHAR`, `[CHAR]`, `KEY`, `ACCEPT` - [x] `BASE` manipulation: `DECIMAL`, `HEX` - [x] `DEPTH`, `SP@`, `SP!` -- [ ] Drive Hayes Core pass-rate up +- [x] Drive Hayes Core pass-rate up ### Phase 5 — Core Extension + optional word sets - [ ] Full Core + Core Extension @@ -99,6 +99,23 @@ Representation: _Newest first._ +- **Phase 4 close — LSHIFT/RSHIFT, 32-bit arith truncation, early + binding; Hayes 174→268 (42%).** Added `LSHIFT` / `RSHIFT` as logical + shifts on 32-bit unsigned values, converted through + `forth-to-unsigned`/`forth-from-unsigned`. All arithmetic + primitives (`+` `-` `*` `/` `MOD` `NEGATE` `ABS` `1+` `1-` `2+` + `2-` `2*` `2/`) now clip results to 32-bit signed via a new + `forth-clip` helper, so loop idioms that rely on `2*` shifting the + MSB out (e.g. Hayes' `BITS` counter) actually terminate. + Changed colon-def call compilation from late-binding to early + binding: `forth-compile-call` now resolves the target word at + compile time, which makes `: GDX 123 ; : GDX GDX 234 ;` behave + per ANS (inner `GDX` → old def, not infinite recursion). `RECURSE` + keeps its late-binding thunk via the new `forth-compile-recurse` + helper. Raised `MAX_CHUNKS` default to 638 (full `core.fr`) now + that the BITS and COUNT-BITS loops terminate. Hayes: 268 pass / + 368 error / 2 fail. + - **Phase 4 — `SP@`/`SP!` (+4; Hayes unchanged; `DEPTH` was already present).** `SP@` pushes the current data-stack depth (our closest analogue to a stack pointer — SX lists have no addressable backing). `SP!` pops a