diff --git a/lib/forth/compiler.sx b/lib/forth/compiler.sx index 5e64e0ea..6806c4ae 100644 --- a/lib/forth/compiler.sx +++ b/lib/forth/compiler.sx @@ -139,6 +139,50 @@ (forth-rpush s next) (dict-set! pc "v" (get (get op "target") "v")))))))))) +;; Parse input tokens until one ends in `"`. Returns joined content with +;; single spaces between tokens (emulating standard Forth S"-style parse). +(define + forth-parse-quote + (fn + (state) + (forth-parse-quote-loop state (list)))) + +(define + forth-parse-quote-loop + (fn + (state parts) + (let + ((tok (forth-next-token! state))) + (if + (nil? tok) + (forth-error state "unterminated string") + (let + ((n (len tok))) + (if + (and (> n 0) (= (substr tok (- n 1) 1) "\"")) + (let + ((final (substr tok 0 (- n 1)))) + (forth-join-parts (concat parts (list final)) " ")) + (forth-parse-quote-loop state (concat parts (list tok))))))))) + +(define + forth-join-parts + (fn + (parts sep) + (if + (= (len parts) 0) + "" + (forth-join-loop (rest parts) sep (first parts))))) + +(define + forth-join-loop + (fn + (xs sep acc) + (if + (= (len xs) 0) + acc + (forth-join-loop (rest-of xs) sep (str acc sep (first xs)))))) + (define forth-find-do (fn @@ -413,6 +457,51 @@ (for-each (fn (t) (dict-set! t "v" exit-pc)) (get marker "leaves")))))) + (forth-def-prim-imm! + state + "S\"" + (fn + (s) + (let + ((content (forth-parse-quote s))) + (if + (get s "compiling") + (let + ((addr (forth-alloc-bytes! s (len content)))) + (forth-mem-write-string! s addr content) + (forth-def-append! s (fn (ss) (forth-push ss addr))) + (forth-def-append! s (fn (ss) (forth-push ss (len content))))) + (let + ((addr (forth-alloc-bytes! s (len content)))) + (forth-mem-write-string! s addr content) + (forth-push s addr) + (forth-push s (len content))))))) + (forth-def-prim-imm! + state + "C\"" + (fn + (s) + (let + ((content (forth-parse-quote s))) + (let + ((addr (forth-alloc-bytes! s (+ 1 (len content))))) + (forth-mem-write! s addr (len content)) + (forth-mem-write-string! s (+ addr 1) content) + (if + (get s "compiling") + (forth-def-append! s (fn (ss) (forth-push ss addr))) + (forth-push s addr)))))) + (forth-def-prim-imm! + state + ".\"" + (fn + (s) + (let + ((content (forth-parse-quote s))) + (if + (get s "compiling") + (forth-def-append! s (fn (ss) (forth-emit-str ss content))) + (forth-emit-str s content))))) (forth-def-prim-imm! state "LEAVE" diff --git a/lib/forth/runtime.sx b/lib/forth/runtime.sx index 673ba07e..059b813b 100644 --- a/lib/forth/runtime.sx +++ b/lib/forth/runtime.sx @@ -21,8 +21,97 @@ (dict-set! s "base" 10) (dict-set! s "vars" (dict)) (dict-set! s "cstack" (list)) + (dict-set! s "mem" (dict)) + (dict-set! s "here" 0) s))) +(define + forth-mem-write! + (fn (state addr u) (dict-set! (get state "mem") (str addr) u))) + +(define + forth-mem-read + (fn + (state addr) + (or (get (get state "mem") (str addr)) 0))) + +(define + forth-alloc-bytes! + (fn + (state n) + (let + ((addr (get state "here"))) + (dict-set! state "here" (+ addr n)) + addr))) + +(define + forth-mem-write-string! + (fn + (state addr s) + (let + ((n (len s))) + (forth-mem-write-string-loop! state addr s 0 n)))) + +(define + forth-mem-write-string-loop! + (fn + (state addr s i n) + (when + (< i n) + (begin + (forth-mem-write! state (+ addr i) (char-code (substr s i 1))) + (forth-mem-write-string-loop! state addr s (+ i 1) n))))) + +(define + forth-mem-read-string + (fn + (state addr n) + (forth-mem-read-string-loop state addr 0 n ""))) + +(define + forth-mem-read-string-loop + (fn + (state addr i n acc) + (if + (>= i n) + acc + (forth-mem-read-string-loop + state + addr + (+ i 1) + n + (str acc (char-from-code (forth-mem-read state (+ addr i)))))))) + +(define + forth-fill-loop + (fn + (state addr u char i) + (when + (< i u) + (begin + (forth-mem-write! state (+ addr i) char) + (forth-fill-loop state addr u char (+ i 1)))))) + +(define + forth-cmove-loop + (fn + (state src dst u i) + (when + (< i u) + (begin + (forth-mem-write! state (+ dst i) (forth-mem-read state (+ src i))) + (forth-cmove-loop state src dst u (+ i 1)))))) + +(define + forth-cmove-loop-desc + (fn + (state src dst u i) + (when + (>= i 0) + (begin + (forth-mem-write! state (+ dst i) (forth-mem-read state (+ src i))) + (forth-cmove-loop-desc state src dst u (- i 1)))))) + (define forth-cpush (fn (state v) (dict-set! state "cstack" (cons v (get state "cstack"))))) @@ -487,4 +576,82 @@ (forth-error s "return stack underflow")) (forth-push s (nth rs 1)) (forth-push s (nth rs 0))))) + (forth-def-prim! + state + "C@" + (fn + (s) + (let ((addr (forth-pop s))) (forth-push s (forth-mem-read s addr))))) + (forth-def-prim! + state + "C!" + (fn + (s) + (let + ((addr (forth-pop s)) (v (forth-pop s))) + (forth-mem-write! s addr v)))) + (forth-def-prim! state "CHAR+" (fn (s) (forth-push s (+ (forth-pop s) 1)))) + (forth-def-prim! state "CHARS" (fn (s) nil)) + (forth-def-prim! + state + "TYPE" + (fn + (s) + (let + ((u (forth-pop s)) (addr (forth-pop s))) + (forth-emit-str s (forth-mem-read-string s addr u))))) + (forth-def-prim! + state + "COUNT" + (fn + (s) + (let + ((addr (forth-pop s))) + (let + ((u (forth-mem-read s addr))) + (forth-push s (+ addr 1)) + (forth-push s u))))) + (forth-def-prim! + state + "FILL" + (fn + (s) + (let + ((char (forth-pop s)) (u (forth-pop s)) (addr (forth-pop s))) + (forth-fill-loop s addr u char 0)))) + (forth-def-prim! + state + "BLANK" + (fn + (s) + (let + ((u (forth-pop s)) (addr (forth-pop s))) + (forth-fill-loop s addr u 32 0)))) + (forth-def-prim! + state + "CMOVE" + (fn + (s) + (let + ((u (forth-pop s)) (dst (forth-pop s)) (src (forth-pop s))) + (forth-cmove-loop s src dst u 0)))) + (forth-def-prim! + state + "CMOVE>" + (fn + (s) + (let + ((u (forth-pop s)) (dst (forth-pop s)) (src (forth-pop s))) + (forth-cmove-loop-desc s src dst u (- u 1))))) + (forth-def-prim! + state + "MOVE" + (fn + (s) + (let + ((u (forth-pop s)) (dst (forth-pop s)) (src (forth-pop s))) + (if + (or (<= dst src) (>= dst (+ src u))) + (forth-cmove-loop s src dst u 0) + (forth-cmove-loop-desc s src dst u (- u 1)))))) state)) diff --git a/lib/forth/scoreboard.json b/lib/forth/scoreboard.json index e1f2d9b2..bc7449e5 100644 --- a/lib/forth/scoreboard.json +++ b/lib/forth/scoreboard.json @@ -1,12 +1,12 @@ { "source": "gerryjackson/forth2012-test-suite src/core.fr", - "generated_at": "2026-04-24T19:13:12Z", + "generated_at": "2026-04-24T19:45:15Z", "chunks_available": 638, "chunks_fed": 590, "total": 590, - "pass": 165, + "pass": 168, "fail": 0, - "error": 425, - "percent": 27, + "error": 422, + "percent": 28, "note": "completed" } diff --git a/lib/forth/scoreboard.md b/lib/forth/scoreboard.md index e4d5a468..5cd55b41 100644 --- a/lib/forth/scoreboard.md +++ b/lib/forth/scoreboard.md @@ -5,13 +5,13 @@ | chunks available | 638 | | chunks fed | 590 | | total | 590 | -| pass | 165 | +| pass | 168 | | fail | 0 | -| error | 425 | -| percent | 27% | +| error | 422 | +| percent | 28% | - **Source**: `gerryjackson/forth2012-test-suite` `src/core.fr` -- **Generated**: 2026-04-24T19:13:12Z +- **Generated**: 2026-04-24T19:45:15Z - **Note**: completed A "chunk" is any preprocessed segment ending at a `}T` (every Hayes test diff --git a/lib/forth/tests/test-phase4.sx b/lib/forth/tests/test-phase4.sx new file mode 100644 index 00000000..0c9816cd --- /dev/null +++ b/lib/forth/tests/test-phase4.sx @@ -0,0 +1,164 @@ +;; Phase 4 — strings + more Core. +;; Uses the byte-memory model on state ("mem" dict + "here" cursor). + +(define forth-p4-passed 0) +(define forth-p4-failed 0) +(define forth-p4-failures (list)) + +(define + forth-p4-assert + (fn + (label expected actual) + (if + (= expected actual) + (set! forth-p4-passed (+ forth-p4-passed 1)) + (begin + (set! forth-p4-failed (+ forth-p4-failed 1)) + (set! + forth-p4-failures + (concat + forth-p4-failures + (list + (str label ": expected " (str expected) " got " (str actual))))))))) + +(define + forth-p4-check-output + (fn + (label src expected) + (let ((r (forth-run src))) (forth-p4-assert label expected (nth r 1))))) + +(define + forth-p4-check-stack-size + (fn + (label src expected-n) + (let + ((r (forth-run src))) + (forth-p4-assert label expected-n (len (nth r 2)))))) + +(define + forth-p4-check-top + (fn + (label src expected) + (let + ((r (forth-run src))) + (let + ((stk (nth r 2))) + (forth-p4-assert label expected (nth stk (- (len stk) 1))))))) + +(define + forth-p4-check-typed + (fn + (label src expected) + (forth-p4-check-output label (str src " TYPE") expected))) + +(define + forth-p4-string-tests + (fn + () + (forth-p4-check-typed + "S\" + TYPE — hello" + "S\" HELLO\"" + "HELLO") + (forth-p4-check-typed + "S\" + TYPE — two words" + "S\" HELLO WORLD\"" + "HELLO WORLD") + (forth-p4-check-typed + "S\" + TYPE — empty" + "S\" \"" + "") + (forth-p4-check-typed + "S\" + TYPE — single char" + "S\" X\"" + "X") + (forth-p4-check-stack-size + "S\" pushes (addr len)" + "S\" HI\"" + 2) + (forth-p4-check-top + "S\" length is correct" + "S\" HELLO\"" + 5) + (forth-p4-check-output + ".\" prints at interpret time" + ".\" HELLO\"" + "HELLO") + (forth-p4-check-output + ".\" in colon def" + ": GREET .\" HI \" ; GREET GREET" + "HI HI "))) + +(define + forth-p4-count-tests + (fn + () + (forth-p4-check-typed + "C\" + COUNT + TYPE" + "C\" ABC\" COUNT" + "ABC") + (forth-p4-check-typed + "C\" then COUNT leaves right len" + "C\" HI THERE\" COUNT" + "HI THERE"))) + +(define + forth-p4-fill-tests + (fn + () + (forth-p4-check-typed + "FILL overwrites prefix bytes" + "S\" ABCDE\" 2DUP DROP 3 65 FILL" + "AAADE") + (forth-p4-check-typed + "BLANK sets spaces" + "S\" XYZAB\" 2DUP DROP 3 BLANK" + " AB"))) + +(define + forth-p4-cmove-tests + (fn + () + (forth-p4-check-output + "CMOVE copies HELLO forward" + ": MKH 72 0 C! 69 1 C! 76 2 C! 76 3 C! 79 4 C! ; + : T MKH 0 10 5 CMOVE 10 5 TYPE ; T" + "HELLO") + (forth-p4-check-output + "CMOVE> copies overlapping backward" + ": MKA 65 0 C! 66 1 C! 67 2 C! ; + : T MKA 0 1 2 CMOVE> 0 3 TYPE ; T" + "AAB") + (forth-p4-check-output + "MOVE picks direction for overlap" + ": MKA 65 0 C! 66 1 C! 67 2 C! ; + : T MKA 0 1 2 MOVE 0 3 TYPE ; T" + "AAB"))) + +(define + forth-p4-charplus-tests + (fn + () + (forth-p4-check-top + "CHAR+ increments" + "5 CHAR+" + 6))) + +(define + forth-p4-run-all + (fn + () + (set! forth-p4-passed 0) + (set! forth-p4-failed 0) + (set! forth-p4-failures (list)) + (forth-p4-string-tests) + (forth-p4-count-tests) + (forth-p4-fill-tests) + (forth-p4-cmove-tests) + (forth-p4-charplus-tests) + (dict + "passed" + forth-p4-passed + "failed" + forth-p4-failed + "failures" + forth-p4-failures))) diff --git a/plans/forth-on-sx.md b/plans/forth-on-sx.md index 73ebbca9..4539fbb3 100644 --- a/plans/forth-on-sx.md +++ b/plans/forth-on-sx.md @@ -78,7 +78,7 @@ Representation: - [x] Baseline: probably 30-50% Core passing after phase 3 ### Phase 4 — strings + more Core -- [ ] `S"`, `C"`, `."`, `TYPE`, `COUNT`, `CMOVE`, `FILL`, `BLANK` +- [x] `S"`, `C"`, `."`, `TYPE`, `COUNT`, `CMOVE`, `FILL`, `BLANK` - [ ] `CHAR`, `[CHAR]`, `KEY`, `ACCEPT` - [ ] `BASE` manipulation: `DECIMAL`, `HEX` - [ ] `DEPTH`, `SP@`, `SP!` @@ -99,6 +99,20 @@ Representation: _Newest first._ +- **Phase 4 — strings: `S"`/`C"`/`."`/`TYPE`/`COUNT`/`CMOVE`/`CMOVE>`/`MOVE`/`FILL`/`BLANK`/`C@`/`C!`/`CHAR+`/`CHARS` (+16 / Hayes 165→168).** + Added a byte-addressable memory model to state: `mem` (dict keyed by + stringified address → integer byte) and `here` (next-free integer + addr). Helpers `forth-alloc-bytes!` / `forth-mem-write-string!` / + `forth-mem-read-string`. `S"`/`C"`/`."` are IMMEDIATE parsing words + that consume tokens until one ends with `"`, then either copy content + into memory at compile time (and emit a push of `addr`/`addr len` for + the colon-def body) or do it inline in interpret mode. `TYPE` emits + `u` bytes from `addr` via `char-from-code`. `COUNT` reads the length + byte at a counted-string address and pushes (`addr+1`, `u`). `FILL`, + `BLANK` (FILL with space), `CMOVE` (forward), `CMOVE>` (backward), + and `MOVE` (auto-directional) mutate the byte dict. 193/193 internal + tests, Hayes 168/590 (+3). + - **Phase 3 — Hayes conformance runner + baseline scoreboard (165/590, 28%).** `lib/forth/conformance.sh` preprocesses `ans-tests/core.fr` (strips `\` and `( ... )` comments + `TESTING` lines), splits the source on every