From 1b2935828c41b74ec7866d418b674928a80d7059 Mon Sep 17 00:00:00 2001 From: giles Date: Sat, 25 Apr 2026 02:53:46 +0000 Subject: [PATCH] forth: String word set COMPARE/SEARCH/SLITERAL (+9) --- lib/forth/compiler.sx | 47 ++++++++++++++++++++++++++++++++++ lib/forth/runtime.sx | 40 +++++++++++++++++++++++++++++ lib/forth/scoreboard.json | 2 +- lib/forth/scoreboard.md | 2 +- lib/forth/tests/test-phase5.sx | 33 ++++++++++++++++++++++++ plans/forth-on-sx.md | 13 +++++++++- 6 files changed, 134 insertions(+), 3 deletions(-) diff --git a/lib/forth/compiler.sx b/lib/forth/compiler.sx index 605c43d3..a4d1eefd 100644 --- a/lib/forth/compiler.sx +++ b/lib/forth/compiler.sx @@ -793,6 +793,53 @@ (let ((w (forth-pop s))) (forth-push s (or (get w "body-addr") 0))))) + (forth-def-prim-imm! + state + "SLITERAL" + (fn + (s) + (let + ((u (forth-pop s)) (c-addr (forth-pop s))) + (let + ((content (forth-mem-read-string s c-addr u))) + (let + ((new-addr (forth-alloc-bytes! s u))) + (forth-mem-write-string! s new-addr content) + (forth-def-append! s (fn (ss) (forth-push ss new-addr))) + (forth-def-append! s (fn (ss) (forth-push ss u)))))))) + (forth-def-prim! + state + "COMPARE" + (fn + (s) + (let + ((u2 (forth-pop s)) + (a2 (forth-pop s)) + (u1 (forth-pop s)) + (a1 (forth-pop s))) + (forth-push s (forth-compare-bytes-loop s a1 u1 a2 u2 0))))) + (forth-def-prim! + state + "SEARCH" + (fn + (s) + (let + ((u2 (forth-pop s)) + (a2 (forth-pop s)) + (u1 (forth-pop s)) + (a1 (forth-pop s))) + (let + ((idx (forth-search-bytes s a1 u1 a2 u2 0))) + (if + (< idx 0) + (begin + (forth-push s a1) + (forth-push s u1) + (forth-push s 0)) + (begin + (forth-push s (+ a1 idx)) + (forth-push s (- u1 idx)) + (forth-push s -1))))))) (forth-def-prim! state "R/O" (fn (s) (forth-push s 0))) (forth-def-prim! state "W/O" (fn (s) (forth-push s 1))) (forth-def-prim! state "R/W" (fn (s) (forth-push s 2))) diff --git a/lib/forth/runtime.sx b/lib/forth/runtime.sx index a19881d9..bc6e84ad 100644 --- a/lib/forth/runtime.sx +++ b/lib/forth/runtime.sx @@ -381,6 +381,46 @@ (dict-set! state "hold" (cons ch (get state "hold"))) (forth-double-push-u state rest))))))) +(define + forth-compare-bytes-loop + (fn + (state a1 u1 a2 u2 i) + (cond + ((and (= i u1) (= i u2)) 0) + ((= i u1) -1) + ((= i u2) 1) + (else + (let + ((b1 (forth-mem-read state (+ a1 i))) + (b2 (forth-mem-read state (+ a2 i)))) + (cond + ((< b1 b2) -1) + ((> b1 b2) 1) + (else (forth-compare-bytes-loop state a1 u1 a2 u2 (+ i 1))))))))) + +(define + forth-match-at + (fn + (state a1 start a2 u2 j) + (cond + ((= j u2) true) + ((not + (= + (forth-mem-read state (+ a1 (+ start j))) + (forth-mem-read state (+ a2 j)))) + false) + (else (forth-match-at state a1 start a2 u2 (+ j 1)))))) + +(define + forth-search-bytes + (fn + (state a1 u1 a2 u2 i) + (cond + ((= u2 0) 0) + ((> (+ i u2) u1) -1) + ((forth-match-at state a1 i a2 u2 0) i) + (else (forth-search-bytes state a1 u1 a2 u2 (+ i 1)))))) + (define forth-pic-S-loop (fn diff --git a/lib/forth/scoreboard.json b/lib/forth/scoreboard.json index c56fd51d..02ad1ba6 100644 --- a/lib/forth/scoreboard.json +++ b/lib/forth/scoreboard.json @@ -1,6 +1,6 @@ { "source": "gerryjackson/forth2012-test-suite src/core.fr", - "generated_at": "2026-04-25T02:24:33Z", + "generated_at": "2026-04-25T02:53:26Z", "chunks_available": 638, "chunks_fed": 638, "total": 638, diff --git a/lib/forth/scoreboard.md b/lib/forth/scoreboard.md index 6cf97957..6de5a764 100644 --- a/lib/forth/scoreboard.md +++ b/lib/forth/scoreboard.md @@ -11,7 +11,7 @@ | percent | 76% | - **Source**: `gerryjackson/forth2012-test-suite` `src/core.fr` -- **Generated**: 2026-04-25T02:24:33Z +- **Generated**: 2026-04-25T02:53:26Z - **Note**: completed A "chunk" is any preprocessed segment ending at a `}T` (every Hayes test diff --git a/lib/forth/tests/test-phase5.sx b/lib/forth/tests/test-phase5.sx index ba015882..38c29c47 100644 --- a/lib/forth/tests/test-phase5.sx +++ b/lib/forth/tests/test-phase5.sx @@ -268,6 +268,38 @@ "S\" /tmp/nope.fxf\" R/O OPEN-FILE SWAP DROP 0 =" 0))) +(define + forth-p5-string-tests + (fn + () + (forth-p5-check-top "COMPARE equal" "S\" ABC\" S\" ABC\" COMPARE" 0) + (forth-p5-check-top "COMPARE less" "S\" ABC\" S\" ABD\" COMPARE" -1) + (forth-p5-check-top "COMPARE greater" "S\" ABD\" S\" ABC\" COMPARE" 1) + (forth-p5-check-top + "COMPARE prefix less" + "S\" AB\" S\" ABC\" COMPARE" + -1) + (forth-p5-check-top + "COMPARE prefix greater" + "S\" ABC\" S\" AB\" COMPARE" + 1) + (forth-p5-check-top + "SEARCH found flag" + "S\" HELLO WORLD\" S\" WORLD\" SEARCH" + -1) + (forth-p5-check-top + "SEARCH not found flag" + "S\" HELLO\" S\" XYZ\" SEARCH" + 0) + (forth-p5-check-top + "SEARCH empty needle flag" + "S\" HELLO\" S\" \" SEARCH" + -1) + (forth-p5-check-top + "SLITERAL via [ S\" ... \" ]" + ": A [ S\" HI\" ] SLITERAL ; A SWAP DROP" + 2))) + (define forth-p4-check-output-passthrough (fn @@ -291,6 +323,7 @@ (forth-p5-state-tests) (forth-p5-misc-tests) (forth-p5-fa-tests) + (forth-p5-string-tests) (dict "passed" forth-p5-passed diff --git a/plans/forth-on-sx.md b/plans/forth-on-sx.md index 21f4a55f..fa9f0907 100644 --- a/plans/forth-on-sx.md +++ b/plans/forth-on-sx.md @@ -94,7 +94,7 @@ Representation: - [x] Source/state: `EVALUATE`, `STATE`, `[`, `]` (`SOURCE`/`>IN` stubbed; tokenized input means the exact byte/offset semantics aren't useful here) - [x] Misc Core: `WITHIN`, `MAX`/`MIN` (already), `ABORT`, `ABORT"`, `EXIT`, `UNLOOP` - [x] File Access word set (in-memory — `read-file` is not reachable from the epoch eval env) -- [ ] String word set (`SLITERAL`, `COMPARE`, `SEARCH`) +- [x] String word set (`SLITERAL`, `COMPARE`, `SEARCH`) - [ ] Target: 100% Hayes Core ### Phase 6 — speed @@ -106,6 +106,17 @@ Representation: _Newest first._ +- **Phase 5 — String word set `COMPARE`/`SEARCH`/`SLITERAL` (+9).** + `COMPARE` walks bytes via the new `forth-compare-bytes-loop`, + returning -1/0/1 with standard prefix semantics (shorter string + compares less than its extension). `SEARCH` scans the haystack + with a helper `forth-search-bytes` and `forth-match-at`, returning + the tail after the first match or the original string with flag=0. + Empty needle returns at offset 0 with flag=-1 per ANS. `SLITERAL` + is IMMEDIATE: pops `(c-addr u)` at compile time, copies the bytes + into a fresh allocation, and emits the two pushes so the compiled + word yields the interned string at runtime. + - **Phase 5 — File Access word set (in-memory backing; +4).** `OPEN-FILE`/`CREATE-FILE`/`CLOSE-FILE`/`READ-FILE`/`WRITE-FILE`/ `FILE-POSITION`/`FILE-SIZE`/`REPOSITION-FILE`/`DELETE-FILE` plus