diff --git a/lib/search/api.sx b/lib/search/api.sx index 84918b5e..cef49db4 100644 --- a/lib/search/api.sx +++ b/lib/search/api.sx @@ -4,7 +4,8 @@ ;; interpreter. Public Haskell entry points: indexDoc, lookupTerm, deleteDoc, ;; docFreq, allTerms, tokens, positioned, evalQuery, parseQuery, searchQuery, ;; rankTfIdf, rankBm25, topNTfIdf, topNBm25, fedIndex, aclFilter, searchTfIdfAcl, -;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf. +;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf, +;; paginate, pageTfIdf, pageBm25, resultCount. (define search/src @@ -21,4 +22,6 @@ "\n" search/fed-src "\n" - search/prefix-src)) + search/prefix-src + "\n" + search/page-src)) diff --git a/lib/search/conformance.conf b/lib/search/conformance.conf index c5d09b5c..79b14819 100644 --- a/lib/search/conformance.conf +++ b/lib/search/conformance.conf @@ -25,6 +25,7 @@ PRELOADS=( lib/search/rank.sx lib/search/fed.sx lib/search/prefix.sx + lib/search/page.sx lib/search/api.sx lib/search/testlib.sx ) @@ -36,4 +37,5 @@ SUITES=( "rank:lib/search/tests/rank.sx" "integration:lib/search/tests/integration.sx" "prefix:lib/search/tests/prefix.sx" + "page:lib/search/tests/page.sx" ) diff --git a/lib/search/page.sx b/lib/search/page.sx new file mode 100644 index 00000000..93b57dd4 --- /dev/null +++ b/lib/search/page.sx @@ -0,0 +1,11 @@ +;; search pagination — Haskell source fragment. Depends on rank. +;; Windows a ranked result list by offset/limit (offset >= length -> empty; +;; limit clamps to what remains). 
+;; paginate :: Int -> Int -> [DocId] -> [DocId] (offset, limit) +;; pageTfIdf :: Int -> Int -> [Term] -> Index -> [DocId] +;; pageBm25 :: Int -> Int -> Float -> Float -> [Term] -> Index -> [DocId] +;; resultCount :: [Term] -> Index -> Int + +(define + search/page-src + "paginate off lim docs = take lim (drop off docs)\npageTfIdf off lim ts idx = paginate off lim (rankTfIdf ts idx)\npageBm25 off lim k1 b ts idx = paginate off lim (rankBm25 k1 b ts idx)\nresultCount ts idx = length (rankTfIdf ts idx)\n") diff --git a/lib/search/scoreboard.json b/lib/search/scoreboard.json index df5e60d7..16472224 100644 --- a/lib/search/scoreboard.json +++ b/lib/search/scoreboard.json @@ -1,15 +1,16 @@ { "lang": "search", - "total_passed": 136, + "total_passed": 148, "total_failed": 0, - "total": 136, + "total": 148, "suites": [ {"name":"index","passed":18,"failed":0,"total":18}, {"name":"boolean","passed":28,"failed":0,"total":28}, {"name":"parse","passed":32,"failed":0,"total":32}, {"name":"rank","passed":23,"failed":0,"total":23}, {"name":"integration","passed":21,"failed":0,"total":21}, - {"name":"prefix","passed":14,"failed":0,"total":14} + {"name":"prefix","passed":14,"failed":0,"total":14}, + {"name":"page","passed":12,"failed":0,"total":12} ], - "generated": "2026-06-06T20:21:41+00:00" + "generated": "2026-06-06T20:54:50+00:00" } diff --git a/lib/search/scoreboard.md b/lib/search/scoreboard.md index 0578f296..9cdc93b3 100644 --- a/lib/search/scoreboard.md +++ b/lib/search/scoreboard.md @@ -1,6 +1,6 @@ # search scoreboard -**136 / 136 passing** (0 failure(s)). +**148 / 148 passing** (0 failure(s)). 
| Suite | Passed | Total | Status | |-------|--------|-------|--------| @@ -10,3 +10,4 @@ | rank | 23 | 23 | ok | | integration | 21 | 21 | ok | | prefix | 14 | 14 | ok | +| page | 12 | 12 | ok | diff --git a/lib/search/tests/page.sx b/lib/search/tests/page.sx new file mode 100644 index 00000000..6ad77310 --- /dev/null +++ b/lib/search/tests/page.sx @@ -0,0 +1,53 @@ +;; Extension — result pagination (offset / limit) over ranked results. +;; Corpus (tf of "x" descending): 1 x4 2 x3 3 x2 4 x1 5 y(no x) +;; rankTfIdf ["x"] -> [1,2,3,4] + +(define + page-setup + "idx = indexDoc 5 \"y\" (indexDoc 4 \"x\" (indexDoc 3 \"x x\" (indexDoc 2 \"x x x\" (indexDoc 1 \"x x x x other\" emptyIndex))))\n") + +(define + page-cases + (list + (list "first page" "pageTfIdf 0 2 [\"x\"] idx" (list 1 2)) + (list + "second page" + "pageTfIdf 2 2 [\"x\"] idx" + (list 3 4)) + (list + "sliding window" + "pageTfIdf 1 2 [\"x\"] idx" + (list 2 3)) + (list + "limit exceeds remaining" + "pageTfIdf 3 10 [\"x\"] idx" + (list 4)) + (list "offset past end" "pageTfIdf 4 2 [\"x\"] idx" (list)) + (list "limit zero" "pageTfIdf 0 0 [\"x\"] idx" (list)) + (list + "whole result" + "pageTfIdf 0 10 [\"x\"] idx" + (list 1 2 3 4)) + (list + "paginate raw list" + "paginate 1 2 [10, 20, 30, 40]" + (list 20 30)) + (list "paginate raw past end" "paginate 9 2 [10, 20]" (list)) + (list + "bm25 page window size" + "[length (pageBm25 0 2 1.5 0.75 [\"x\"] idx)]" + (list 2)) + (list "result count" "[resultCount [\"x\"] idx]" (list 4)) + (list "result count zero" "[resultCount [\"zzz\"] idx]" (list 0)))) + +(define + page-results + (search-batch page-setup (map (fn (c) (nth c 1)) page-cases))) + +(map-indexed + (fn + (i c) + (hk-test (nth c 0) (nth page-results i) (nth c 2))) + page-cases) + +{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} diff --git a/plans/search-on-sx.md b/plans/search-on-sx.md index 2f440bd0..32444f20 100644 --- a/plans/search-on-sx.md +++ b/plans/search-on-sx.md @@ -112,12 +112,17 @@ 
lib/search/index.sx lib/search/eval.sx - [x] prefix / wildcard queries (`prefixTerms`, `prefixDocs`, `prefixRankTfIdf`) — 14 tests - [ ] fuzzy matching — edit distance term expansion -- [ ] result pagination (offset / limit) +- [x] result pagination (offset / limit) — `paginate`, `pageTfIdf`, `pageBm25`, + `resultCount` — 12 tests - [ ] snippet / highlight generation - [ ] stemming (suffix stripping) — recall-improving normalizer ## Progress log +- **Extension: pagination (148/148 total).** `paginate off lim` windows a ranked list + (take lim . drop off); `pageTfIdf`/`pageBm25` + `resultCount`. 12 tests. Note the + full conformance now runs 7 suites sequentially and needs an overall timeout of ~1900s + under heavy box load. - **Extension: prefix/wildcard queries (136/136 total).** `prefixTerms` matches every indexed term starting with a prefix (via allTerms + isPrefixOf); `prefixDocs` unions their docs; `prefixRankTfIdf` ranks treating the matched terms as the query. 14 tests.