search: result pagination (offset/limit) + 12 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 26s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 26s
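paginate windows a ranked list by offset/limit (take lim . drop off); pageTfIdf/pageBm25 apply it to the TF-IDF/BM25 rankings, and resultCount returns the total number of ranked hits. 148/148 tests passing. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>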
This commit is contained in:
@@ -4,7 +4,8 @@
|
||||
;; interpreter. Public Haskell entry points: indexDoc, lookupTerm, deleteDoc,
|
||||
;; docFreq, allTerms, tokens, positioned, evalQuery, parseQuery, searchQuery,
|
||||
;; rankTfIdf, rankBm25, topNTfIdf, topNBm25, fedIndex, aclFilter, searchTfIdfAcl,
|
||||
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf.
|
||||
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf,
|
||||
;; paginate, pageTfIdf, pageBm25, resultCount.
|
||||
|
||||
(define
|
||||
search/src
|
||||
@@ -21,4 +22,6 @@
|
||||
"\n"
|
||||
search/fed-src
|
||||
"\n"
|
||||
search/prefix-src))
|
||||
search/prefix-src
|
||||
"\n"
|
||||
search/page-src))
|
||||
|
||||
@@ -25,6 +25,7 @@ PRELOADS=(
|
||||
lib/search/rank.sx
|
||||
lib/search/fed.sx
|
||||
lib/search/prefix.sx
|
||||
lib/search/page.sx
|
||||
lib/search/api.sx
|
||||
lib/search/testlib.sx
|
||||
)
|
||||
@@ -36,4 +37,5 @@ SUITES=(
|
||||
"rank:lib/search/tests/rank.sx"
|
||||
"integration:lib/search/tests/integration.sx"
|
||||
"prefix:lib/search/tests/prefix.sx"
|
||||
"page:lib/search/tests/page.sx"
|
||||
)
|
||||
|
||||
11
lib/search/page.sx
Normal file
11
lib/search/page.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
;; search pagination — Haskell source fragment. Depends on rank.
|
||||
;; Windows a ranked result list by offset/limit (offset >= length -> empty;
|
||||
;; limit clamps to what remains).
|
||||
;; paginate :: Int -> Int -> [DocId] -> [DocId] (offset, limit)
|
||||
;; pageTfIdf :: Int -> Int -> [Term] -> Index -> [DocId]
|
||||
;; pageBm25 :: Int -> Int -> Float -> Float -> [Term] -> Index -> [DocId]
|
||||
;; resultCount :: [Term] -> Index -> Int
|
||||
|
||||
(define
|
||||
search/page-src
|
||||
"paginate off lim docs = take lim (drop off docs)\npageTfIdf off lim ts idx = paginate off lim (rankTfIdf ts idx)\npageBm25 off lim k1 b ts idx = paginate off lim (rankBm25 k1 b ts idx)\nresultCount ts idx = length (rankTfIdf ts idx)\n")
|
||||
@@ -1,15 +1,16 @@
|
||||
{
|
||||
"lang": "search",
|
||||
"total_passed": 136,
|
||||
"total_passed": 148,
|
||||
"total_failed": 0,
|
||||
"total": 136,
|
||||
"total": 148,
|
||||
"suites": [
|
||||
{"name":"index","passed":18,"failed":0,"total":18},
|
||||
{"name":"boolean","passed":28,"failed":0,"total":28},
|
||||
{"name":"parse","passed":32,"failed":0,"total":32},
|
||||
{"name":"rank","passed":23,"failed":0,"total":23},
|
||||
{"name":"integration","passed":21,"failed":0,"total":21},
|
||||
{"name":"prefix","passed":14,"failed":0,"total":14}
|
||||
{"name":"prefix","passed":14,"failed":0,"total":14},
|
||||
{"name":"page","passed":12,"failed":0,"total":12}
|
||||
],
|
||||
"generated": "2026-06-06T20:21:41+00:00"
|
||||
"generated": "2026-06-06T20:54:50+00:00"
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# search scoreboard
|
||||
|
||||
**136 / 136 passing** (0 failure(s)).
|
||||
**148 / 148 passing** (0 failure(s)).
|
||||
|
||||
| Suite | Passed | Total | Status |
|
||||
|-------|--------|-------|--------|
|
||||
@@ -10,3 +10,4 @@
|
||||
| rank | 23 | 23 | ok |
|
||||
| integration | 21 | 21 | ok |
|
||||
| prefix | 14 | 14 | ok |
|
||||
| page | 12 | 12 | ok |
|
||||
|
||||
53
lib/search/tests/page.sx
Normal file
53
lib/search/tests/page.sx
Normal file
@@ -0,0 +1,53 @@
|
||||
;; Extension — result pagination (offset / limit) over ranked results.
|
||||
;; Corpus (tf of "x" descending): doc 1 = x*4 (+ "other"), doc 2 = x*3, doc 3 = x*2, doc 4 = x*1, doc 5 = "y" (no x)
|
||||
;; rankTfIdf ["x"] -> [1,2,3,4]
|
||||
|
||||
(define
|
||||
page-setup
|
||||
"idx = indexDoc 5 \"y\" (indexDoc 4 \"x\" (indexDoc 3 \"x x\" (indexDoc 2 \"x x x\" (indexDoc 1 \"x x x x other\" emptyIndex))))\n")
|
||||
|
||||
(define
|
||||
page-cases
|
||||
(list
|
||||
(list "first page" "pageTfIdf 0 2 [\"x\"] idx" (list 1 2))
|
||||
(list
|
||||
"second page"
|
||||
"pageTfIdf 2 2 [\"x\"] idx"
|
||||
(list 3 4))
|
||||
(list
|
||||
"sliding window"
|
||||
"pageTfIdf 1 2 [\"x\"] idx"
|
||||
(list 2 3))
|
||||
(list
|
||||
"limit exceeds remaining"
|
||||
"pageTfIdf 3 10 [\"x\"] idx"
|
||||
(list 4))
|
||||
(list "offset past end" "pageTfIdf 4 2 [\"x\"] idx" (list))
|
||||
(list "limit zero" "pageTfIdf 0 0 [\"x\"] idx" (list))
|
||||
(list
|
||||
"whole result"
|
||||
"pageTfIdf 0 10 [\"x\"] idx"
|
||||
(list 1 2 3 4))
|
||||
(list
|
||||
"paginate raw list"
|
||||
"paginate 1 2 [10, 20, 30, 40]"
|
||||
(list 20 30))
|
||||
(list "paginate raw past end" "paginate 9 2 [10, 20]" (list))
|
||||
(list
|
||||
"bm25 page window size"
|
||||
"[length (pageBm25 0 2 1.5 0.75 [\"x\"] idx)]"
|
||||
(list 2))
|
||||
(list "result count" "[resultCount [\"x\"] idx]" (list 4))
|
||||
(list "result count zero" "[resultCount [\"zzz\"] idx]" (list 0))))
|
||||
|
||||
(define
|
||||
page-results
|
||||
(search-batch page-setup (map (fn (c) (nth c 1)) page-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth page-results i) (nth c 2)))
|
||||
page-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
@@ -112,12 +112,17 @@ lib/search/index.sx lib/search/eval.sx
|
||||
|
||||
- [x] prefix / wildcard queries (`prefixTerms`, `prefixDocs`, `prefixRankTfIdf`) — 14 tests
|
||||
- [ ] fuzzy matching — edit distance term expansion
|
||||
- [ ] result pagination (offset / limit)
|
||||
- [x] result pagination (offset / limit) — `paginate`, `pageTfIdf`, `pageBm25`,
|
||||
`resultCount` — 12 tests
|
||||
- [ ] snippet / highlight generation
|
||||
- [ ] stemming (suffix stripping) — recall-improving normalizer
|
||||
|
||||
## Progress log
|
||||
|
||||
- **Extension: pagination (148/148 total).** `paginate off lim` windows a ranked list
|
||||
(take lim . drop off); `pageTfIdf`/`pageBm25` page the TF-IDF/BM25 rankings and `resultCount` gives the total hit count. 12 tests. Note the
|
||||
full conformance now runs 7 suites sequentially and needs an overall timeout of ~1900s
|
||||
under the heavy box load.
|
||||
- **Extension: prefix/wildcard queries (136/136 total).** `prefixTerms` matches every
|
||||
indexed term starting with a prefix (via allTerms + isPrefixOf); `prefixDocs` unions
|
||||
their docs; `prefixRankTfIdf` ranks treating the matched terms as the query. 14 tests.
|
||||
|
||||
Reference in New Issue
Block a user