search: boolean-filtered ranked search + 11 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 40s

searchRankTfIdf/searchRankBm25 parse a boolean query, filter docs via evalQuery,
then rank survivors by relevance over the query's leaf terms (queryTerms) — the
filter-then-rank pattern. 225/225 tests passing.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-06 23:58:37 +00:00
parent cfa68c3db3
commit db2a5dc6ab
7 changed files with 98 additions and 7 deletions

View File

@@ -7,7 +7,8 @@
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf,
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
;; nearDocs, expandTerm, synDocs, synRankTfIdf.
;; nearDocs, expandTerm, synDocs, synRankTfIdf, queryTerms, searchRankTfIdf,
;; searchRankBm25.
(define
search/src
@@ -36,4 +37,6 @@
"\n"
search/near-src
"\n"
search/syn-src))
search/syn-src
"\n"
search/rankq-src))

View File

@@ -31,6 +31,7 @@ PRELOADS=(
lib/search/stem.sx
lib/search/near.sx
lib/search/syn.sx
lib/search/rankq.sx
lib/search/api.sx
lib/search/testlib.sx
)
@@ -48,4 +49,5 @@ SUITES=(
"stem:lib/search/tests/stem.sx"
"near:lib/search/tests/near.sx"
"syn:lib/search/tests/syn.sx"
"rankq:lib/search/tests/rankq.sx"
)

11
lib/search/rankq.sx Normal file
View File

@@ -0,0 +1,11 @@
;; search boolean-filtered ranked search — Haskell source fragment.
;; Depends on parse (parseQuery/Query), query (evalQuery), rank (tfidfDoc/bm25Doc/
;; cmpScore). Filters by the boolean query, then ranks the surviving docs by
;; relevance over the query's leaf terms — the real-world filter-then-rank pattern.
;;
;; queryTerms :: Query -> [Term]
;;   Flattens a Query (Term/And/Or/Not/Phrase) into its leaf terms, left to right.
;;   Sub-results are joined with ++ and never deduplicated, so a term repeated in
;;   the query appears more than once in the list handed to the scorer. Terms
;;   under Not are kept as well.
;;   NOTE(review): because Not's terms are kept, negated terms are also passed to
;;   the scorer as ranking terms — confirm this is the intended relevance model.
;; mkSubPair f terms idx d
;;   Pairs a doc with its score: (f terms idx d, d).
;; rankSubsetWith f terms docs idx
;;   Scores every doc in docs with f, sorts the (score, doc) pairs with
;;   sortBy cmpScore, and returns only the docs. The resulting order (including
;;   how ties fall) is whatever cmpScore dictates; cmpScore lives in rank, not here.
;; searchRankTfIdf :: String -> Index -> [DocId]
;;   Parses s with parseQuery, filters with evalQuery idx q, ranks with tfidfDoc.
;; searchRankBm25 :: Float -> Float -> String -> Index -> [DocId]
;;   Same pipeline, ranking with bm25Doc k1 b (k1 and b are the two Float arguments).
(define
search/rankq-src
"queryTerms (Term t) = [t]\nqueryTerms (And a b) = queryTerms a ++ queryTerms b\nqueryTerms (Or a b) = queryTerms a ++ queryTerms b\nqueryTerms (Not a) = queryTerms a\nqueryTerms (Phrase ts) = ts\nmkSubPair f terms idx d = (f terms idx d, d)\nrankSubsetWith f terms docs idx = map snd (sortBy cmpScore (map (mkSubPair f terms idx) docs))\nsearchRankTfIdf s idx = let q = parseQuery s in rankSubsetWith tfidfDoc (queryTerms q) (evalQuery idx q) idx\nsearchRankBm25 k1 b s idx = let q = parseQuery s in rankSubsetWith (bm25Doc k1 b) (queryTerms q) (evalQuery idx q) idx\n")

View File

@@ -1,8 +1,8 @@
{
"lang": "search",
"total_passed": 214,
"total_passed": 225,
"total_failed": 0,
"total": 214,
"total": 225,
"suites": [
{"name":"index","passed":18,"failed":0,"total":18},
{"name":"boolean","passed":28,"failed":0,"total":28},
@@ -15,7 +15,8 @@
{"name":"highlight","passed":12,"failed":0,"total":12},
{"name":"stem","passed":18,"failed":0,"total":18},
{"name":"near","passed":9,"failed":0,"total":9},
{"name":"syn","passed":9,"failed":0,"total":9}
{"name":"syn","passed":9,"failed":0,"total":9},
{"name":"rankq","passed":11,"failed":0,"total":11}
],
"generated": "2026-06-06T23:25:35+00:00"
"generated": "2026-06-06T23:58:05+00:00"
}

View File

@@ -1,6 +1,6 @@
# search scoreboard
**214 / 214 passing** (0 failure(s)).
**225 / 225 passing** (0 failure(s)).
| Suite | Passed | Total | Status |
|-------|--------|-------|--------|
@@ -16,3 +16,4 @@
| stem | 18 | 18 | ok |
| near | 9 | 9 | ok |
| syn | 9 | 9 | ok |
| rankq | 11 | 11 | ok |

67
lib/search/tests/rankq.sx Normal file
View File

@@ -0,0 +1,67 @@
;; Extension — boolean-filtered ranked search (filter then rank by relevance).
;; Corpus (doc id, text, then occurrence count of each term in that text):
;; 1 "apple apple banana" apple2 banana1
;; 2 "apple cherry" apple1 cherry1
;; 3 "banana cherry" banana1 cherry1
;; 4 "apple banana cherry" apple1 banana1 cherry1
;;
;; rankq-setup is Haskell source text that binds idx to an index holding the four
;; docs above. It is built by nesting indexDoc calls around emptyIndex, so doc 1
;; is added innermost (first) and doc 4 outermost (last).
(define
rankq-setup
"idx = indexDoc 4 \"apple banana cherry\" (indexDoc 3 \"banana cherry\" (indexDoc 2 \"apple cherry\" (indexDoc 1 \"apple apple banana\" emptyIndex)))\n")
;; Test table. Each case is a three-element list:
;;   0: test name
;;   1: Haskell expression (as source text) to evaluate
;;   2: expected value
;; The first three cases check queryTerms alone; the rest run the full
;; filter-then-rank searches against idx.
(define
rankq-cases
(list
(list
"queryTerms and"
"queryTerms (parseQuery \"apple AND banana\")"
(list "apple" "banana"))
;; Pins that the term under NOT is still returned as a leaf term.
(list
"queryTerms or not"
"queryTerms (parseQuery \"a OR NOT b\")"
(list "a" "b"))
(list
"queryTerms phrase"
"queryTerms (parseQuery \"\\\"x y\\\" OR z\")"
(list "x" "y" "z"))
(list
"and filter ranked by tf"
"searchRankTfIdf \"apple AND banana\" idx"
(list 1 4))
;; NOTE(review): docs 2, 3 and 4 each contain "cherry" exactly once, so the
;; expected order presumably depends on how cmpScore/sortBy order equal or
;; near-equal scores — confirm this is stable by design.
(list
"single term ranked tie"
"searchRankTfIdf \"cherry\" idx"
(list 2 3 4))
(list
"or filter ranked"
"searchRankTfIdf \"apple OR banana\" idx"
(list 1 4 2 3))
(list
"and-not narrows then ranks"
"searchRankTfIdf \"apple AND NOT banana\" idx"
(list 2))
(list
"phrase filter ranked"
"searchRankTfIdf \"\\\"apple banana\\\"\" idx"
(list 1 4))
(list "no matches" "searchRankTfIdf \"zzz\" idx" (list))
;; The result is passed through sort, so this case asserts which docs survive
;; the filter, not the BM25 ranking order.
(list
"bm25 boolean ranked subset"
"sort (searchRankBm25 1.5 0.75 \"apple OR banana\" idx)"
(list 1 2 3 4))
(list
"bm25 and filter"
"searchRankBm25 1.5 0.75 \"apple AND NOT banana\" idx"
(list 2))))
;; Evaluate every case expression (element 1 of each case) in a single
;; search-batch call, with rankq-setup supplied as the shared setup source.
;; The results are later read by position, so search-batch presumably returns
;; one result per expression in the same order — confirm in testlib.
(define
rankq-results
(search-batch rankq-setup (map (fn (c) (nth c 1)) rankq-cases)))
;; Run one hk-test per case: pass the case name, the i-th batch result, and the
;; case's expected value (element 2).
(map-indexed
(fn
(i c)
(hk-test (nth c 0) (nth rankq-results i) (nth c 2)))
rankq-cases)
;; Final value of the file: a map exposing hk-test-fail, hk-test-pass and
;; hk-test-fails. NOTE(review): presumably read by the suite runner to build the
;; scoreboard — confirm against the harness.
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}

View File

@@ -120,9 +120,15 @@ lib/search/index.sx lib/search/eval.sx
— 18 tests
- [x] proximity / NEAR — `nearDocs k t1 t2` (unordered, within k positions) — 9 tests
- [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests
- [x] boolean-filtered ranked search — `queryTerms`, `searchRankTfIdf`,
`searchRankBm25` (filter by boolean query, rank survivors by relevance) — 11 tests
## Progress log
- **Extension: boolean-filtered ranked search (225/225 total).** `searchRankTfIdf`/
`searchRankBm25` parse a boolean query, filter docs via evalQuery, then rank the
survivors by relevance over the query's leaf terms (`queryTerms`) — the real-world
filter-then-rank pattern. 11 tests.
- **Extension: synonyms/query expansion (214/214 total).** A synonym map
`[(Term,[Term])]` expands a query term to itself + synonyms (`expandTerm`); `synDocs`
unions, `synRankTfIdf` ranks the expanded set. 9 tests.