search: boolean-filtered ranked search + 11 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 40s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 40s
searchRankTfIdf/searchRankBm25 parse a boolean query, filter docs via evalQuery, then rank survivors by relevance over the query's leaf terms (queryTerms) — the filter-then-rank pattern. 225/225. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -7,7 +7,8 @@
|
||||
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf,
|
||||
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
|
||||
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
|
||||
;; nearDocs, expandTerm, synDocs, synRankTfIdf.
|
||||
;; nearDocs, expandTerm, synDocs, synRankTfIdf, queryTerms, searchRankTfIdf,
|
||||
;; searchRankBm25.
|
||||
|
||||
(define
|
||||
search/src
|
||||
@@ -36,4 +37,6 @@
|
||||
"\n"
|
||||
search/near-src
|
||||
"\n"
|
||||
search/syn-src))
|
||||
search/syn-src
|
||||
"\n"
|
||||
search/rankq-src))
|
||||
|
||||
@@ -31,6 +31,7 @@ PRELOADS=(
|
||||
lib/search/stem.sx
|
||||
lib/search/near.sx
|
||||
lib/search/syn.sx
|
||||
lib/search/rankq.sx
|
||||
lib/search/api.sx
|
||||
lib/search/testlib.sx
|
||||
)
|
||||
@@ -48,4 +49,5 @@ SUITES=(
|
||||
"stem:lib/search/tests/stem.sx"
|
||||
"near:lib/search/tests/near.sx"
|
||||
"syn:lib/search/tests/syn.sx"
|
||||
"rankq:lib/search/tests/rankq.sx"
|
||||
)
|
||||
|
||||
11
lib/search/rankq.sx
Normal file
11
lib/search/rankq.sx
Normal file
@@ -0,0 +1,11 @@
|
||||
;; search boolean-filtered ranked search — Haskell source fragment.
|
||||
;; Depends on parse (parseQuery/Query), query (evalQuery), rank (tfidfDoc/bm25Doc/
|
||||
;; cmpScore). Filters by the boolean query, then ranks the surviving docs by
|
||||
;; relevance over the query's leaf terms — the real-world filter-then-rank pattern.
|
||||
;; queryTerms :: Query -> [Term]
|
||||
;; searchRankTfIdf :: String -> Index -> [DocId]
|
||||
;; searchRankBm25 :: Float -> Float -> String -> Index -> [DocId]
|
||||
|
||||
(define
|
||||
search/rankq-src
|
||||
"queryTerms (Term t) = [t]\nqueryTerms (And a b) = queryTerms a ++ queryTerms b\nqueryTerms (Or a b) = queryTerms a ++ queryTerms b\nqueryTerms (Not a) = queryTerms a\nqueryTerms (Phrase ts) = ts\nmkSubPair f terms idx d = (f terms idx d, d)\nrankSubsetWith f terms docs idx = map snd (sortBy cmpScore (map (mkSubPair f terms idx) docs))\nsearchRankTfIdf s idx = let q = parseQuery s in rankSubsetWith tfidfDoc (queryTerms q) (evalQuery idx q) idx\nsearchRankBm25 k1 b s idx = let q = parseQuery s in rankSubsetWith (bm25Doc k1 b) (queryTerms q) (evalQuery idx q) idx\n")
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"lang": "search",
|
||||
"total_passed": 214,
|
||||
"total_passed": 225,
|
||||
"total_failed": 0,
|
||||
"total": 214,
|
||||
"total": 225,
|
||||
"suites": [
|
||||
{"name":"index","passed":18,"failed":0,"total":18},
|
||||
{"name":"boolean","passed":28,"failed":0,"total":28},
|
||||
@@ -15,7 +15,8 @@
|
||||
{"name":"highlight","passed":12,"failed":0,"total":12},
|
||||
{"name":"stem","passed":18,"failed":0,"total":18},
|
||||
{"name":"near","passed":9,"failed":0,"total":9},
|
||||
{"name":"syn","passed":9,"failed":0,"total":9}
|
||||
{"name":"syn","passed":9,"failed":0,"total":9},
|
||||
{"name":"rankq","passed":11,"failed":0,"total":11}
|
||||
],
|
||||
"generated": "2026-06-06T23:25:35+00:00"
|
||||
"generated": "2026-06-06T23:58:05+00:00"
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# search scoreboard
|
||||
|
||||
**214 / 214 passing** (0 failure(s)).
|
||||
**225 / 225 passing** (0 failure(s)).
|
||||
|
||||
| Suite | Passed | Total | Status |
|
||||
|-------|--------|-------|--------|
|
||||
@@ -16,3 +16,4 @@
|
||||
| stem | 18 | 18 | ok |
|
||||
| near | 9 | 9 | ok |
|
||||
| syn | 9 | 9 | ok |
|
||||
| rankq | 11 | 11 | ok |
|
||||
|
||||
67
lib/search/tests/rankq.sx
Normal file
67
lib/search/tests/rankq.sx
Normal file
@@ -0,0 +1,67 @@
|
||||
;; Extension — boolean-filtered ranked search (filter then rank by relevance).
|
||||
;; Corpus:
|
||||
;; 1 "apple apple banana" apple2 banana1
|
||||
;; 2 "apple cherry" apple1 cherry1
|
||||
;; 3 "banana cherry" banana1 cherry1
|
||||
;; 4 "apple banana cherry" apple1 banana1 cherry1
|
||||
|
||||
(define
|
||||
rankq-setup
|
||||
"idx = indexDoc 4 \"apple banana cherry\" (indexDoc 3 \"banana cherry\" (indexDoc 2 \"apple cherry\" (indexDoc 1 \"apple apple banana\" emptyIndex)))\n")
|
||||
|
||||
(define
|
||||
rankq-cases
|
||||
(list
|
||||
(list
|
||||
"queryTerms and"
|
||||
"queryTerms (parseQuery \"apple AND banana\")"
|
||||
(list "apple" "banana"))
|
||||
(list
|
||||
"queryTerms or not"
|
||||
"queryTerms (parseQuery \"a OR NOT b\")"
|
||||
(list "a" "b"))
|
||||
(list
|
||||
"queryTerms phrase"
|
||||
"queryTerms (parseQuery \"\\\"x y\\\" OR z\")"
|
||||
(list "x" "y" "z"))
|
||||
(list
|
||||
"and filter ranked by tf"
|
||||
"searchRankTfIdf \"apple AND banana\" idx"
|
||||
(list 1 4))
|
||||
(list
|
||||
"single term ranked tie"
|
||||
"searchRankTfIdf \"cherry\" idx"
|
||||
(list 2 3 4))
|
||||
(list
|
||||
"or filter ranked"
|
||||
"searchRankTfIdf \"apple OR banana\" idx"
|
||||
(list 1 4 2 3))
|
||||
(list
|
||||
"and-not narrows then ranks"
|
||||
"searchRankTfIdf \"apple AND NOT banana\" idx"
|
||||
(list 2))
|
||||
(list
|
||||
"phrase filter ranked"
|
||||
"searchRankTfIdf \"\\\"apple banana\\\"\" idx"
|
||||
(list 1 4))
|
||||
(list "no matches" "searchRankTfIdf \"zzz\" idx" (list))
|
||||
(list
|
||||
"bm25 boolean ranked subset"
|
||||
"sort (searchRankBm25 1.5 0.75 \"apple OR banana\" idx)"
|
||||
(list 1 2 3 4))
|
||||
(list
|
||||
"bm25 and filter"
|
||||
"searchRankBm25 1.5 0.75 \"apple AND NOT banana\" idx"
|
||||
(list 2))))
|
||||
|
||||
(define
|
||||
rankq-results
|
||||
(search-batch rankq-setup (map (fn (c) (nth c 1)) rankq-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth rankq-results i) (nth c 2)))
|
||||
rankq-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
@@ -120,9 +120,15 @@ lib/search/index.sx lib/search/eval.sx
|
||||
— 18 tests
|
||||
- [x] proximity / NEAR — `nearDocs k t1 t2` (unordered, within k positions) — 9 tests
|
||||
- [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests
|
||||
- [x] boolean-filtered ranked search — `queryTerms`, `searchRankTfIdf`,
|
||||
`searchRankBm25` (filter by boolean query, rank survivors by relevance) — 11 tests
|
||||
|
||||
## Progress log
|
||||
|
||||
- **Extension: boolean-filtered ranked search (225/225 total).** `searchRankTfIdf`/
|
||||
`searchRankBm25` parse a boolean query, filter docs via evalQuery, then rank the
|
||||
survivors by relevance over the query's leaf terms (`queryTerms`) — the real-world
|
||||
filter-then-rank pattern. 11 tests.
|
||||
- **Extension: synonyms/query expansion (214/214 total).** A synonym map
|
||||
`[(Term,[Term])]` expands a query term to itself + synonyms (`expandTerm`); `synDocs`
|
||||
unions, `synRankTfIdf` ranks the expanded set. 9 tests.
|
||||
|
||||
Reference in New Issue
Block a user