search: prefix/wildcard queries + 14 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 31s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 31s
prefixTerms matches indexed terms by prefix (allTerms + isPrefixOf); prefixDocs unions their docs; prefixRankTfIdf ranks via the matched terms. 136/136. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -4,7 +4,7 @@
|
||||
;; interpreter. Public Haskell entry points: indexDoc, lookupTerm, deleteDoc,
|
||||
;; docFreq, allTerms, tokens, positioned, evalQuery, parseQuery, searchQuery,
|
||||
;; rankTfIdf, rankBm25, topNTfIdf, topNBm25, fedIndex, aclFilter, searchTfIdfAcl,
|
||||
;; topNTfIdfAcl, searchBm25Acl.
|
||||
;; topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs, prefixRankTfIdf.
|
||||
|
||||
(define
|
||||
search/src
|
||||
@@ -19,4 +19,6 @@
|
||||
"\n"
|
||||
search/rank-src
|
||||
"\n"
|
||||
search/fed-src))
|
||||
search/fed-src
|
||||
"\n"
|
||||
search/prefix-src))
|
||||
|
||||
@@ -24,6 +24,7 @@ PRELOADS=(
|
||||
lib/search/parse.sx
|
||||
lib/search/rank.sx
|
||||
lib/search/fed.sx
|
||||
lib/search/prefix.sx
|
||||
lib/search/api.sx
|
||||
lib/search/testlib.sx
|
||||
)
|
||||
@@ -34,4 +35,5 @@ SUITES=(
|
||||
"parse:lib/search/tests/parse.sx"
|
||||
"rank:lib/search/tests/rank.sx"
|
||||
"integration:lib/search/tests/integration.sx"
|
||||
"prefix:lib/search/tests/prefix.sx"
|
||||
)
|
||||
|
||||
10
lib/search/prefix.sx
Normal file
10
lib/search/prefix.sx
Normal file
@@ -0,0 +1,10 @@
|
||||
;; search prefix / wildcard queries — Haskell source fragment. Depends on index +
|
||||
;; rank (reuses candStep / rankTfIdf). A prefix matches every indexed term that
|
||||
;; starts with it; the matching terms are unioned (OR) into a docid set.
|
||||
;; prefixTerms :: String -> Index -> [Term] (sorted, from allTerms)
|
||||
;; prefixDocs :: String -> Index -> [DocId] (sorted union)
|
||||
;; prefixRankTfIdf :: String -> Index -> [DocId] (ranked by the matched terms)
|
||||
|
||||
(define
|
||||
search/prefix-src
|
||||
"prefixTerms pre idx = filter (isPrefixOf pre) (allTerms idx)\nprefixDocs pre idx = foldl (candStep idx) [] (prefixTerms pre idx)\nprefixRankTfIdf pre idx = rankTfIdf (prefixTerms pre idx) idx\n")
|
||||
@@ -1,14 +1,15 @@
|
||||
{
|
||||
"lang": "search",
|
||||
"total_passed": 122,
|
||||
"total_passed": 136,
|
||||
"total_failed": 0,
|
||||
"total": 122,
|
||||
"total": 136,
|
||||
"suites": [
|
||||
{"name":"index","passed":18,"failed":0,"total":18},
|
||||
{"name":"boolean","passed":28,"failed":0,"total":28},
|
||||
{"name":"parse","passed":32,"failed":0,"total":32},
|
||||
{"name":"rank","passed":23,"failed":0,"total":23},
|
||||
{"name":"integration","passed":21,"failed":0,"total":21}
|
||||
{"name":"integration","passed":21,"failed":0,"total":21},
|
||||
{"name":"prefix","passed":14,"failed":0,"total":14}
|
||||
],
|
||||
"generated": "2026-06-06T20:07:30+00:00"
|
||||
"generated": "2026-06-06T20:21:41+00:00"
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# search scoreboard
|
||||
|
||||
**122 / 122 passing** (0 failure(s)).
|
||||
**136 / 136 passing** (0 failure(s)).
|
||||
|
||||
| Suite | Passed | Total | Status |
|
||||
|-------|--------|-------|--------|
|
||||
@@ -9,3 +9,4 @@
|
||||
| parse | 32 | 32 | ok |
|
||||
| rank | 23 | 23 | ok |
|
||||
| integration | 21 | 21 | ok |
|
||||
| prefix | 14 | 14 | ok |
|
||||
|
||||
63
lib/search/tests/prefix.sx
Normal file
63
lib/search/tests/prefix.sx
Normal file
@@ -0,0 +1,63 @@
|
||||
;; Extension — prefix / wildcard queries.
|
||||
;; Corpus: 1 "alpha alpine" 2 "beta apple" 3 "banana alpha"
|
||||
;; allTerms sorted: alpha alpine apple banana beta
|
||||
|
||||
(define
|
||||
prefix-setup
|
||||
"idx = indexDoc 3 \"banana alpha\" (indexDoc 2 \"beta apple\" (indexDoc 1 \"alpha alpine\" emptyIndex))\n")
|
||||
|
||||
(define
|
||||
prefix-cases
|
||||
(list
|
||||
(list
|
||||
"prefix terms two matches"
|
||||
"prefixTerms \"al\" idx"
|
||||
(list "alpha" "alpine"))
|
||||
(list
|
||||
"prefix terms narrower"
|
||||
"prefixTerms \"alp\" idx"
|
||||
(list "alpha" "alpine"))
|
||||
(list
|
||||
"prefix terms wide"
|
||||
"prefixTerms \"a\" idx"
|
||||
(list "alpha" "alpine" "apple"))
|
||||
(list "prefix terms single" "prefixTerms \"ban\" idx" (list "banana"))
|
||||
(list "prefix terms exact term" "prefixTerms \"beta\" idx" (list "beta"))
|
||||
(list "prefix terms none" "prefixTerms \"z\" idx" (list))
|
||||
(list
|
||||
"prefix docs union"
|
||||
"prefixDocs \"al\" idx"
|
||||
(list 1 3))
|
||||
(list "prefix docs single term" "prefixDocs \"ban\" idx" (list 3))
|
||||
(list
|
||||
"prefix docs wide"
|
||||
"prefixDocs \"a\" idx"
|
||||
(list 1 2 3))
|
||||
(list "prefix docs none" "prefixDocs \"z\" idx" (list))
|
||||
(list
|
||||
"prefix docs exact"
|
||||
"prefixDocs \"alpha\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"prefix rank ranks by matched terms"
|
||||
"prefixRankTfIdf \"al\" idx"
|
||||
(list 1 3))
|
||||
(list
|
||||
"prefix rank single doc"
|
||||
"prefixRankTfIdf \"ban\" idx"
|
||||
(list 3))
|
||||
(list "prefix rank empty" "prefixRankTfIdf \"z\" idx" (list))))
|
||||
|
||||
(define
|
||||
prefix-results
|
||||
(search-batch
|
||||
prefix-setup
|
||||
(map (fn (c) (nth c 1)) prefix-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth prefix-results i) (nth c 2)))
|
||||
prefix-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
@@ -108,8 +108,19 @@ lib/search/index.sx lib/search/eval.sx
|
||||
- [x] `lib/search/tests/integration.sx` — 21 cases: index merge, cross-peer df/lookup,
|
||||
position preservation, boolean/phrase over the merge, ACL filter + top-N + bm25
|
||||
|
||||
## Extensions (post-roadmap, search-shaped vocabulary)
|
||||
|
||||
- [x] prefix / wildcard queries (`prefixTerms`, `prefixDocs`, `prefixRankTfIdf`) — 14 tests
|
||||
- [ ] fuzzy matching — edit distance term expansion
|
||||
- [ ] result pagination (offset / limit)
|
||||
- [ ] snippet / highlight generation
|
||||
- [ ] stemming (suffix stripping) — recall-improving normalizer
|
||||
|
||||
## Progress log
|
||||
|
||||
- **Extension: prefix/wildcard queries (136/136 total).** `prefixTerms` matches every
|
||||
indexed term starting with a prefix (via allTerms + isPrefixOf); `prefixDocs` unions
|
||||
their docs; `prefixRankTfIdf` ranks treating the matched terms as the query. 14 tests.
|
||||
- **Phase 4 complete — federation + ACL (122/122 total). Roadmap done.** `fedIndex`
|
||||
merges per-peer inverted indices (union posting lists per term) after relabelling
|
||||
local DocIds to global `gid = peer*1000 + local` — the bijection makes (peer,doc-id)
|
||||
|
||||
Reference in New Issue
Block a user