search: did-you-mean spelling suggestion + 9 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 30s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 30s
suggest/suggestN rank indexed terms by edit distance to a (possibly misspelled) query term, breaking ties alphabetically. All 234/234 tests pass. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,7 @@
|
|||||||
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
|
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
|
||||||
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
|
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
|
||||||
;; nearDocs, expandTerm, synDocs, synRankTfIdf, queryTerms, searchRankTfIdf,
|
;; nearDocs, expandTerm, synDocs, synRankTfIdf, queryTerms, searchRankTfIdf,
|
||||||
;; searchRankBm25.
|
;; searchRankBm25, suggestN, suggest.
|
||||||
|
|
||||||
(define
|
(define
|
||||||
search/src
|
search/src
|
||||||
@@ -39,4 +39,6 @@
|
|||||||
"\n"
|
"\n"
|
||||||
search/syn-src
|
search/syn-src
|
||||||
"\n"
|
"\n"
|
||||||
search/rankq-src))
|
search/rankq-src
|
||||||
|
"\n"
|
||||||
|
search/suggest-src))
|
||||||
|
|||||||
@@ -32,6 +32,7 @@ PRELOADS=(
|
|||||||
lib/search/near.sx
|
lib/search/near.sx
|
||||||
lib/search/syn.sx
|
lib/search/syn.sx
|
||||||
lib/search/rankq.sx
|
lib/search/rankq.sx
|
||||||
|
lib/search/suggest.sx
|
||||||
lib/search/api.sx
|
lib/search/api.sx
|
||||||
lib/search/testlib.sx
|
lib/search/testlib.sx
|
||||||
)
|
)
|
||||||
@@ -50,4 +51,5 @@ SUITES=(
|
|||||||
"near:lib/search/tests/near.sx"
|
"near:lib/search/tests/near.sx"
|
||||||
"syn:lib/search/tests/syn.sx"
|
"syn:lib/search/tests/syn.sx"
|
||||||
"rankq:lib/search/tests/rankq.sx"
|
"rankq:lib/search/tests/rankq.sx"
|
||||||
|
"suggest:lib/search/tests/suggest.sx"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
{
|
{
|
||||||
"lang": "search",
|
"lang": "search",
|
||||||
"total_passed": 225,
|
"total_passed": 234,
|
||||||
"total_failed": 0,
|
"total_failed": 0,
|
||||||
"total": 225,
|
"total": 234,
|
||||||
"suites": [
|
"suites": [
|
||||||
{"name":"index","passed":18,"failed":0,"total":18},
|
{"name":"index","passed":18,"failed":0,"total":18},
|
||||||
{"name":"boolean","passed":28,"failed":0,"total":28},
|
{"name":"boolean","passed":28,"failed":0,"total":28},
|
||||||
@@ -16,7 +16,8 @@
|
|||||||
{"name":"stem","passed":18,"failed":0,"total":18},
|
{"name":"stem","passed":18,"failed":0,"total":18},
|
||||||
{"name":"near","passed":9,"failed":0,"total":9},
|
{"name":"near","passed":9,"failed":0,"total":9},
|
||||||
{"name":"syn","passed":9,"failed":0,"total":9},
|
{"name":"syn","passed":9,"failed":0,"total":9},
|
||||||
{"name":"rankq","passed":11,"failed":0,"total":11}
|
{"name":"rankq","passed":11,"failed":0,"total":11},
|
||||||
|
{"name":"suggest","passed":9,"failed":0,"total":9}
|
||||||
],
|
],
|
||||||
"generated": "2026-06-06T23:58:05+00:00"
|
"generated": "2026-06-07T00:44:05+00:00"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# search scoreboard
|
# search scoreboard
|
||||||
|
|
||||||
**225 / 225 passing** (0 failure(s)).
|
**234 / 234 passing** (0 failure(s)).
|
||||||
|
|
||||||
| Suite | Passed | Total | Status |
|
| Suite | Passed | Total | Status |
|
||||||
|-------|--------|-------|--------|
|
|-------|--------|-------|--------|
|
||||||
@@ -17,3 +17,4 @@
|
|||||||
| near | 9 | 9 | ok |
|
| near | 9 | 9 | ok |
|
||||||
| syn | 9 | 9 | ok |
|
| syn | 9 | 9 | ok |
|
||||||
| rankq | 11 | 11 | ok |
|
| rankq | 11 | 11 | ok |
|
||||||
|
| suggest | 9 | 9 | ok |
|
||||||
|
|||||||
9
lib/search/suggest.sx
Normal file
9
lib/search/suggest.sx
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
;; search did-you-mean / spelling suggestion — Haskell source fragment.
|
||||||
|
;; Depends on fuzzy (editDist) + index (allTerms). Ranks indexed terms by edit
|
||||||
|
;; distance to a (possibly misspelled) query term; ties broken alphabetically.
|
||||||
|
;; suggestN :: Int -> String -> Index -> [Term]   (at most n terms, closest first)
|
||||||
|
;; suggest :: String -> Index -> Term ("" if the index has no terms)
|
||||||
|
|
||||||
|
(define
|
||||||
|
search/suggest-src
|
||||||
|
"sgMk term t = (editDist term t, t)\nsgPairs term idx = map (sgMk term) (allTerms idx)\nsgCmp p1 p2 = if fst p1 < fst p2 then LT else if fst p1 > fst p2 then GT else compare (snd p1) (snd p2)\nsuggestN n term idx = take n (map snd (sortBy sgCmp (sgPairs term idx)))\nsgHead [] = \"\"\nsgHead (x:xs) = x\nsuggest term idx = sgHead (suggestN 1 term idx)\n")
|
||||||
42
lib/search/tests/suggest.sx
Normal file
42
lib/search/tests/suggest.sx
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
;; Extension — did-you-mean / spelling suggestion.
|
||||||
|
;; Corpus terms (sorted): ample apple apply banana orange
|
||||||
|
|
||||||
|
(define
|
||||||
|
suggest-setup
|
||||||
|
"idx = indexDoc 1 \"apple apply ample banana orange\" emptyIndex\n")
|
||||||
|
|
||||||
|
(define
|
||||||
|
suggest-cases
|
||||||
|
(list
|
||||||
|
(list "suggest exact term" "[suggest \"apple\" idx]" (list "apple"))
|
||||||
|
(list
|
||||||
|
"suggest misspelled banana"
|
||||||
|
"[suggest \"bananna\" idx]"
|
||||||
|
(list "banana"))
|
||||||
|
(list
|
||||||
|
"suggest missing letter orange"
|
||||||
|
"[suggest \"orang\" idx]"
|
||||||
|
(list "orange"))
|
||||||
|
(list "suggest closest apply" "[suggest \"aply\" idx]" (list "apply"))
|
||||||
|
(list "suggestN 1 banana" "suggestN 1 \"bananna\" idx" (list "banana"))
|
||||||
|
(list
|
||||||
|
"suggestN 2 ties alpha"
|
||||||
|
"suggestN 2 \"aple\" idx"
|
||||||
|
(list "ample" "apple"))
|
||||||
|
(list "suggest empty term shortest" "[suggest \"\" idx]" (list "ample"))
|
||||||
|
(list "suggest empty index" "[suggest \"apple\" emptyIndex]" (list ""))
|
||||||
|
(list "suggestN empty index" "suggestN 1 \"apple\" emptyIndex" (list))))
|
||||||
|
|
||||||
|
(define
|
||||||
|
suggest-results
|
||||||
|
(search-batch
|
||||||
|
suggest-setup
|
||||||
|
(map (fn (c) (nth c 1)) suggest-cases)))
|
||||||
|
|
||||||
|
(map-indexed
|
||||||
|
(fn
|
||||||
|
(i c)
|
||||||
|
(hk-test (nth c 0) (nth suggest-results i) (nth c 2)))
|
||||||
|
suggest-cases)
|
||||||
|
|
||||||
|
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||||
@@ -122,9 +122,14 @@ lib/search/index.sx lib/search/eval.sx
|
|||||||
- [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests
|
- [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests
|
||||||
- [x] boolean-filtered ranked search — `queryTerms`, `searchRankTfIdf`,
|
- [x] boolean-filtered ranked search — `queryTerms`, `searchRankTfIdf`,
|
||||||
`searchRankBm25` (filter by boolean query, rank survivors by relevance) — 11 tests
|
`searchRankBm25` (filter by boolean query, rank survivors by relevance) — 11 tests
|
||||||
|
- [x] did-you-mean / spelling suggestion — `suggest`, `suggestN` (closest indexed
|
||||||
|
terms by edit distance, alphabetical tiebreak) — 9 tests
|
||||||
|
|
||||||
## Progress log
|
## Progress log
|
||||||
|
|
||||||
|
- **Extension: did-you-mean / spelling suggestion (234/234 total).** `suggest`/`suggestN`
|
||||||
|
rank indexed terms by edit distance to a (misspelled) query term, alphabetical
|
||||||
|
tiebreak. 9 tests.
|
||||||
- **Extension: boolean-filtered ranked search (225/225 total).** `searchRankTfIdf`/
|
- **Extension: boolean-filtered ranked search (225/225 total).** `searchRankTfIdf`/
|
||||||
`searchRankBm25` parse a boolean query, filter docs via evalQuery, then rank the
|
`searchRankBm25` parse a boolean query, filter docs via evalQuery, then rank the
|
||||||
survivors by relevance over the query's leaf terms (`queryTerms`) — the real-world
|
survivors by relevance over the query's leaf terms (`queryTerms`) — the real-world
|
||||||
|
|||||||
Reference in New Issue
Block a user