search: did-you-mean spelling suggestion + 9 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 30s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 30s
suggest/suggestN rank indexed terms by edit distance to a (misspelled) query term, alphabetical tiebreak. 234/234. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs,
|
||||
;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed,
|
||||
;; nearDocs, expandTerm, synDocs, synRankTfIdf, queryTerms, searchRankTfIdf,
|
||||
;; searchRankBm25.
|
||||
;; searchRankBm25, suggestN, suggest.
|
||||
|
||||
(define
|
||||
search/src
|
||||
@@ -39,4 +39,6 @@
|
||||
"\n"
|
||||
search/syn-src
|
||||
"\n"
|
||||
search/rankq-src))
|
||||
search/rankq-src
|
||||
"\n"
|
||||
search/suggest-src))
|
||||
|
||||
@@ -32,6 +32,7 @@ PRELOADS=(
|
||||
lib/search/near.sx
|
||||
lib/search/syn.sx
|
||||
lib/search/rankq.sx
|
||||
lib/search/suggest.sx
|
||||
lib/search/api.sx
|
||||
lib/search/testlib.sx
|
||||
)
|
||||
@@ -50,4 +51,5 @@ SUITES=(
|
||||
"near:lib/search/tests/near.sx"
|
||||
"syn:lib/search/tests/syn.sx"
|
||||
"rankq:lib/search/tests/rankq.sx"
|
||||
"suggest:lib/search/tests/suggest.sx"
|
||||
)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
{
|
||||
"lang": "search",
|
||||
"total_passed": 225,
|
||||
"total_passed": 234,
|
||||
"total_failed": 0,
|
||||
"total": 225,
|
||||
"total": 234,
|
||||
"suites": [
|
||||
{"name":"index","passed":18,"failed":0,"total":18},
|
||||
{"name":"boolean","passed":28,"failed":0,"total":28},
|
||||
@@ -16,7 +16,8 @@
|
||||
{"name":"stem","passed":18,"failed":0,"total":18},
|
||||
{"name":"near","passed":9,"failed":0,"total":9},
|
||||
{"name":"syn","passed":9,"failed":0,"total":9},
|
||||
{"name":"rankq","passed":11,"failed":0,"total":11}
|
||||
{"name":"rankq","passed":11,"failed":0,"total":11},
|
||||
{"name":"suggest","passed":9,"failed":0,"total":9}
|
||||
],
|
||||
"generated": "2026-06-06T23:58:05+00:00"
|
||||
"generated": "2026-06-07T00:44:05+00:00"
|
||||
}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# search scoreboard
|
||||
|
||||
**225 / 225 passing** (0 failure(s)).
|
||||
**234 / 234 passing** (0 failure(s)).
|
||||
|
||||
| Suite | Passed | Total | Status |
|
||||
|-------|--------|-------|--------|
|
||||
@@ -17,3 +17,4 @@
|
||||
| near | 9 | 9 | ok |
|
||||
| syn | 9 | 9 | ok |
|
||||
| rankq | 11 | 11 | ok |
|
||||
| suggest | 9 | 9 | ok |
|
||||
|
||||
9
lib/search/suggest.sx
Normal file
9
lib/search/suggest.sx
Normal file
@@ -0,0 +1,9 @@
|
||||
;; search did-you-mean / spelling suggestion — Haskell source fragment.
|
||||
;; Depends on fuzzy (editDist) + index (allTerms). Ranks indexed terms by edit
|
||||
;; distance to a (possibly misspelled) query term; ties broken alphabetically.
|
||||
;; suggestN :: Int -> String -> Index -> [Term]
|
||||
;; suggest :: String -> Index -> Term ("" if the index has no terms)
|
||||
|
||||
;; Binds search/suggest-src to the Haskell source text for the suggestion helpers.
;; Definitions contained in the string:
;;   sgMk term t     - pairs candidate t with its edit distance: (editDist term t, t)
;;   sgPairs term idx - sgMk applied to every term returned by allTerms idx
;;   sgCmp p1 p2     - orders pairs by distance (fst) first, then by compare on the
;;                     term (snd), which gives the alphabetical tiebreak
;;   suggestN n term idx - first n terms of the pairs sorted with sgCmp
;;   sgHead          - "" for an empty list, otherwise the first element
;;   suggest term idx - sgHead of suggestN 1, so "" when no term is available
;; editDist and allTerms are not defined here; they must come from other fragments.
(define
|
||||
search/suggest-src
|
||||
"sgMk term t = (editDist term t, t)\nsgPairs term idx = map (sgMk term) (allTerms idx)\nsgCmp p1 p2 = if fst p1 < fst p2 then LT else if fst p1 > fst p2 then GT else compare (snd p1) (snd p2)\nsuggestN n term idx = take n (map snd (sortBy sgCmp (sgPairs term idx)))\nsgHead [] = \"\"\nsgHead (x:xs) = x\nsuggest term idx = sgHead (suggestN 1 term idx)\n")
|
||||
42
lib/search/tests/suggest.sx
Normal file
42
lib/search/tests/suggest.sx
Normal file
@@ -0,0 +1,42 @@
|
||||
;; Extension — did-you-mean / spelling suggestion.
|
||||
;; Corpus terms (sorted): ample apple apply banana orange
|
||||
|
||||
;; Haskell setup source for the suite: binds idx to the result of indexing
;; document 1 with the text "apple apply ample banana orange" into emptyIndex.
;; Passed as the first argument to search-batch further down in this file.
(define
|
||||
suggest-setup
|
||||
"idx = indexDoc 1 \"apple apply ample banana orange\" emptyIndex\n")
|
||||
|
||||
;; Test table. Each case is a three-element list:
;;   0: test name, 1: Haskell expression source, 2: expected value.
;; The `suggest` expressions are wrapped in a Haskell list literal ([...]) and the
;; expected value is a one-element list; `suggestN` expressions are left bare and
;; compared against a list directly.
;; NOTE(review): presumably the wrapping lets both forms be compared as lists —
;; confirm against search-batch / hk-test.
(define
|
||||
suggest-cases
|
||||
(list
|
||||
(list "suggest exact term" "[suggest \"apple\" idx]" (list "apple"))
|
||||
(list
|
||||
"suggest misspelled banana"
|
||||
"[suggest \"bananna\" idx]"
|
||||
(list "banana"))
|
||||
(list
|
||||
"suggest missing letter orange"
|
||||
"[suggest \"orang\" idx]"
|
||||
(list "orange"))
|
||||
(list "suggest closest apply" "[suggest \"aply\" idx]" (list "apply"))
|
||||
(list "suggestN 1 banana" "suggestN 1 \"bananna\" idx" (list "banana"))
|
||||
(list
|
||||
"suggestN 2 ties alpha"
|
||||
"suggestN 2 \"aple\" idx"
|
||||
(list "ample" "apple"))
|
||||
(list "suggest empty term shortest" "[suggest \"\" idx]" (list "ample"))
|
||||
(list "suggest empty index" "[suggest \"apple\" emptyIndex]" (list ""))
|
||||
(list "suggestN empty index" "suggestN 1 \"apple\" emptyIndex" (list))))
|
||||
|
||||
;; Collects the expression source (element 1) of every case and hands the list,
;; together with suggest-setup, to search-batch.
;; NOTE(review): the results are later looked up by case index, so search-batch is
;; assumed to return one result per expression in the same order — confirm.
(define
|
||||
suggest-results
|
||||
(search-batch
|
||||
suggest-setup
|
||||
(map (fn (c) (nth c 1)) suggest-cases)))
|
||||
|
||||
;; Runs one hk-test per case: the case name (element 0), the result found at the
;; same index in suggest-results, and the expected value (element 2).
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth suggest-results i) (nth c 2)))
|
||||
suggest-cases)
|
||||
|
||||
;; Final expression of the file: a map exposing hk-test-fail, hk-test-pass and
;; hk-test-fails under the keys :fail, :pass and :fails.
;; NOTE(review): presumably read by the suite runner as this file's result — confirm.
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
@@ -122,9 +122,14 @@ lib/search/index.sx lib/search/eval.sx
|
||||
- [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests
|
||||
- [x] boolean-filtered ranked search — `queryTerms`, `searchRankTfIdf`,
|
||||
`searchRankBm25` (filter by boolean query, rank survivors by relevance) — 11 tests
|
||||
- [x] did-you-mean / spelling suggestion — `suggest`, `suggestN` (closest indexed
|
||||
terms by edit distance, alphabetical tiebreak) — 9 tests
|
||||
|
||||
## Progress log
|
||||
|
||||
- **Extension: did-you-mean / spelling suggestion (234/234 total).** `suggest`/`suggestN`
|
||||
rank indexed terms by edit distance to a (misspelled) query term, alphabetical
|
||||
tiebreak. 9 tests.
|
||||
- **Extension: boolean-filtered ranked search (225/225 total).** `searchRankTfIdf`/
|
||||
`searchRankBm25` parse a boolean query, filter docs via evalQuery, then rank the
|
||||
survivors by relevance over the query's leaf terms (`queryTerms`) — the real-world
|
||||
|
||||
Reference in New Issue
Block a user