diff --git a/lib/search/api.sx b/lib/search/api.sx index 29f445af..dd66031b 100644 --- a/lib/search/api.sx +++ b/lib/search/api.sx @@ -8,7 +8,7 @@ ;; paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms, fuzzyDocs, ;; fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens, indexStemmed, ;; nearDocs, expandTerm, synDocs, synRankTfIdf, queryTerms, searchRankTfIdf, -;; searchRankBm25. +;; searchRankBm25, suggestN, suggest. (define search/src @@ -39,4 +39,6 @@ "\n" search/syn-src "\n" - search/rankq-src)) + search/rankq-src + "\n" + search/suggest-src)) diff --git a/lib/search/conformance.conf b/lib/search/conformance.conf index 9c7b006e..ec0fa631 100644 --- a/lib/search/conformance.conf +++ b/lib/search/conformance.conf @@ -32,6 +32,7 @@ PRELOADS=( lib/search/near.sx lib/search/syn.sx lib/search/rankq.sx + lib/search/suggest.sx lib/search/api.sx lib/search/testlib.sx ) @@ -50,4 +51,5 @@ SUITES=( "near:lib/search/tests/near.sx" "syn:lib/search/tests/syn.sx" "rankq:lib/search/tests/rankq.sx" + "suggest:lib/search/tests/suggest.sx" ) diff --git a/lib/search/scoreboard.json b/lib/search/scoreboard.json index 3ea5b5ee..d548e4b3 100644 --- a/lib/search/scoreboard.json +++ b/lib/search/scoreboard.json @@ -1,8 +1,8 @@ { "lang": "search", - "total_passed": 225, + "total_passed": 234, "total_failed": 0, - "total": 225, + "total": 234, "suites": [ {"name":"index","passed":18,"failed":0,"total":18}, {"name":"boolean","passed":28,"failed":0,"total":28}, @@ -16,7 +16,8 @@ {"name":"stem","passed":18,"failed":0,"total":18}, {"name":"near","passed":9,"failed":0,"total":9}, {"name":"syn","passed":9,"failed":0,"total":9}, - {"name":"rankq","passed":11,"failed":0,"total":11} + {"name":"rankq","passed":11,"failed":0,"total":11}, + {"name":"suggest","passed":9,"failed":0,"total":9} ], - "generated": "2026-06-06T23:58:05+00:00" + "generated": "2026-06-07T00:44:05+00:00" } diff --git a/lib/search/scoreboard.md b/lib/search/scoreboard.md index 2cc7fd9c..4a59608e 100644 --- 
a/lib/search/scoreboard.md +++ b/lib/search/scoreboard.md @@ -1,6 +1,6 @@ # search scoreboard -**225 / 225 passing** (0 failure(s)). +**234 / 234 passing** (0 failure(s)). | Suite | Passed | Total | Status | |-------|--------|-------|--------| @@ -17,3 +17,4 @@ | near | 9 | 9 | ok | | syn | 9 | 9 | ok | | rankq | 11 | 11 | ok | +| suggest | 9 | 9 | ok | diff --git a/lib/search/suggest.sx b/lib/search/suggest.sx new file mode 100644 index 00000000..7b06b1fb --- /dev/null +++ b/lib/search/suggest.sx @@ -0,0 +1,9 @@ +;; search did-you-mean / spelling suggestion — Haskell source fragment. +;; Depends on fuzzy (editDist) + index (allTerms). Ranks every indexed term by edit +;; distance to a (possibly misspelled) query term; ties broken alphabetically (compare on the term). +;; suggestN :: Int -> String -> Index -> [Term] (the first n terms of that ranking) +;; suggest :: String -> Index -> Term (head of suggestN 1; "" if the index has no terms) + +(define + search/suggest-src + "sgMk term t = (editDist term t, t)\nsgPairs term idx = map (sgMk term) (allTerms idx)\nsgCmp p1 p2 = if fst p1 < fst p2 then LT else if fst p1 > fst p2 then GT else compare (snd p1) (snd p2)\nsuggestN n term idx = take n (map snd (sortBy sgCmp (sgPairs term idx)))\nsgHead [] = \"\"\nsgHead (x:xs) = x\nsuggest term idx = sgHead (suggestN 1 term idx)\n") diff --git a/lib/search/tests/suggest.sx b/lib/search/tests/suggest.sx new file mode 100644 index 00000000..164b43ec --- /dev/null +++ b/lib/search/tests/suggest.sx @@ -0,0 +1,42 @@ +;; Extension — did-you-mean / spelling suggestion. 
+;; Corpus terms (sorted): ample apple apply banana orange — all indexed by the single indexDoc call in suggest-setup; this sorted order is the tiebreak order the ties/empty-term cases expect. + +(define + suggest-setup + "idx = indexDoc 1 \"apple apply ample banana orange\" emptyIndex\n") + +(define + suggest-cases + (list + (list "suggest exact term" "[suggest \"apple\" idx]" (list "apple")) + (list + "suggest misspelled banana" + "[suggest \"bananna\" idx]" + (list "banana")) + (list + "suggest missing letter orange" + "[suggest \"orang\" idx]" + (list "orange")) + (list "suggest closest apply" "[suggest \"aply\" idx]" (list "apply")) + (list "suggestN 1 banana" "suggestN 1 \"bananna\" idx" (list "banana")) + (list + "suggestN 2 ties alpha" + "suggestN 2 \"aple\" idx" + (list "ample" "apple")) + (list "suggest empty term shortest" "[suggest \"\" idx]" (list "ample")) + (list "suggest empty index" "[suggest \"apple\" emptyIndex]" (list "")) + (list "suggestN empty index" "suggestN 1 \"apple\" emptyIndex" (list)))) + +(define + suggest-results + (search-batch + suggest-setup + (map (fn (c) (nth c 1)) suggest-cases))) + +(map-indexed + (fn + (i c) + (hk-test (nth c 0) (nth suggest-results i) (nth c 2))) + suggest-cases) + +{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} diff --git a/plans/search-on-sx.md b/plans/search-on-sx.md index cf8c530a..4cd93e8f 100644 --- a/plans/search-on-sx.md +++ b/plans/search-on-sx.md @@ -122,9 +122,14 @@ lib/search/index.sx lib/search/eval.sx - [x] synonym / query expansion — `expandTerm`, `synDocs`, `synRankTfIdf` — 9 tests - [x] boolean-filtered ranked search — `queryTerms`, `searchRankTfIdf`, `searchRankBm25` (filter by boolean query, rank survivors by relevance) — 11 tests +- [x] did-you-mean / spelling suggestion — `suggest`, `suggestN` (closest indexed + terms by edit distance, alphabetical tiebreak) — 9 tests ## Progress log +- **Extension: did-you-mean / spelling suggestion (234/234 total).** `suggest`/`suggestN` + rank indexed terms by edit distance to a (misspelled) query term, alphabetical + tiebreak. 9 tests. 
- **Extension: boolean-filtered ranked search (225/225 total).** `searchRankTfIdf`/ `searchRankBm25` parse a boolean query, filter docs via evalQuery, then rank the survivors by relevance over the query's leaf terms (`queryTerms`) — the real-world