search: stemming (suffix stripping) + 18 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 16s
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 16s
Deterministic English suffix stripping (stem), stemText/stemTokens, indexStemmed. Worked around two haskell-on-sx string gotchas: take/drop over a String yield char codes (rebuild via joinChars . map chr), and isSuffixOf's reverse trips ++ (manual suffix compare). 196/196. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
47
lib/search/tests/stem.sx
Normal file
47
lib/search/tests/stem.sx
Normal file
@@ -0,0 +1,47 @@
|
||||
;; Extension — stemming (suffix stripping). Scalar string results wrapped in [].
|
||||
|
||||
(define
|
||||
stem-cases
|
||||
(list
|
||||
(list "stem plural s" "[stem \"cats\"]" (list "cat"))
|
||||
(list "stem plural dogs" "[stem \"dogs\"]" (list "dog"))
|
||||
(list "stem keeps ss" "[stem \"pass\"]" (list "pass"))
|
||||
(list "stem short s unchanged" "[stem \"is\"]" (list "is"))
|
||||
(list "stem es boxes" "[stem \"boxes\"]" (list "box"))
|
||||
(list "stem es wishes" "[stem \"wishes\"]" (list "wish"))
|
||||
(list "stem ies cities" "[stem \"cities\"]" (list "city"))
|
||||
(list "stem ies parties" "[stem \"parties\"]" (list "party"))
|
||||
(list "stem ing jumping" "[stem \"jumping\"]" (list "jump"))
|
||||
(list "stem ing running literal" "[stem \"running\"]" (list "runn"))
|
||||
(list "stem ed jumped" "[stem \"jumped\"]" (list "jump"))
|
||||
(list "stem ed wanted" "[stem \"wanted\"]" (list "want"))
|
||||
(list "stem short ed unchanged" "[stem \"red\"]" (list "red"))
|
||||
(list "stem no suffix" "[stem \"cat\"]" (list "cat"))
|
||||
(list
|
||||
"stemText normalizes and stems"
|
||||
"[stemText \"Cats Running!\"]"
|
||||
(list "cat runn"))
|
||||
(list
|
||||
"stemTokens list"
|
||||
"stemTokens \"boxes and cats\""
|
||||
(list "box" "and" "cat"))
|
||||
(list
|
||||
"indexStemmed unifies plural"
|
||||
"map fst (lookupTerm \"cat\" (indexStemmed 2 \"a cat\" (indexStemmed 1 \"the cats\" emptyIndex)))"
|
||||
(list 1 2))
|
||||
(list
|
||||
"indexStemmed stem query"
|
||||
"map fst (lookupTerm (stem \"boxes\") (indexStemmed 1 \"many boxes\" emptyIndex))"
|
||||
(list 1))))
|
||||
|
||||
(define
|
||||
stem-results
|
||||
(search-batch "" (map (fn (c) (nth c 1)) stem-cases)))
|
||||
|
||||
(map-indexed
|
||||
(fn
|
||||
(i c)
|
||||
(hk-test (nth c 0) (nth stem-results i) (nth c 2)))
|
||||
stem-cases)
|
||||
|
||||
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
|
||||
Reference in New Issue
Block a user