Merge loops/search into architecture: search-on-sx full-text search on Haskell

Tokenizer + inverted index, query AST (boolean/phrase) + parser, TF-IDF/BM25 ranking + top-N, federation merge + ACL post-filter, and 9 extensions (prefix, pagination, fuzzy, highlight, stem, NEAR, synonyms, boolean-ranked search, did-you-mean). lib/search/conformance.sh => 234/234 across 14 suites. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-07 09:16:57 +00:00
parent c5faf93813 5d62d08e1c
commit 644ea178c2
37 changed files with 1669 additions and 28 deletions
--- a/lib/search/api.sx
+++ b/lib/search/api.sx
@@ -0,0 +1,44 @@
+;; search public API — `search/src` is the complete Haskell program text: every
+;; layer's `search/*-src` fragment joined with newlines (so all fragment files
+;; must be loaded before this one). Callers append their own top-level bindings
+;; (e.g. "result = lookupTerm \"cat\" idx\n") and evaluate via haskell-on-sx.
+;; Public Haskell entry points: emptyIndex, indexDoc, lookupTerm, deleteDoc,
+;; docFreq, allTerms, tokens, positioned, evalQuery, parseQuery, searchQuery, showQ,
+;; numDocs, docLen, rankTfIdf, rankBm25, topNTfIdf, topNBm25, fedIndex, aclFilter,
+;; searchTfIdfAcl, topNTfIdfAcl, searchBm25Acl, prefixTerms, prefixDocs,
+;; prefixRankTfIdf, paginate, pageTfIdf, pageBm25, resultCount, editDist, fuzzyTerms,
+;; fuzzyDocs, fuzzyRankTfIdf, highlight, snippet, stem, stemText, stemTokens,
+;; indexStemmed, nearDocs, expandTerm, synDocs, synRankTfIdf, queryTerms,
+;; searchRankTfIdf, searchRankBm25, suggestN, suggest.
+(define
+  search/src
+  (str
+    search/tokenize-src
+    "\n"
+    search/index-src
+    "\n"
+    search/query-src
+    "\n"
+    search/parse-src
+    "\n"
+    search/rank-src
+    "\n"
+    search/fed-src
+    "\n"
+    search/prefix-src
+    "\n"
+    search/page-src
+    "\n"
+    search/fuzzy-src
+    "\n"
+    search/highlight-src
+    "\n"
+    search/stem-src
+    "\n"
+    search/near-src
+    "\n"
+    search/syn-src
+    "\n"
+    search/rankq-src
+    "\n"
+    search/suggest-src))
--- a/lib/search/conformance.conf
+++ b/lib/search/conformance.conf
@@ -0,0 +1,55 @@
+# search-on-sx conformance config — sourced by lib/guest/conformance.sh.
+
+LANG_NAME=search
+SCOREBOARD_DIR=lib/search
+MODE=counters
+COUNTERS_PASS=hk-test-pass
+COUNTERS_FAIL=hk-test-fail
+TIMEOUT_PER_SUITE=600
+# Preloads: lib/haskell files, the search fragments, then api.sx (references every search/*-src) and testlib.sx (references search/src).
+PRELOADS=(
+  lib/haskell/tokenizer.sx
+  lib/haskell/layout.sx
+  lib/haskell/parser.sx
+  lib/haskell/desugar.sx
+  lib/haskell/runtime.sx
+  lib/haskell/match.sx
+  lib/haskell/eval.sx
+  lib/haskell/map.sx
+  lib/haskell/set.sx
+  lib/haskell/testlib.sx
+  lib/search/tokenize.sx
+  lib/search/index.sx
+  lib/search/query.sx
+  lib/search/parse.sx
+  lib/search/rank.sx
+  lib/search/fed.sx
+  lib/search/prefix.sx
+  lib/search/page.sx
+  lib/search/fuzzy.sx
+  lib/search/highlight.sx
+  lib/search/stem.sx
+  lib/search/near.sx
+  lib/search/syn.sx
+  lib/search/rankq.sx
+  lib/search/suggest.sx
+  lib/search/api.sx
+  lib/search/testlib.sx
+)
+# Suites are "name:path" entries; the names match the suite names in scoreboard.json.
+SUITES=(
+  "index:lib/search/tests/index.sx"
+  "boolean:lib/search/tests/boolean.sx"
+  "parse:lib/search/tests/parse.sx"
+  "rank:lib/search/tests/rank.sx"
+  "integration:lib/search/tests/integration.sx"
+  "prefix:lib/search/tests/prefix.sx"
+  "page:lib/search/tests/page.sx"
+  "fuzzy:lib/search/tests/fuzzy.sx"
+  "highlight:lib/search/tests/highlight.sx"
+  "stem:lib/search/tests/stem.sx"
+  "near:lib/search/tests/near.sx"
+  "syn:lib/search/tests/syn.sx"
+  "rankq:lib/search/tests/rankq.sx"
+  "suggest:lib/search/tests/suggest.sx"
+)
--- a/lib/search/conformance.sh
+++ b/lib/search/conformance.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+# Thin wrapper — see lib/guest/conformance.sh and lib/search/conformance.conf.
+exec bash "$(dirname "$0")/../guest/conformance.sh" "$(dirname "$0")/conformance.conf" "$@"
--- a/lib/search/fed.sx
+++ b/lib/search/fed.sx
@@ -0,0 +1,16 @@
+;; search federation + ACL — Haskell source fragment. Depends on index + rank.
+;; Federation merges per-peer INDICES (not ranked results): each peer's local
+;; DocIds are relabelled to global ids `gid peer local = peer*1000 + local`
+;; (this dedupes by (peer,doc-id)), then posting lists are unioned per term.
+;; NOTE: gid is collision-free only while every local id is in 0..999 (gid 1 1000
+;; == gid 2 0); on a clash the posting merged in later replaces the earlier one.
+;; Ranking then runs once over the merged index. ACL is a post-rank filter: an
+;; injected `permit :: DocId -> Bool` (viewer baked in by the caller), never in the index.
+;;   fedIndex       :: [(PeerId, Index)] -> Index
+;;   aclFilter      :: (DocId -> Bool) -> [DocId] -> [DocId]
+;;   searchTfIdfAcl :: (DocId -> Bool) -> [Term] -> Index -> [DocId]
+;;   topNTfIdfAcl   :: Int -> (DocId -> Bool) -> [Term] -> Index -> [DocId]
+;;   searchBm25Acl  :: (DocId -> Bool) -> Float -> Float -> [Term] -> Index -> [DocId]
+(define
+  search/fed-src
+  "gid peer local = peer * 1000 + local\nfedRelabelPosting peer p = (gid peer (fst p), snd p)\nfedRelabelEntry peer e = (fst e, map (fedRelabelPosting peer) (snd e))\nfedRelabelIndex peer idx = map (fedRelabelEntry peer) idx\nfedInsP p [] = [p]\nfedInsP p (q:qs) = if fst p < fst q then p : q : qs else if fst p == fst q then p : qs else q : fedInsP p qs\nfedMergePL a b = foldr fedInsP b a\nfedInsTerm t pl [] = [(t, pl)]\nfedInsTerm t pl (x:xs) = if t < fst x then (t, pl) : x : xs else if t == fst x then (fst x, fedMergePL pl (snd x)) : xs else x : fedInsTerm t pl xs\nfedMergeEntry idx e = fedInsTerm (fst e) (snd e) idx\nfedMergeTwo a b = foldl fedMergeEntry a b\nfedAddPeer acc pair = fedMergeTwo acc (fedRelabelIndex (fst pair) (snd pair))\nfedIndex pairs = foldl fedAddPeer emptyIndex pairs\naclFilter permit docs = filter permit docs\nsearchTfIdfAcl permit ts idx = aclFilter permit (rankTfIdf ts idx)\ntopNTfIdfAcl n permit ts idx = take n (aclFilter permit (rankTfIdf ts idx))\nsearchBm25Acl permit k1 b ts idx = aclFilter permit (rankBm25 k1 b ts idx)\n")
--- a/lib/search/fuzzy.sx
+++ b/lib/search/fuzzy.sx
@@ -0,0 +1,12 @@
+;; search fuzzy matching — Haskell source fragment. Depends on index + rank.
+;; editDist: Levenshtein distance as an O(m*n) row-based DP (naive recursion is
+;; exponential, far too slow under load); edRows derives one row per char of xs
+;; from the start row [0..length ys]. Terms within maxd are then unioned / ranked.
+;;   editDist        :: String -> String -> Int
+;;   fuzzyTerms      :: Int -> String -> Index -> [Term]   (sorted)
+;;   fuzzyDocs       :: Int -> String -> Index -> [DocId]  (sorted union)
+;;   fuzzyRankTfIdf  :: Int -> String -> Index -> [DocId]
+
+(define
+  search/fuzzy-src
+  "edMin3 a b c = min a (min b c)\nedCost x y = if x == y then 0 else 1\nedUpto i n = if i > n then [] else i : edUpto (i + 1) n\nedLast [x] = x\nedLast (x:xs) = edLast xs\nedNrow x [] prev left = []\nedNrow x (y:ys) prev left = let v = edMin3 (head (tail prev) + 1) (left + 1) (head prev + edCost x y) in v : edNrow x ys (tail prev) v\nedRow x ys prev = let f = head prev + 1 in f : edNrow x ys prev f\nedRows [] ys prev = prev\nedRows (x:xs) ys prev = edRows xs ys (edRow x ys prev)\neditDist xs ys = edLast (edRows xs ys (edUpto 0 (length ys)))\nqWithinDist maxd term t = editDist term t <= maxd\nfuzzyTerms maxd term idx = filter (qWithinDist maxd term) (allTerms idx)\nfuzzyDocs maxd term idx = foldl (candStep idx) [] (fuzzyTerms maxd term idx)\nfuzzyRankTfIdf maxd term idx = rankTfIdf (fuzzyTerms maxd term idx) idx\n")
--- a/lib/search/highlight.sx
+++ b/lib/search/highlight.sx
@@ -0,0 +1,10 @@
+;; search highlight / snippet — Haskell source fragment. Depends on tokenize.
+;; Operates on document text (not the index). Output is rebuilt from `tokens text`
+;; (normalized: lowercase, punctuation-stripped, matching the index side) joined by
+;; single spaces; tokens found in the term list are wrapped in [..]. snippet keeps
+;; 2*ctx+1 tokens starting up to ctx before the first match (from token 0 if no match).
+;;   highlight :: [Term] -> String -> String
+;;   snippet   :: Int -> [Term] -> String -> String   (ctx tokens each side of 1st match)
+(define
+  search/highlight-src
+  "hlMark terms t = if elem t terms then \"[\" ++ t ++ \"]\" else t\nhighlight terms text = unwords (map (hlMark terms) (tokens text))\nhlIdxFrom terms [] i = 0 - 1\nhlIdxFrom terms (t:ts) i = if elem t terms then i else hlIdxFrom terms ts (i + 1)\nhlIdx terms toks = hlIdxFrom terms toks 0\nhlMax0 x = if x < 0 then 0 else x\nsnipStart ctx i = if i < 0 then 0 else hlMax0 (i - ctx)\nsnipToks ctx terms toks = unwords (map (hlMark terms) (take (2 * ctx + 1) (drop (snipStart ctx (hlIdx terms toks)) toks)))\nsnippet ctx terms text = snipToks ctx terms (tokens text)\n")
--- a/lib/search/index.sx
+++ b/lib/search/index.sx
@@ -0,0 +1,15 @@
+;; search inverted index — Haskell source fragment (depends on tokenize).
+;; Index = [(Term, [(DocId, [Pos])])], sorted by Term; postings sorted by DocId.
+;; Data.Map's public API lacks toList/keys/map/filter, so a sorted assoc-list is
+;; used as the conceptual `Map Term [(DocId,[Pos])]` (gives allTerms and df).
+;; indexDoc first deleteDoc's the id, so re-indexing drops the doc's stale terms.
+;;   emptyIndex :: Index
+;;   indexDoc   :: DocId -> String -> Index -> Index   (re-index replaces)
+;;   lookupTerm :: Term -> Index -> [(DocId, [Pos])]
+;;   deleteDoc  :: DocId -> Index -> Index
+;;   docFreq    :: Term -> Index -> Int
+;;   allTerms   :: Index -> [Term]
+
+(define
+  search/index-src
+  "emptyIndex = []\ngroupBump [] t p = [(t, [p])]\ngroupBump (g:gs) t p = if fst g == t then (t, snd g ++ [p]) : gs else g : groupBump gs t p\ngroupStep acc tp = groupBump acc (fst tp) (snd tp)\ngroupTok pairs = foldl groupStep [] pairs\ninsPosting d ps [] = [(d, ps)]\ninsPosting d ps (q:qs) = if d < fst q then (d, ps) : q : qs else if d == fst q then (d, ps) : qs else q : insPosting d ps qs\ninsTerm t d ps [] = [(t, [(d, ps)])]\ninsTerm t d ps (e:es) = if t < fst e then (t, [(d, ps)]) : e : es else if t == fst e then (fst e, insPosting d ps (snd e)) : es else e : insTerm t d ps es\nindexStep d ix tp = insTerm (fst tp) d (snd tp) ix\nindexDoc d text idx = foldl (indexStep d) (deleteDoc d idx) (groupTok (positioned text))\nlookupTerm t idx = case lookup t idx of { Nothing -> []; Just pl -> pl }\ndocFreq t idx = length (lookupTerm t idx)\nallTerms idx = map fst idx\npostingKeep d q = fst q /= d\ndropTermDoc d e = (fst e, filter (postingKeep d) (snd e))\nplKeep e = not (null (snd e))\ndeleteDoc d idx = filter plKeep (map (dropTermDoc d) idx)\n")
--- a/lib/search/near.sx
+++ b/lib/search/near.sx
@@ -0,0 +1,8 @@
+;; search proximity (NEAR) — Haskell source fragment. Depends on query (posIn,
+;; docsWith, sortedInter). Keeps the docs containing both terms in which some
+;; position of t1 and some position of t2 differ by at most k (either order).
+;;   nearDocs :: Int -> Term -> Term -> Index -> [DocId]   (sorted)
+
+(define
+  search/near-src
+  "nrAbsDiff a b = if a > b then a - b else b - a\nnrCloseTo k x [] = False\nnrCloseTo k x (y:ys) = if nrAbsDiff x y <= k then True else nrCloseTo k x ys\nnrAnyClose k [] ys = False\nnrAnyClose k (x:xs) ys = if nrCloseTo k x ys then True else nrAnyClose k xs ys\nnearInDoc k t1 t2 d idx = nrAnyClose k (posIn t1 d idx) (posIn t2 d idx)\nnearHere k t1 t2 idx d = nearInDoc k t1 t2 d idx\nnearDocs k t1 t2 idx = filter (nearHere k t1 t2 idx) (sortedInter (docsWith t1 idx) (docsWith t2 idx))\n")
--- a/lib/search/page.sx
+++ b/lib/search/page.sx
@@ -0,0 +1,11 @@
+;; search pagination — Haskell source fragment. Depends on rank.
+;; paginate is `take lim (drop off docs)` over a ranked list (offset >= length ->
+;; empty; limit clamps to what remains). resultCount is the length of the TF-IDF ranking.
+;;   paginate    :: Int -> Int -> [DocId] -> [DocId]   (offset, limit)
+;;   pageTfIdf   :: Int -> Int -> [Term] -> Index -> [DocId]
+;;   pageBm25    :: Int -> Int -> Float -> Float -> [Term] -> Index -> [DocId]
+;;   resultCount :: [Term] -> Index -> Int
+
+(define
+  search/page-src
+  "paginate off lim docs = take lim (drop off docs)\npageTfIdf off lim ts idx = paginate off lim (rankTfIdf ts idx)\npageBm25 off lim k1 b ts idx = paginate off lim (rankBm25 k1 b ts idx)\nresultCount ts idx = length (rankTfIdf ts idx)\n")
--- a/lib/search/parse.sx
+++ b/lib/search/parse.sx
@@ -0,0 +1,18 @@
+;; search query parser — Haskell source fragment. Depends on tokenize + query.
+;; Grammar (precedence OR < AND < NOT):
+;;   expr   = orExpr
+;;   orExpr = andExpr (OR andExpr)*
+;;   andExpr= notExpr ((AND | <implicit>) notExpr)*   -- adjacency means AND
+;;   notExpr= NOT notExpr | atom
+;;   atom   = '(' expr ')' | '"' word+ '"' | word
+;; Keywords AND/OR/NOT are case-insensitive; bare words are normalized via tokens.
+;; Lenient: a missing atom is Term "", an unclosed quote runs to end of input, leftover tokens are ignored.
+;; Gotchas: delimiters matched by ord (escaped char literals like '\"' break the
+;; haskell-on-sx tokenizer); an [] *pattern* inside a `case` alt also breaks the
+;; parser, so qNormTerm/qDropRP/showQ are written as multi-clause functions.
+;;   parseQuery  :: String -> Query
+;;   searchQuery :: String -> Index -> [DocId]
+;;   showQ       :: Query -> String   -- canonical render for tests/debug
+(define
+  search/parse-src
+  "data QTok = TAnd | TOr | TNot | TLP | TRP | TWord String | TPhrase [String]\nqIsSpace c = ord c == 32\nqIsLP c = ord c == 40\nqIsRP c = ord c == 41\nqIsQuote c = ord c == 34\nqDelim c = qIsSpace c || qIsLP c || qIsRP c || qIsQuote c\nqReadWord [] = ([], [])\nqReadWord (c:cs) = if qDelim c then ([], c:cs) else let (w, rest) = qReadWord cs in (c:w, rest)\nqReadPhrase [] = ([], [])\nqReadPhrase (c:cs) = if qIsQuote c then ([], cs) else let (w, rest) = qReadPhrase cs in (c:w, rest)\ntoUpperCh c = chr (toUpper (ord c))\nqUpper w = joinChars (map toUpperCh w)\nqFirstTok [] = \"\"\nqFirstTok (x:xs) = x\nqNormTerm w = qFirstTok (tokens w)\nqClassify w = if qUpper w == \"AND\" then TAnd else if qUpper w == \"OR\" then TOr else if qUpper w == \"NOT\" then TNot else TWord (qNormTerm w)\nqPhraseTok cs = let (p, rest) = qReadPhrase cs in TPhrase (tokens p) : qtokens rest\nqWordTok cs = let (w, rest) = qReadWord cs in qClassify w : qtokens rest\nqtokens [] = []\nqtokens (c:cs) = if qIsSpace c then qtokens cs else if qIsLP c then TLP : qtokens cs else if qIsRP c then TRP : qtokens cs else if qIsQuote c then qPhraseTok cs else qWordTok (c:cs)\nqDropRP (q, (TRP:rest)) = (q, rest)\nqDropRP (q, ts) = (q, ts)\nparseAtom [] = (Term \"\", [])\nparseAtom (TLP:ts) = qDropRP (parseExpr ts)\nparseAtom (TPhrase ps : ts) = (Phrase ps, ts)\nparseAtom (TWord w : ts) = (Term w, ts)\nparseAtom ts = (Term \"\", ts)\nqWrapNot (q, ts) = (Not q, ts)\nparseNot (TNot:ts) = qWrapNot (parseNot ts)\nparseNot ts = parseAtom ts\nqStartsAtom (TWord w : ts) = True\nqStartsAtom (TPhrase p : ts) = True\nqStartsAtom (TLP : ts) = True\nqStartsAtom (TNot : ts) = True\nqStartsAtom ts = False\nqAndStep left ts = let (r, rest) = parseNot ts in parseAndR (And left r) rest\nparseAndR left (TAnd:ts) = qAndStep left ts\nparseAndR left ts = if qStartsAtom ts then qAndStep left ts else (left, ts)\nparseAnd ts = let (l, rest) = parseNot ts in parseAndR l rest\nparseOrR left (TOr:ts) = let (r, rest) = parseAnd ts in 
parseOrR (Or left r) rest\nparseOrR left ts = (left, ts)\nparseExpr ts = let (l, rest) = parseAnd ts in parseOrR l rest\nparseQuery s = fst (parseExpr (qtokens s))\nsearchQuery s idx = evalQuery idx (parseQuery s)\njoinSp [] = \"\"\njoinSp [x] = x\njoinSp (x:xs) = x ++ \"-\" ++ joinSp xs\nshowQ (Term t) = \"T:\" ++ t\nshowQ (And a b) = \"(\" ++ showQ a ++ \" & \" ++ showQ b ++ \")\"\nshowQ (Or a b) = \"(\" ++ showQ a ++ \" | \" ++ showQ b ++ \")\"\nshowQ (Not a) = \"!\" ++ showQ a\nshowQ (Phrase ts) = \"P:\" ++ joinSp ts\n")
--- a/lib/search/prefix.sx
+++ b/lib/search/prefix.sx
@@ -0,0 +1,10 @@
+;; search prefix / wildcard queries — Haskell source fragment. Depends on index +
+;; rank (reuses candStep / rankTfIdf). prefixTerms filters allTerms with
+;; `isPrefixOf pre`; the docs of the matching terms are unioned (OR) or ranked.
+;;   prefixTerms     :: String -> Index -> [Term]      (sorted, from allTerms)
+;;   prefixDocs      :: String -> Index -> [DocId]     (sorted union)
+;;   prefixRankTfIdf :: String -> Index -> [DocId]     (ranked by the matched terms)
+
+(define
+  search/prefix-src
+  "prefixTerms pre idx = filter (isPrefixOf pre) (allTerms idx)\nprefixDocs pre idx = foldl (candStep idx) [] (prefixTerms pre idx)\nprefixRankTfIdf pre idx = rankTfIdf (prefixTerms pre idx) idx\n")
--- a/lib/search/query.sx
+++ b/lib/search/query.sx
@@ -0,0 +1,11 @@
+;; search query AST + boolean/phrase evaluation — Haskell source fragment.
+;; Depends on tokenize + index.
+;;   data Query = Term String | And Query Query | Or Query Query
+;;              | Not Query | Phrase [String]
+;;   evalQuery :: Index -> Query -> [DocId]   (sorted, unique)
+;; Boolean ops are linear merges over docid-sorted posting lists; Not subtracts
+;; from allDocs (every indexed doc). Phrase intersects the terms' docs, then
+;; requires consecutive positions p, p+1, ...; an empty Phrase matches allDocs.
+(define
+  search/query-src
+  "data Query = Term String | And Query Query | Or Query Query | Not Query | Phrase [String]\ndocsWith t idx = map fst (lookupTerm t idx)\nsortedUnion [] ys = ys\nsortedUnion xs [] = xs\nsortedUnion (x:xs) (y:ys) = if x < y then x : sortedUnion xs (y:ys) else if x > y then y : sortedUnion (x:xs) ys else x : sortedUnion xs ys\nsortedInter [] ys = []\nsortedInter xs [] = []\nsortedInter (x:xs) (y:ys) = if x < y then sortedInter xs (y:ys) else if x > y then sortedInter (x:xs) ys else x : sortedInter xs ys\nsortedDiff [] ys = []\nsortedDiff xs [] = xs\nsortedDiff (x:xs) (y:ys) = if x < y then x : sortedDiff xs (y:ys) else if x > y then sortedDiff (x:xs) ys else sortedDiff xs ys\nmergeDocs acc e = sortedUnion acc (map fst (snd e))\nallDocs idx = foldl mergeDocs [] idx\nposIn t d idx = case lookup d (lookupTerm t idx) of { Nothing -> []; Just ps -> ps }\nelemSorted x [] = False\nelemSorted x (y:ys) = if x == y then True else if x < y then False else elemSorted x ys\nphraseAtAll [] d idx p i = True\nphraseAtAll (t:ts) d idx p i = if elemSorted (p + i) (posIn t d idx) then phraseAtAll ts d idx p (i + 1) else False\nphraseStartsAt ts d idx p = phraseAtAll ts d idx p 0\nphraseInDoc [] d idx = True\nphraseInDoc (t0:rest) d idx = any (phraseStartsAt (t0:rest) d idx) (posIn t0 d idx)\nphraseHere ts idx d = phraseInDoc ts d idx\ninterStep idx acc tt = sortedInter acc (docsWith tt idx)\nphraseCands [] idx = allDocs idx\nphraseCands (t:ts) idx = foldl (interStep idx) (docsWith t idx) ts\nphraseDocs ts idx = filter (phraseHere ts idx) (phraseCands ts idx)\nevalQuery idx q = case q of { Term t -> docsWith t idx ; And a b -> sortedInter (evalQuery idx a) (evalQuery idx b) ; Or a b -> sortedUnion (evalQuery idx a) (evalQuery idx b) ; Not a -> sortedDiff (allDocs idx) (evalQuery idx a) ; Phrase ts -> phraseDocs ts idx }\n")
--- a/lib/search/rank.sx
+++ b/lib/search/rank.sx
@@ -0,0 +1,14 @@
+;; search ranking — Haskell source fragment. Depends on tokenize + index + query.
+;; Ranked retrieval over the candidate set (docs containing any query term).
+;; Scores are floats; ties broken by DocId ascending (deterministic).
+;; TF-IDF: sum of tf * log(N/df) (0 when df == 0). BM25 idf: log((N-df+0.5)/(df+0.5)+1).
+;;   numDocs    :: Index -> Int
+;;   docFreq    :: Term -> Index -> Int           (from index)
+;;   docLen     :: DocId -> Index -> Int
+;;   rankTfIdf  :: [Term] -> Index -> [DocId]
+;;   topNTfIdf  :: Int -> [Term] -> Index -> [DocId]
+;;   rankBm25   :: Float -> Float -> [Term] -> Index -> [DocId]   (k1, b)
+;;   topNBm25   :: Int -> Float -> Float -> [Term] -> Index -> [DocId]
+(define
+  search/rank-src
+  "numDocs idx = length (allDocs idx)\ntfIn t d idx = length (posIn t d idx)\nqIdf n df = if df == 0 then 0 else log (n / df)\nidf t idx = qIdf (numDocs idx) (docFreq t idx)\ntermScoreTf idx d t = tfIn t d idx * idf t idx\ntfidfDoc ts idx d = sum (map (termScoreTf idx d) ts)\ncandStep idx acc t = sortedUnion acc (docsWith t idx)\ncandDocs ts idx = foldl (candStep idx) [] ts\ncmpScore p1 p2 = if fst p1 > fst p2 then LT else if fst p1 < fst p2 then GT else compare (snd p1) (snd p2)\nmkPair f ts idx d = (f ts idx d, d)\nrankWith f ts idx = map snd (sortBy cmpScore (map (mkPair f ts idx) (candDocs ts idx)))\nrankTfIdf ts idx = rankWith tfidfDoc ts idx\ntopNTfIdf n ts idx = take n (rankTfIdf ts idx)\ntfAt d idx t = tfIn t d idx\ndocLen d idx = sum (map (tfAt d idx) (allTerms idx))\nlenAt idx d = docLen d idx\navgDocLen idx = sum (map (lenAt idx) (allDocs idx)) / numDocs idx\nbm25idf t idx = log ((numDocs idx - docFreq t idx + 0.5) / (docFreq t idx + 0.5) + 1)\nbm25Term k1 b avgdl idx d t = bm25idf t idx * (tfIn t d idx * (k1 + 1)) / (tfIn t d idx + k1 * (1 - b + b * docLen d idx / avgdl))\nbm25Doc k1 b ts idx d = sum (map (bm25Term k1 b (avgDocLen idx) idx d) ts)\nrankBm25 k1 b ts idx = rankWith (bm25Doc k1 b) ts idx\ntopNBm25 n k1 b ts idx = take n (rankBm25 k1 b ts idx)\n")
--- a/lib/search/rankq.sx
+++ b/lib/search/rankq.sx
@@ -0,0 +1,11 @@
+;; search boolean-filtered ranked search — Haskell source fragment.
+;; Depends on parse (parseQuery/Query), query (evalQuery), rank (tfidfDoc/bm25Doc/
+;; cmpScore). Filters by the boolean query, then ranks the surviving docs by
+;; relevance over the query's leaf terms — the real-world filter-then-rank pattern.
+;; queryTerms collects every leaf, including terms under Not and repeated terms.
+;;   queryTerms       :: Query -> [Term]
+;;   searchRankTfIdf  :: String -> Index -> [DocId]
+;;   searchRankBm25   :: Float -> Float -> String -> Index -> [DocId]
+(define
+  search/rankq-src
+  "queryTerms (Term t) = [t]\nqueryTerms (And a b) = queryTerms a ++ queryTerms b\nqueryTerms (Or a b) = queryTerms a ++ queryTerms b\nqueryTerms (Not a) = queryTerms a\nqueryTerms (Phrase ts) = ts\nmkSubPair f terms idx d = (f terms idx d, d)\nrankSubsetWith f terms docs idx = map snd (sortBy cmpScore (map (mkSubPair f terms idx) docs))\nsearchRankTfIdf s idx = let q = parseQuery s in rankSubsetWith tfidfDoc (queryTerms q) (evalQuery idx q) idx\nsearchRankBm25 k1 b s idx = let q = parseQuery s in rankSubsetWith (bm25Doc k1 b) (queryTerms q) (evalQuery idx q) idx\n")
--- a/lib/search/scoreboard.json
+++ b/lib/search/scoreboard.json
@@ -0,0 +1,23 @@
+{
+  "lang": "search",
+  "total_passed": 234,
+  "total_failed": 0,
+  "total": 234,
+  "suites": [
+    {"name":"index","passed":18,"failed":0,"total":18},
+    {"name":"boolean","passed":28,"failed":0,"total":28},
+    {"name":"parse","passed":32,"failed":0,"total":32},
+    {"name":"rank","passed":23,"failed":0,"total":23},
+    {"name":"integration","passed":21,"failed":0,"total":21},
+    {"name":"prefix","passed":14,"failed":0,"total":14},
+    {"name":"page","passed":12,"failed":0,"total":12},
+    {"name":"fuzzy","passed":18,"failed":0,"total":18},
+    {"name":"highlight","passed":12,"failed":0,"total":12},
+    {"name":"stem","passed":18,"failed":0,"total":18},
+    {"name":"near","passed":9,"failed":0,"total":9},
+    {"name":"syn","passed":9,"failed":0,"total":9},
+    {"name":"rankq","passed":11,"failed":0,"total":11},
+    {"name":"suggest","passed":9,"failed":0,"total":9}
+  ],
+  "generated": "2026-06-07T00:44:05+00:00"
+}
--- a/lib/search/scoreboard.md
+++ b/lib/search/scoreboard.md
@@ -0,0 +1,20 @@
+# search scoreboard
+
+**234 / 234 passing** (0 failure(s)).
+
+| Suite | Passed | Total | Status |
+|-------|--------|-------|--------|
+| index | 18 | 18 | ok |
+| boolean | 28 | 28 | ok |
+| parse | 32 | 32 | ok |
+| rank | 23 | 23 | ok |
+| integration | 21 | 21 | ok |
+| prefix | 14 | 14 | ok |
+| page | 12 | 12 | ok |
+| fuzzy | 18 | 18 | ok |
+| highlight | 12 | 12 | ok |
+| stem | 18 | 18 | ok |
+| near | 9 | 9 | ok |
+| syn | 9 | 9 | ok |
+| rankq | 11 | 11 | ok |
+| suggest | 9 | 9 | ok |
--- a/lib/search/stem.sx
+++ b/lib/search/stem.sx
@@ -0,0 +1,15 @@
+;; search stemming — Haskell source fragment. Depends on tokenize + index.
+;; Lightweight, deterministic English suffix stripping (recall-improving
+;; normalizer). Rules are checked most-specific first; conservative length guards
+;; avoid mangling short words. Not a full Porter stemmer.
+;; Rule order: -ies -> -y (len >= 5), -ss kept, -es (>= 5), -s (>= 4), -ing (>= 6), -ed (>= 5).
+;; Gotcha: take/drop over a String yield char CODES (ints), not char strings, so
+;; rebuild strings with `stStr = joinChars . map chr`. (isSuffixOf's reverse also
+;; trips `++` on the String representation, hence the manual stEnds.)
+;;   stem        :: String -> String
+;;   stemText    :: String -> String        (tokenize + stem + rejoin)
+;;   stemTokens  :: String -> [String]
+;;   indexStemmed:: DocId -> String -> Index -> Index   (index the stemmed text)
+(define
+  search/stem-src
+  "stStr cs = joinChars (map chr cs)\nstEnds suf w = let n = length w in let m = length suf in if m > n then False else stStr (drop (n - m) w) == suf\nstDropEnd k w = stStr (take (length w - k) w)\nstem w = if stEnds \"ies\" w && length w >= 5 then stDropEnd 3 w ++ \"y\" else if stEnds \"ss\" w then w else if stEnds \"es\" w && length w >= 5 then stDropEnd 2 w else if stEnds \"s\" w && length w >= 4 then stDropEnd 1 w else if stEnds \"ing\" w && length w >= 6 then stDropEnd 3 w else if stEnds \"ed\" w && length w >= 5 then stDropEnd 2 w else w\nstemTokens s = map stem (tokens s)\nstemText s = unwords (stemTokens s)\nindexStemmed d text idx = indexDoc d (stemText text) idx\n")
--- a/lib/search/suggest.sx
+++ b/lib/search/suggest.sx
@@ -0,0 +1,9 @@
+;; search did-you-mean / spelling suggestion — Haskell source fragment.
+;; Depends on fuzzy (editDist) + index (allTerms). Sorts ALL indexed terms by edit
+;; distance to a (possibly misspelled) query term; ties broken alphabetically.
+;;   suggestN :: Int -> String -> Index -> [Term]   (first n; no distance cutoff)
+;;   suggest  :: String -> Index -> Term   ("" if the index has no terms)
+
+(define
+  search/suggest-src
+  "sgMk term t = (editDist term t, t)\nsgPairs term idx = map (sgMk term) (allTerms idx)\nsgCmp p1 p2 = if fst p1 < fst p2 then LT else if fst p1 > fst p2 then GT else compare (snd p1) (snd p2)\nsuggestN n term idx = take n (map snd (sortBy sgCmp (sgPairs term idx)))\nsgHead [] = \"\"\nsgHead (x:xs) = x\nsuggest term idx = sgHead (suggestN 1 term idx)\n")
--- a/lib/search/syn.sx
+++ b/lib/search/syn.sx
@@ -0,0 +1,10 @@
+;; search synonym / query expansion — Haskell source fragment. Depends on index +
+;; rank. A synonym map is an assoc list [(Term, [Term])]; a query term expands to
+;; itself plus its direct synonyms (no transitive expansion), then is unioned / ranked.
+;;   expandTerm    :: [(Term,[Term])] -> Term -> [Term]
+;;   synDocs       :: [(Term,[Term])] -> Term -> Index -> [DocId]
+;;   synRankTfIdf  :: [(Term,[Term])] -> Term -> Index -> [DocId]
+
+(define
+  search/syn-src
+  "synLookup synmap t = case lookup t synmap of { Nothing -> [] ; Just ss -> ss }\nexpandTerm synmap t = t : synLookup synmap t\nsynDocs synmap t idx = foldl (candStep idx) [] (expandTerm synmap t)\nsynRankTfIdf synmap t idx = rankTfIdf (expandTerm synmap t) idx\n")
--- a/lib/search/testlib.sx
+++ b/lib/search/testlib.sx
@@ -0,0 +1,50 @@
+;; search test helpers — convert forced haskell values to plain SX and run
+;; programs built on top of search/src. Reuses hk-test / counters from
+;; lib/haskell/testlib.sx (preloaded by the conformance config).
+
+;; Convert a fully forced haskell-on-sx value into plain SX data, recursively.
+;; Non-empty lists tagged "[]" / ":" (cons cells) become SX lists and "Tuple"
+;; values become SX lists of their converted fields; anything else is returned as is.
+(define
+  search-hk->sx
+  (fn
+    (v)
+    (cond
+      ((not (list? v)) v)
+      ((empty? v) v)
+      ((= (first v) "[]") (list))
+      ((= (first v) ":")
+        (cons (search-hk->sx (nth v 1)) (search-hk->sx (nth v 2))))
+      ((= (first v) "Tuple") (map search-hk->sx (rest v)))
+      (:else v))))
+
+;; Evaluate search/src + `extra` (extra top-level Haskell bindings) as one
+;; program, deep-force the binding called `name`, and return it as plain SX.
+(define
+  search-eval
+  (fn
+    (extra name)
+    (search-hk->sx
+      (hk-deep-force
+        (get (hk-eval-program (hk-core (str search/src extra))) name)))))
+;; Join the items of `xs` with `sep` between neighbours ("" for an empty list).
+(define
+  search-join
+  (fn
+    (sep xs)
+    (cond
+      ((empty? xs) "")
+      ((not (empty? (rest xs))) (str (first xs) sep (search-join sep (rest xs))))
+      (:else (first xs)))))
+
+;; Batch many haskell expressions into ONE program evaluation (amortizes the
+;; cost of parsing/binding search/src — important under heavy CPU load).
+;; `setup` is extra top-level Haskell; `exprs` is a list of expression strings
+;; bound as `result = [e1, e2, ...]` (one haskell list). Returns the SX results.
+(define
+  search-batch
+  (fn
+    (setup exprs)
+    (search-eval
+      (str setup "\nresult = [" (search-join ", " exprs) "]\n")
+      "result")))
--- a/lib/search/tests/boolean.sx
+++ b/lib/search/tests/boolean.sx
@@ -0,0 +1,123 @@
+;; Phase 2 — query AST + boolean/phrase evaluation (hand-built Query values).
+;; Corpus, bound as `idx` by search-corpus (docs are indexed in the order 1, 2, 3):
+;;   doc 1 "the quick brown dog"   -> the quick brown dog
+;;   doc 2 "a quick brown fox"     -> a quick brown fox
+;;   doc 3 "the dog barks loudly"  -> the dog barks loudly
+;; All queries run in ONE program evaluation (search-batch) to stay fast.
+
+(define
+  search-corpus
+  "idx = indexDoc 3 \"the dog barks loudly\" (indexDoc 2 \"a quick brown fox\" (indexDoc 1 \"the quick brown dog\" emptyIndex))\n")
+;; Each case: (label, Haskell expression evaluated against `idx`, expected SX value).
+(define
+  bool-cases
+  (list
+    (list
+      "term in two docs"
+      "evalQuery idx (Term \"quick\")"
+      (list 1 2))
+    (list
+      "term in two docs (the)"
+      "evalQuery idx (Term \"the\")"
+      (list 1 3))
+    (list "term in one doc" "evalQuery idx (Term \"fox\")" (list 2))
+    (list "term absent" "evalQuery idx (Term \"zzz\")" (list))
+    (list
+      "term case-sensitive at AST level"
+      "evalQuery idx (Term \"QUICK\")"
+      (list))
+    (list "term on empty index" "evalQuery emptyIndex (Term \"cat\")" (list))
+    (list
+      "and both terms"
+      "evalQuery idx (And (Term \"quick\") (Term \"brown\"))"
+      (list 1 2))
+    (list
+      "and overlap subset"
+      "evalQuery idx (And (Term \"the\") (Term \"dog\"))"
+      (list 1 3))
+    (list
+      "and disjoint is empty"
+      "evalQuery idx (And (Term \"the\") (Term \"fox\"))"
+      (list))
+    (list
+      "and right-nested"
+      "evalQuery idx (And (Term \"the\") (And (Term \"dog\") (Term \"barks\")))"
+      (list 3))
+    (list
+      "or two singletons"
+      "evalQuery idx (Or (Term \"fox\") (Term \"barks\"))"
+      (list 2 3))
+    (list
+      "or all docs"
+      "evalQuery idx (Or (Term \"quick\") (Term \"the\"))"
+      (list 1 2 3))
+    (list
+      "or with absent term"
+      "evalQuery idx (Or (Term \"fox\") (Term \"zzz\"))"
+      (list 2))
+    (list "not term" "evalQuery idx (Not (Term \"the\"))" (list 2))
+    (list "not term 2" "evalQuery idx (Not (Term \"quick\"))" (list 3))
+    (list
+      "and with not"
+      "evalQuery idx (And (Term \"quick\") (Not (Term \"the\")))"
+      (list 2))
+    (list
+      "double negation"
+      "evalQuery idx (Not (Not (Term \"fox\")))"
+      (list 2))
+    (list
+      "or of and with term"
+      "evalQuery idx (Or (And (Term \"the\") (Term \"dog\")) (Term \"fox\"))"
+      (list 1 2 3))
+    (list
+      "phrase adjacent both docs"
+      "evalQuery idx (Phrase [\"quick\", \"brown\"])"
+      (list 1 2))
+    (list
+      "phrase adjacent one doc"
+      "evalQuery idx (Phrase [\"brown\", \"dog\"])"
+      (list 1))
+    (list
+      "phrase the quick"
+      "evalQuery idx (Phrase [\"the\", \"quick\"])"
+      (list 1))
+    (list
+      "phrase dog barks"
+      "evalQuery idx (Phrase [\"dog\", \"barks\"])"
+      (list 3))
+    (list
+      "phrase non-adjacent empty"
+      "evalQuery idx (Phrase [\"quick\", \"dog\"])"
+      (list))
+    (list
+      "phrase order matters"
+      "evalQuery idx (Phrase [\"brown\", \"quick\"])"
+      (list))
+    (list
+      "phrase single term"
+      "evalQuery idx (Phrase [\"dog\"])"
+      (list 1 3))
+    (list
+      "phrase three terms"
+      "evalQuery idx (Phrase [\"the\", \"dog\", \"barks\"])"
+      (list 3))
+    (list
+      "and of phrase and term"
+      "evalQuery idx (And (Phrase [\"quick\", \"brown\"]) (Term \"dog\"))"
+      (list 1))
+    (list
+      "not of phrase"
+      "evalQuery idx (Not (Phrase [\"quick\", \"brown\"]))"
+      (list 3))))
+;; Evaluate every case expression in one search-batch call; results come back in case order.
+(define
+  bool-results
+  (search-batch search-corpus (map (fn (c) (nth c 1)) bool-cases)))
+;; Check result i against the expected value of case i, using the case label as the test name.
+(map-indexed
+  (fn
+    (i c)
+    (hk-test (nth c 0) (nth bool-results i) (nth c 2)))
+  bool-cases)
+;; Suite result: hk-test-pass / hk-test-fail are the counters named in conformance.conf.
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}
--- a/lib/search/tests/fuzzy.sx
+++ b/lib/search/tests/fuzzy.sx
@@ -0,0 +1,74 @@
+;; Extension — fuzzy matching via Levenshtein edit distance.
+;; Corpus: 1 "color flavor"  2 "colour kitten"  3 "colored"
+;; allTerms: color colored colour flavor kitten
+
+(define ;; Shared Haskell setup handed to search-batch: binds idx (docs 1-3 of the corpus above) for every case.
+  fuzzy-setup
+  "idx = indexDoc 3 \"colored\" (indexDoc 2 \"colour kitten\" (indexDoc 1 \"color flavor\" emptyIndex))\n")
+
+(define ;; Case table: each entry is (label, Haskell expression, expected value).
+  fuzzy-cases
+  (list
+    (list ;; editDist cases: the scalar result is wrapped in [] so it compares as a one-element list
+      "editDist substitution"
+      "[editDist \"kitten\" \"sitten\"]"
+      (list 1))
+    (list "editDist equal" "[editDist \"abc\" \"abc\"]" (list 0))
+    (list "editDist deletion" "[editDist \"abc\" \"ab\"]" (list 1))
+    (list "editDist insertion" "[editDist \"ab\" \"abc\"]" (list 1))
+    (list "editDist from empty" "[editDist \"\" \"abc\"]" (list 3))
+    (list "editDist both empty" "[editDist \"\" \"\"]" (list 0))
+    (list
+      "editDist classic"
+      "[editDist \"kitten\" \"sitting\"]"
+      (list 3))
+    (list
+      "editDist color colour"
+      "[editDist \"color\" \"colour\"]"
+      (list 1))
+    (list
+      "editDist color colored"
+      "[editDist \"color\" \"colored\"]"
+      (list 2))
+    (list ;; fuzzyTerms cases: first argument is the maximum edit distance; expectations are index terms
+      "fuzzy terms dist 1"
+      "fuzzyTerms 1 \"color\" idx"
+      (list "color" "colour"))
+    (list
+      "fuzzy terms dist 2"
+      "fuzzyTerms 2 \"color\" idx"
+      (list "color" "colored" "colour"))
+    (list "fuzzy terms exact" "fuzzyTerms 0 \"color\" idx" (list "color"))
+    (list ;; the query word itself need not be in the index ("flavour" is not)
+      "fuzzy terms other word"
+      "fuzzyTerms 1 \"flavour\" idx"
+      (list "flavor"))
+    (list ;; fuzzyDocs / fuzzyRankTfIdf cases: expectations are doc ids
+      "fuzzy docs dist 1"
+      "fuzzyDocs 1 \"color\" idx"
+      (list 1 2))
+    (list
+      "fuzzy docs dist 2"
+      "fuzzyDocs 2 \"color\" idx"
+      (list 1 2 3))
+    (list "fuzzy docs none" "fuzzyDocs 1 \"zzzzz\" idx" (list))
+    (list
+      "fuzzy rank dist 1"
+      "fuzzyRankTfIdf 1 \"color\" idx"
+      (list 1 2))
+    (list
+      "fuzzy rank dist 2"
+      "fuzzyRankTfIdf 2 \"color\" idx"
+      (list 1 2 3))))
+
+(define fuzzy-results ;; results for every fuzzy-cases expression (element 1), from one search-batch call
+  (search-batch fuzzy-setup
+    (map (fn (entry) (nth entry 1)) fuzzy-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth fuzzy-results pos)
+      (nth entry 2)))
+  fuzzy-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/highlight.sx
+++ b/lib/search/tests/highlight.sx
@@ -0,0 +1,66 @@
+;; Extension — highlight + snippet over document text.
+;; Text: "the quick brown fox jumps"
+
+(define ;; Case table: (label, Haskell expression, expected string); every call passes its text inline.
+  hl-cases
+  (list
+    (list ;; highlight cases: matched tokens are expected wrapped in [ ]
+      "highlight two terms"
+      "highlight [\"quick\", \"fox\"] \"the quick brown fox jumps\""
+      "the [quick] brown [fox] jumps")
+    (list
+      "highlight none"
+      "highlight [] \"the quick brown fox jumps\""
+      "the quick brown fox jumps")
+    (list
+      "highlight absent term"
+      "highlight [\"zzz\"] \"the quick brown fox jumps\""
+      "the quick brown fox jumps")
+    (list
+      "highlight first token"
+      "highlight [\"the\"] \"the quick brown fox jumps\""
+      "[the] quick brown fox jumps")
+    (list ;; expected output is lowercased and punctuation-free: original casing is not preserved
+      "highlight normalizes text"
+      "highlight [\"quick\"] \"The Quick, brown!\""
+      "the [quick] brown")
+    (list ;; snippet cases: first argument is the context size in tokens (0 yields the hit alone)
+      "snippet around middle"
+      "snippet 1 [\"brown\"] \"the quick brown fox jumps\""
+      "quick [brown] fox")
+    (list
+      "snippet at start"
+      "snippet 1 [\"the\"] \"the quick brown fox jumps\""
+      "[the] quick brown")
+    (list
+      "snippet near end"
+      "snippet 1 [\"fox\"] \"the quick brown fox jumps\""
+      "brown [fox] jumps")
+    (list
+      "snippet ctx zero"
+      "snippet 0 [\"brown\"] \"the quick brown fox jumps\""
+      "[brown]")
+    (list
+      "snippet clamps at end"
+      "snippet 2 [\"jumps\"] \"the quick brown fox jumps\""
+      "brown fox [jumps]")
+    (list ;; no hit: the expected snippet is the beginning of the text, without brackets
+      "snippet no match shows head"
+      "snippet 1 [\"zzz\"] \"the quick brown fox jumps\""
+      "the quick brown")
+    (list
+      "snippet wide window"
+      "snippet 5 [\"brown\"] \"the quick brown fox jumps\""
+      "the quick [brown] fox jumps")))
+
+(define hl-results ;; results for every hl-cases expression (element 1); the setup string is empty
+  (search-batch ""
+    (map (fn (entry) (nth entry 1)) hl-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth hl-results pos)
+      (nth entry 2)))
+  hl-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/index.sx
+++ b/lib/search/tests/index.sx
@@ -0,0 +1,88 @@
+;; Phase 1 — tokenize + inverted index.
+;; All cases run in ONE program evaluation (search-batch) to stay fast under load.
+;; Scalar results (docFreq) are wrapped as singleton lists so the batch is a list
+;; of lists.
+
+(define ;; Case table: (label, Haskell expression, expected value); each case builds its own index from emptyIndex.
+  index-cases
+  (list
+    (list ;; tokens cases: expectations are lowercase word lists
+      "tokens basic lowercases"
+      "tokens \"The Cat sat\""
+      (list "the" "cat" "sat"))
+    (list
+      "tokens strips punctuation"
+      "tokens \"Hello, World!\""
+      (list "hello" "world"))
+    (list "tokens collapses whitespace" "tokens \"  a   b  \"" (list "a" "b"))
+    (list "tokens empty is empty" "tokens \"\"" (list))
+    (list "tokens keeps digits" "tokens \"abc123 x9\"" (list "abc123" "x9"))
+    (list
+      "positioned attaches ordinals"
+      "positioned \"a b a\""
+      (list
+        (list "a" 0)
+        (list "b" 1)
+        (list "a" 2)))
+    (list ;; lookupTerm cases: postings compare as (docId, [0-based positions]) pairs
+      "index + lookup single doc"
+      "lookupTerm \"cat\" (indexDoc 1 \"the cat sat\" emptyIndex)"
+      (list (list 1 (list 1))))
+    (list
+      "lookup missing term is empty"
+      "lookupTerm \"dog\" (indexDoc 1 \"the cat sat\" emptyIndex)"
+      (list))
+    (list
+      "lookup records all positions"
+      "lookupTerm \"the\" (indexDoc 1 \"the cat the dog the\" emptyIndex)"
+      (list (list 1 (list 0 2 4))))
+    (list ;; doc 2 is indexed first here, yet the expected postings are ordered by doc id
+      "multi-doc posting list sorted by docid"
+      "lookupTerm \"x\" (indexDoc 1 \"x y\" (indexDoc 2 \"x z\" emptyIndex))"
+      (list
+        (list 1 (list 0))
+        (list 2 (list 0))))
+    (list
+      "index/query case symmetry"
+      "lookupTerm \"cat\" (indexDoc 1 \"CAT Cat cat\" emptyIndex)"
+      (list (list 1 (list 0 1 2))))
+    (list ;; same doc id indexed twice: only the outer (later) indexDoc's positions are expected
+      "re-index replaces a doc"
+      "lookupTerm \"a\" (indexDoc 1 \"a a a\" (indexDoc 1 \"a\" emptyIndex))"
+      (list (list 1 (list 0 1 2))))
+    (list
+      "delete removes a doc"
+      "lookupTerm \"cat\" (deleteDoc 1 (indexDoc 1 \"the cat\" emptyIndex))"
+      (list))
+    (list
+      "delete leaves other docs"
+      "lookupTerm \"cat\" (deleteDoc 2 (indexDoc 2 \"big cat\" (indexDoc 1 \"the cat\" emptyIndex)))"
+      (list (list 1 (list 1))))
+    (list ;; docFreq returns a scalar, so the expression wraps it in [] (see the file header)
+      "docFreq counts docs"
+      "[docFreq \"cat\" (indexDoc 2 \"a cat\" (indexDoc 1 \"the cat\" emptyIndex))]"
+      (list 2))
+    (list
+      "docFreq zero for missing"
+      "[docFreq \"zzz\" (indexDoc 1 \"a b\" emptyIndex)]"
+      (list 0))
+    (list
+      "allTerms sorted and unique"
+      "allTerms (indexDoc 1 \"banana apple cherry apple\" emptyIndex)"
+      (list "apple" "banana" "cherry"))
+    (list
+      "allTerms merged across docs"
+      "allTerms (indexDoc 2 \"d a\" (indexDoc 1 \"c b\" emptyIndex))"
+      (list "a" "b" "c" "d"))))
+
+(define index-results ;; results for every index-cases expression (element 1); the setup string is empty
+  (search-batch ""
+    (map (fn (entry) (nth entry 1)) index-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth index-results pos)
+      (nth entry 2)))
+  index-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/integration.sx
+++ b/lib/search/tests/integration.sx
@@ -0,0 +1,102 @@
+;; Phase 4 — federation (merge per-peer indices) + ACL post-filter.
+;; Peers (global id = peer*1000 + local):
+;;   peer 1: 1 "alpha beta"  2 "alpha gamma"     -> 1001 1002
+;;   peer 2: 1 "alpha delta" 2 "beta gamma"      -> 2001 2002
+;; ACL predicates are injected (viewer baked in by the caller), applied post-rank.
+
+(define ;; Shared Haskell setup: peer indices p1/p2, the merged index fed, and ACL predicates permitP1 (g < 2000), permitNone, permitList.
+  fed-setup
+  "p1 = indexDoc 2 \"alpha gamma\" (indexDoc 1 \"alpha beta\" emptyIndex)\np2 = indexDoc 2 \"beta gamma\" (indexDoc 1 \"alpha delta\" emptyIndex)\nfed = fedIndex [(1, p1), (2, p2)]\npermitP1 g = g < 2000\npermitNone g = False\npermitList g = elem g [1002, 2001]\n")
+
+(define ;; Case table: (label, Haskell expression, expected value); doc ids are the global ids from the file header.
+  fed-cases
+  (list
+    (list ;; federation cases: the merged index is queried with the ordinary index/rank/query functions
+      "fed merges all docs"
+      "sort (allDocs fed)"
+      (list 1001 1002 2001 2002))
+    (list
+      "fed docFreq across peers"
+      "[docFreq \"alpha\" fed]"
+      (list 3))
+    (list "fed docFreq beta" "[docFreq \"beta\" fed]" (list 2))
+    (list "fed numDocs" "[numDocs fed]" (list 4))
+    (list
+      "fed term lookup spans peers"
+      "map fst (lookupTerm \"gamma\" fed)"
+      (list 1002 2002))
+    (list
+      "fed preserves positions"
+      "lookupTerm \"beta\" fed"
+      (list
+        (list 1001 (list 1))
+        (list 2002 (list 0))))
+    (list
+      "fed rank alpha tie by gid"
+      "rankTfIdf [\"alpha\"] fed"
+      (list 1001 1002 2001))
+    (list
+      "fed rank beta"
+      "rankTfIdf [\"beta\"] fed"
+      (list 1001 2002))
+    (list
+      "fed boolean and"
+      "searchQuery \"alpha AND beta\" fed"
+      (list 1001))
+    (list ;; "barks" occurs in no peer document, so only the "delta" doc is expected
+      "fed boolean or"
+      "searchQuery \"delta OR barks\" fed"
+      (list 2001))
+    (list
+      "fed phrase within peer1"
+      "searchQuery \"\\\"alpha beta\\\"\" fed"
+      (list 1001))
+    (list
+      "fed phrase within peer2"
+      "searchQuery \"\\\"beta gamma\\\"\" fed"
+      (list 2002))
+    (list
+      "fed phrase peer2 alpha delta"
+      "searchQuery \"\\\"alpha delta\\\"\" fed"
+      (list 2001))
+    (list "fed empty peer list" "allDocs (fedIndex [])" (list))
+    (list ;; p1 registered under peer number 5: expected ids become 5001 / 5002
+      "fed single relabelled peer"
+      "rankTfIdf [\"alpha\"] (fedIndex [(5, p1)])"
+      (list 5001 5002))
+    (list ;; ACL cases: the predicates come from fed-setup and are applied to ranked result lists
+      "acl peer1 only"
+      "aclFilter permitP1 (rankTfIdf [\"alpha\"] fed)"
+      (list 1001 1002))
+    (list
+      "acl allowlist preserves rank order"
+      "aclFilter permitList (rankTfIdf [\"alpha\"] fed)"
+      (list 1002 2001))
+    (list
+      "acl topN after filter"
+      "topNTfIdfAcl 1 permitP1 [\"alpha\"] fed"
+      (list 1001))
+    (list
+      "acl denies all"
+      "aclFilter permitNone (rankTfIdf [\"alpha\"] fed)"
+      (list))
+    (list
+      "acl on bm25"
+      "searchBm25Acl permitP1 1.5 0.75 [\"alpha\"] fed"
+      (list 1001 1002))
+    (list
+      "acl end-to-end tfidf"
+      "searchTfIdfAcl permitP1 [\"alpha\"] fed"
+      (list 1001 1002))))
+
+(define fed-results ;; results for every fed-cases expression (element 1), from one search-batch call
+  (search-batch fed-setup
+    (map (fn (entry) (nth entry 1)) fed-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth fed-results pos)
+      (nth entry 2)))
+  fed-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/near.sx
+++ b/lib/search/tests/near.sx
@@ -0,0 +1,49 @@
+;; Extension — proximity (NEAR) search: terms within k positions, unordered.
+;; Corpus:
+;;   1 "the quick brown fox"    the0 quick1 brown2 fox3
+;;   2 "quick the lazy fox dog" quick0 the1 lazy2 fox3 dog4
+;;   3 "fox runs quick"         fox0 runs1 quick2
+
+(define ;; Shared Haskell setup handed to search-batch: binds idx to docs 1-3 of the corpus above.
+  near-setup
+  "idx = indexDoc 3 \"fox runs quick\" (indexDoc 2 \"quick the lazy fox dog\" (indexDoc 1 \"the quick brown fox\" emptyIndex))\n")
+
+(define ;; Case table: (label, Haskell expression, expected doc ids); nearDocs takes the window k first, then the two terms.
+  near-cases
+  (list
+    (list
+      "near adjacent one doc"
+      "nearDocs 1 \"quick\" \"brown\" idx"
+      (list 1))
+    (list
+      "near adjacent both docs"
+      "nearDocs 1 \"quick\" \"the\" idx"
+      (list 1 2))
+    (list ;; doc 2 is not expected: quick0 and fox3 are 3 positions apart
+      "near within 2"
+      "nearDocs 2 \"quick\" \"fox\" idx"
+      (list 1 3))
+    (list "near too far at k1" "nearDocs 1 \"quick\" \"fox\" idx" (list))
+    (list ;; same expectation as "near within 2" with the term arguments swapped
+      "near unordered symmetric"
+      "nearDocs 2 \"fox\" \"quick\" idx"
+      (list 1 3))
+    (list "near wider window" "nearDocs 5 \"the\" \"dog\" idx" (list 2))
+    (list "near absent term" "nearDocs 1 \"quick\" \"zzz\" idx" (list))
+    (list "near needs both terms" "nearDocs 3 \"brown\" \"dog\" idx" (list)) ;; brown is only in doc 1, dog only in doc 2
+    (list
+      "near same docs only"
+      "nearDocs 3 \"fox\" \"runs\" idx"
+      (list 3))))
+
+(define near-results ;; results for every near-cases expression (element 1), from one search-batch call
+  (search-batch near-setup
+    (map (fn (entry) (nth entry 1)) near-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth near-results pos)
+      (nth entry 2)))
+  near-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/page.sx
+++ b/lib/search/tests/page.sx
@@ -0,0 +1,53 @@
+;; Extension — result pagination (offset / limit) over ranked results.
+;; Corpus (tf of "x" descending): 1 x4 (+ "other")  2 x3  3 x2  4 x1  5 y(no x)
+;; rankTfIdf ["x"] -> [1,2,3,4]
+
+(define ;; Shared Haskell setup: binds idx; docs 1-4 contain "x" 4/3/2/1 times, doc 5 contains only "y".
+  page-setup
+  "idx = indexDoc 5 \"y\" (indexDoc 4 \"x\" (indexDoc 3 \"x x\" (indexDoc 2 \"x x x\" (indexDoc 1 \"x x x x other\" emptyIndex))))\n")
+
+(define ;; Case table: (label, Haskell expression, expected value).
+  page-cases
+  (list
+    (list "first page" "pageTfIdf 0 2 [\"x\"] idx" (list 1 2)) ;; numeric arguments: offset, then limit
+    (list
+      "second page"
+      "pageTfIdf 2 2 [\"x\"] idx"
+      (list 3 4))
+    (list
+      "sliding window"
+      "pageTfIdf 1 2 [\"x\"] idx"
+      (list 2 3))
+    (list
+      "limit exceeds remaining"
+      "pageTfIdf 3 10 [\"x\"] idx"
+      (list 4))
+    (list "offset past end" "pageTfIdf 4 2 [\"x\"] idx" (list))
+    (list "limit zero" "pageTfIdf 0 0 [\"x\"] idx" (list))
+    (list
+      "whole result"
+      "pageTfIdf 0 10 [\"x\"] idx"
+      (list 1 2 3 4))
+    (list ;; paginate works on a plain list, no index involved
+      "paginate raw list"
+      "paginate 1 2 [10, 20, 30, 40]"
+      (list 20 30))
+    (list "paginate raw past end" "paginate 9 2 [10, 20]" (list))
+    (list ;; only the page length is asserted for BM25, not the order; scalars are wrapped in []
+      "bm25 page window size"
+      "[length (pageBm25 0 2 1.5 0.75 [\"x\"] idx)]"
+      (list 2))
+    (list "result count" "[resultCount [\"x\"] idx]" (list 4))
+    (list "result count zero" "[resultCount [\"zzz\"] idx]" (list 0))))
+
+(define page-results ;; results for every page-cases expression (element 1), from one search-batch call
+  (search-batch page-setup
+    (map (fn (entry) (nth entry 1)) page-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth page-results pos)
+      (nth entry 2)))
+  page-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/parse.sx
+++ b/lib/search/tests/parse.sx
@@ -0,0 +1,139 @@
+;; Phase 2 — query parser (parseQuery / searchQuery).
+;; AST cases assert showQ (parseQuery s); search cases assert searchQuery s idx
+;; against the standard corpus. Each group runs in one batched program eval.
+;;   doc 1 "the quick brown dog"   doc 2 "a quick brown fox"   doc 3 "the dog barks loudly"
+
+(define ;; Shared Haskell setup for search-cases only (ast-cases are batched with an empty setup): binds idx to docs 1-3.
+  parse-corpus
+  "idx = indexDoc 3 \"the dog barks loudly\" (indexDoc 2 \"a quick brown fox\" (indexDoc 1 \"the quick brown dog\" emptyIndex))\n")
+
+(define ;; Case table: (label, Haskell expression, expected showQ rendering).
+  ast-cases ;; rendering seen in the expectations: T:term, P:w1-w2 phrase, (a & b) and, (a | b) or, !a not
+  (list
+    (list "single term" "showQ (parseQuery \"cat\")" "T:cat")
+    (list "term normalized" "showQ (parseQuery \"CAT\")" "T:cat")
+    (list "explicit and" "showQ (parseQuery \"cat AND dog\")" "(T:cat & T:dog)")
+    (list ;; lowercase "and" is expected to parse as the operator, not as a term
+      "lowercase and keyword"
+      "showQ (parseQuery \"cat and dog\")"
+      "(T:cat & T:dog)")
+    (list "implicit and" "showQ (parseQuery \"cat dog\")" "(T:cat & T:dog)")
+    (list "or" "showQ (parseQuery \"cat OR dog\")" "(T:cat | T:dog)")
+    (list "not" "showQ (parseQuery \"NOT cat\")" "!T:cat")
+    (list ;; precedence cases: AND groups before OR unless parentheses say otherwise
+      "and binds tighter than or"
+      "showQ (parseQuery \"cat AND dog OR bird\")"
+      "((T:cat & T:dog) | T:bird)")
+    (list
+      "or then and"
+      "showQ (parseQuery \"cat OR dog AND bird\")"
+      "(T:cat | (T:dog & T:bird))")
+    (list
+      "parens override precedence"
+      "showQ (parseQuery \"(cat OR dog) AND bird\")"
+      "((T:cat | T:dog) & T:bird)")
+    (list
+      "and with not"
+      "showQ (parseQuery \"cat AND NOT dog\")"
+      "(T:cat & !T:dog)")
+    (list ;; phrase cases: the query text contains literal double quotes, hence the \\\" escapes
+      "two-word phrase"
+      "showQ (parseQuery \"\\\"quick brown\\\"\")"
+      "P:quick-brown")
+    (list
+      "three-word phrase"
+      "showQ (parseQuery \"\\\"quick brown fox\\\"\")"
+      "P:quick-brown-fox")
+    (list
+      "and left-assoc"
+      "showQ (parseQuery \"a AND b AND c\")"
+      "((T:a & T:b) & T:c)")
+    (list
+      "or left-assoc"
+      "showQ (parseQuery \"a OR b OR c\")"
+      "((T:a | T:b) | T:c)")
+    (list
+      "punctuation stripped"
+      "showQ (parseQuery \"cat, dog!\")"
+      "(T:cat & T:dog)")))
+
+(define ;; Case table: (label, query passed to searchQuery, expected doc ids) against the idx bound by parse-corpus.
+  search-cases
+  (list
+    (list "term" "searchQuery \"quick\" idx" (list 1 2))
+    (list
+      "term normalized"
+      "searchQuery \"QUICK\" idx"
+      (list 1 2))
+    (list
+      "explicit and"
+      "searchQuery \"quick AND brown\" idx"
+      (list 1 2))
+    (list
+      "implicit and"
+      "searchQuery \"quick brown\" idx"
+      (list 1 2))
+    (list "and disjoint" "searchQuery \"the AND fox\" idx" (list)) ;; "the" is in docs 1 and 3, "fox" only in doc 2
+    (list "or" "searchQuery \"fox OR barks\" idx" (list 2 3))
+    (list "not" "searchQuery \"NOT the\" idx" (list 2))
+    (list "and not" "searchQuery \"quick AND NOT the\" idx" (list 2))
+    (list ;; both precedence cases group the AND first, so all three docs are expected
+      "precedence and-or"
+      "searchQuery \"the AND dog OR fox\" idx"
+      (list 1 2 3))
+    (list
+      "precedence or-and"
+      "searchQuery \"fox OR the AND dog\" idx"
+      (list 1 2 3))
+    (list
+      "parens"
+      "searchQuery \"the AND (dog OR fox)\" idx"
+      (list 1 3))
+    (list ;; phrase cases: the query text contains literal double quotes, hence the \\\" escapes
+      "phrase"
+      "searchQuery \"\\\"quick brown\\\"\" idx"
+      (list 1 2))
+    (list
+      "phrase one doc"
+      "searchQuery \"\\\"brown dog\\\"\" idx"
+      (list 1))
+    (list
+      "phrase and term"
+      "searchQuery \"\\\"quick brown\\\" AND dog\" idx"
+      (list 1))
+    (list
+      "not phrase"
+      "searchQuery \"NOT \\\"quick brown\\\"\" idx"
+      (list 3))
+    (list
+      "implicit and terms"
+      "searchQuery \"dog barks\" idx"
+      (list 3))))
+
+(define ast-results ;; results for every ast-cases expression (element 1); the setup string is empty
+  (search-batch ""
+    (map (fn (entry) (nth entry 1)) ast-cases)))
+(define search-results
+  ;; Results for every search-cases expression (element 1), batched with parse-corpus as setup.
+  (search-batch parse-corpus
+    (map (fn (entry) (nth entry 1))
+      search-cases)))
+
+(map-indexed
+  ;; AST group: one hk-test per case, label prefixed with "ast: ", result taken by position.
+  (fn (pos entry)
+    (hk-test
+      (str "ast: " (nth entry 0))
+      (nth ast-results pos)
+      (nth entry 2)))
+  ast-cases)
+(map-indexed
+  ;; Search group: one hk-test per case, label prefixed with "search: ", result taken by position.
+  (fn (pos entry)
+    (hk-test
+      (str "search: " (nth entry 0))
+      (nth search-results pos)
+      (nth entry 2)))
+  search-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/prefix.sx
+++ b/lib/search/tests/prefix.sx
@@ -0,0 +1,63 @@
+;; Extension — prefix / wildcard queries.
+;; Corpus: 1 "alpha alpine"  2 "beta apple"  3 "banana alpha"
+;; allTerms sorted: alpha alpine apple banana beta
+
+(define ;; Shared Haskell setup handed to search-batch: binds idx to docs 1-3 of the corpus above.
+  prefix-setup
+  "idx = indexDoc 3 \"banana alpha\" (indexDoc 2 \"beta apple\" (indexDoc 1 \"alpha alpine\" emptyIndex))\n")
+
+(define ;; Case table: (label, Haskell expression, expected value).
+  prefix-cases
+  (list
+    (list ;; prefixTerms cases: expectations are index terms
+      "prefix terms two matches"
+      "prefixTerms \"al\" idx"
+      (list "alpha" "alpine"))
+    (list ;; same expectation as "al": both matching terms also start with "alp"
+      "prefix terms narrower"
+      "prefixTerms \"alp\" idx"
+      (list "alpha" "alpine"))
+    (list
+      "prefix terms wide"
+      "prefixTerms \"a\" idx"
+      (list "alpha" "alpine" "apple"))
+    (list "prefix terms single" "prefixTerms \"ban\" idx" (list "banana"))
+    (list "prefix terms exact term" "prefixTerms \"beta\" idx" (list "beta"))
+    (list "prefix terms none" "prefixTerms \"z\" idx" (list))
+    (list ;; prefixDocs / prefixRankTfIdf cases: expectations are doc ids
+      "prefix docs union"
+      "prefixDocs \"al\" idx"
+      (list 1 3))
+    (list "prefix docs single term" "prefixDocs \"ban\" idx" (list 3))
+    (list
+      "prefix docs wide"
+      "prefixDocs \"a\" idx"
+      (list 1 2 3))
+    (list "prefix docs none" "prefixDocs \"z\" idx" (list))
+    (list
+      "prefix docs exact"
+      "prefixDocs \"alpha\" idx"
+      (list 1 3))
+    (list
+      "prefix rank ranks by matched terms"
+      "prefixRankTfIdf \"al\" idx"
+      (list 1 3))
+    (list
+      "prefix rank single doc"
+      "prefixRankTfIdf \"ban\" idx"
+      (list 3))
+    (list "prefix rank empty" "prefixRankTfIdf \"z\" idx" (list))))
+
+(define prefix-results
+  ;; Results for every prefix-cases expression (element 1), batched with prefix-setup.
+  (search-batch prefix-setup
+    (map (fn (entry) (nth entry 1))
+      prefix-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth prefix-results pos)
+      (nth entry 2)))
+  prefix-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/rank.sx
+++ b/lib/search/tests/rank.sx
@@ -0,0 +1,90 @@
+;; Phase 3 — ranking (TF-IDF, BM25, top-N). Deterministic: ties broken by DocId.
+;; Corpora:
+;;   idx1: 1 "alpha alpha alpha gamma"  2 "alpha"  3 "beta"
+;;   idx2: 1 "cat"  2 "cat cat dog elephant frog grape"  3 "zzz"
+;;   idx3: 1 "kite"  2 "kite"   (identical docs -> tiebreak)
+
+(define ;; Shared Haskell setup handed to search-batch: binds the three corpora idx1, idx2 and idx3 listed above.
+  rank-setup
+  "idx1 = indexDoc 3 \"beta\" (indexDoc 2 \"alpha\" (indexDoc 1 \"alpha alpha alpha gamma\" emptyIndex))\nidx2 = indexDoc 3 \"zzz\" (indexDoc 2 \"cat cat dog elephant frog grape\" (indexDoc 1 \"cat\" emptyIndex))\nidx3 = indexDoc 2 \"kite\" (indexDoc 1 \"kite\" emptyIndex)\n")
+
+(define ;; Case table: (label, Haskell expression, expected doc ids in result order, or a [scalar]).
+  rank-cases
+  (list
+    (list
+      "tfidf tf ordering"
+      "rankTfIdf [\"alpha\"] idx1"
+      (list 1 2))
+    (list ;; doc 3 (only "beta", which occurs in a single doc) is expected ahead of doc 2 (only "alpha")
+      "tfidf rare term boosts"
+      "rankTfIdf [\"alpha\", \"beta\"] idx1"
+      (list 1 3 2))
+    (list
+      "tfidf single-doc term"
+      "rankTfIdf [\"gamma\"] idx1"
+      (list 1))
+    (list "tfidf absent term empty" "rankTfIdf [\"nope\"] idx1" (list))
+    (list "tfidf empty query empty" "rankTfIdf [] idx1" (list))
+    (list
+      "tfidf candidate union tie by docid"
+      "rankTfIdf [\"beta\", \"gamma\"] idx1"
+      (list 1 3))
+    (list
+      "tfidf tf ordering idx2"
+      "rankTfIdf [\"cat\"] idx2"
+      (list 2 1))
+    (list "topN tfidf 1" "topNTfIdf 1 [\"alpha\"] idx1" (list 1))
+    (list
+      "topN tfidf 2"
+      "topNTfIdf 2 [\"alpha\", \"beta\"] idx1"
+      (list 1 3))
+    (list
+      "topN exceeds results"
+      "topNTfIdf 10 [\"gamma\"] idx1"
+      (list 1))
+    (list "topN zero" "topNTfIdf 0 [\"alpha\"] idx1" (list))
+    (list ;; BM25 cases: two numeric arguments, 1.5 (presumably k1 -- confirm in rank-src) and b (cf. "b=0" below)
+      "bm25 tf+length flips tfidf"
+      "rankBm25 1.5 0.75 [\"cat\"] idx2"
+      (list 1 2))
+    (list ;; with the second parameter at 0.0 the expected order matches "tfidf tf ordering idx2"
+      "bm25 b=0 ignores length"
+      "rankBm25 1.5 0.0 [\"cat\"] idx2"
+      (list 2 1))
+    (list
+      "bm25 alpha idx1"
+      "rankBm25 1.5 0.75 [\"alpha\"] idx1"
+      (list 1 2))
+    (list "bm25 absent empty" "rankBm25 1.5 0.75 [\"nope\"] idx1" (list))
+    (list
+      "bm25 single-doc term"
+      "rankBm25 1.5 0.75 [\"gamma\"] idx1"
+      (list 1))
+    (list "bm25 topN 1" "topNBm25 1 1.5 0.75 [\"cat\"] idx2" (list 1))
+    (list ;; result is sorted before comparison: only the candidate set is asserted, not the BM25 order
+      "bm25 same candidate set"
+      "sort (rankBm25 1.5 0.75 [\"alpha\", \"beta\"] idx1)"
+      (list 1 2 3))
+    (list ;; idx3 holds two identical docs, so these two cases pin the tie order
+      "tfidf stable tiebreak"
+      "rankTfIdf [\"kite\"] idx3"
+      (list 1 2))
+    (list
+      "bm25 stable tiebreak"
+      "rankBm25 1.5 0.75 [\"kite\"] idx3"
+      (list 1 2))
+    (list "numDocs" "[numDocs idx1]" (list 3))
+    (list "docLen counts tokens" "[docLen 1 idx1]" (list 4))
+    (list "docFreq via index" "[docFreq \"alpha\" idx1]" (list 2))))
+
+(define rank-results ;; results for every rank-cases expression (element 1), from one search-batch call
+  (search-batch rank-setup
+    (map (fn (entry) (nth entry 1)) rank-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth rank-results pos)
+      (nth entry 2)))
+  rank-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/rankq.sx
+++ b/lib/search/tests/rankq.sx
@@ -0,0 +1,67 @@
+;; Extension — boolean-filtered ranked search (filter then rank by relevance).
+;; Corpus:
+;;   1 "apple apple banana"   apple2 banana1
+;;   2 "apple cherry"         apple1 cherry1
+;;   3 "banana cherry"        banana1 cherry1
+;;   4 "apple banana cherry"  apple1 banana1 cherry1
+
+(define ;; Shared Haskell setup handed to search-batch: binds idx to docs 1-4 of the corpus above.
+  rankq-setup
+  "idx = indexDoc 4 \"apple banana cherry\" (indexDoc 3 \"banana cherry\" (indexDoc 2 \"apple cherry\" (indexDoc 1 \"apple apple banana\" emptyIndex)))\n")
+
+(define ;; Case table: (label, Haskell expression, expected value).
+  rankq-cases
+  (list
+    (list ;; queryTerms cases: expectations are the terms mentioned anywhere in the parsed query
+      "queryTerms and"
+      "queryTerms (parseQuery \"apple AND banana\")"
+      (list "apple" "banana"))
+    (list ;; the term under NOT ("b") is still expected in the list
+      "queryTerms or not"
+      "queryTerms (parseQuery \"a OR NOT b\")"
+      (list "a" "b"))
+    (list
+      "queryTerms phrase"
+      "queryTerms (parseQuery \"\\\"x y\\\" OR z\")"
+      (list "x" "y" "z"))
+    (list ;; searchRankTfIdf cases: expectations are the matching doc ids in result order
+      "and filter ranked by tf"
+      "searchRankTfIdf \"apple AND banana\" idx"
+      (list 1 4))
+    (list
+      "single term ranked tie"
+      "searchRankTfIdf \"cherry\" idx"
+      (list 2 3 4))
+    (list
+      "or filter ranked"
+      "searchRankTfIdf \"apple OR banana\" idx"
+      (list 1 4 2 3))
+    (list
+      "and-not narrows then ranks"
+      "searchRankTfIdf \"apple AND NOT banana\" idx"
+      (list 2))
+    (list
+      "phrase filter ranked"
+      "searchRankTfIdf \"\\\"apple banana\\\"\" idx"
+      (list 1 4))
+    (list "no matches" "searchRankTfIdf \"zzz\" idx" (list))
+    (list ;; result is sorted before comparison: only the matching set is asserted for BM25 here
+      "bm25 boolean ranked subset"
+      "sort (searchRankBm25 1.5 0.75 \"apple OR banana\" idx)"
+      (list 1 2 3 4))
+    (list
+      "bm25 and filter"
+      "searchRankBm25 1.5 0.75 \"apple AND NOT banana\" idx"
+      (list 2))))
+
+(define rankq-results ;; results for every rankq-cases expression (element 1), from one search-batch call
+  (search-batch rankq-setup
+    (map (fn (entry) (nth entry 1)) rankq-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth rankq-results pos)
+      (nth entry 2)))
+  rankq-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/stem.sx
+++ b/lib/search/tests/stem.sx
@@ -0,0 +1,47 @@
+;; Extension — stemming (suffix stripping). Scalar string results wrapped in [].
+
+(define ;; Case table: (label, Haskell expression, expected value); no shared setup is used.
+  stem-cases
+  (list
+    (list "stem plural s" "[stem \"cats\"]" (list "cat")) ;; stem returns one string, so the call is wrapped in []
+    (list "stem plural dogs" "[stem \"dogs\"]" (list "dog"))
+    (list "stem keeps ss" "[stem \"pass\"]" (list "pass"))
+    (list "stem short s unchanged" "[stem \"is\"]" (list "is"))
+    (list "stem es boxes" "[stem \"boxes\"]" (list "box"))
+    (list "stem es wishes" "[stem \"wishes\"]" (list "wish"))
+    (list "stem ies cities" "[stem \"cities\"]" (list "city"))
+    (list "stem ies parties" "[stem \"parties\"]" (list "party"))
+    (list "stem ing jumping" "[stem \"jumping\"]" (list "jump"))
+    (list "stem ing running literal" "[stem \"running\"]" (list "runn")) ;; the doubled consonant is expected to stay ("runn", not "run")
+    (list "stem ed jumped" "[stem \"jumped\"]" (list "jump"))
+    (list "stem ed wanted" "[stem \"wanted\"]" (list "want"))
+    (list "stem short ed unchanged" "[stem \"red\"]" (list "red"))
+    (list "stem no suffix" "[stem \"cat\"]" (list "cat"))
+    (list ;; stemText yields one space-joined string; stemTokens yields a list of tokens
+      "stemText normalizes and stems"
+      "[stemText \"Cats Running!\"]"
+      (list "cat runn"))
+    (list
+      "stemTokens list"
+      "stemTokens \"boxes and cats\""
+      (list "box" "and" "cat"))
+    (list ;; indexStemmed cases: "cats" and "cat" are expected under the same term "cat"
+      "indexStemmed unifies plural"
+      "map fst (lookupTerm \"cat\" (indexStemmed 2 \"a cat\" (indexStemmed 1 \"the cats\" emptyIndex)))"
+      (list 1 2))
+    (list
+      "indexStemmed stem query"
+      "map fst (lookupTerm (stem \"boxes\") (indexStemmed 1 \"many boxes\" emptyIndex))"
+      (list 1))))
+
+(define stem-results ;; results for every stem-cases expression (element 1); the setup string is empty
+  (search-batch ""
+    (map (fn (entry) (nth entry 1)) stem-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth stem-results pos)
+      (nth entry 2)))
+  stem-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/suggest.sx
+++ b/lib/search/tests/suggest.sx
@@ -0,0 +1,42 @@
+;; Extension — did-you-mean / spelling suggestion.
+;; Corpus terms (sorted): ample apple apply banana orange
+
+(define ;; Shared Haskell setup handed to search-batch: binds idx to a single document holding the five corpus terms.
+  suggest-setup
+  "idx = indexDoc 1 \"apple apply ample banana orange\" emptyIndex\n")
+
+(define ;; Case table: (label, Haskell expression, expected value); suggest returns one string and is wrapped in [].
+  suggest-cases
+  (list
+    (list "suggest exact term" "[suggest \"apple\" idx]" (list "apple"))
+    (list
+      "suggest misspelled banana"
+      "[suggest \"bananna\" idx]"
+      (list "banana"))
+    (list
+      "suggest missing letter orange"
+      "[suggest \"orang\" idx]"
+      (list "orange"))
+    (list "suggest closest apply" "[suggest \"aply\" idx]" (list "apply"))
+    (list "suggestN 1 banana" "suggestN 1 \"bananna\" idx" (list "banana")) ;; suggestN already returns a list: no [] wrapper
+    (list ;; "ample" and "apple" are both one edit from "aple"; the expectation lists them alphabetically
+      "suggestN 2 ties alpha"
+      "suggestN 2 \"aple\" idx"
+      (list "ample" "apple"))
+    (list "suggest empty term shortest" "[suggest \"\" idx]" (list "ample"))
+    (list "suggest empty index" "[suggest \"apple\" emptyIndex]" (list "")) ;; no candidate terms: suggest is expected to yield ""
+    (list "suggestN empty index" "suggestN 1 \"apple\" emptyIndex" (list))))
+
+(define suggest-results
+  ;; Results for every suggest-cases expression (element 1), batched with suggest-setup.
+  (search-batch suggest-setup
+    (map (fn (entry) (nth entry 1))
+      suggest-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth suggest-results pos)
+      (nth entry 2)))
+  suggest-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tests/syn.sx
+++ b/lib/search/tests/syn.sx
@@ -0,0 +1,53 @@
+;; Extension — synonym / query expansion.
+;; synmap: car -> automobile, vehicle ; big -> large
+;; Corpus: 1 "fast car"  2 "shiny automobile"  3 "big truck"  4 "large house"  5 "vehicle review"
+
+(define ;; Shared Haskell setup handed to search-batch: binds synmap (term -> synonyms pairs) and idx (docs 1-5 above).
+  syn-setup
+  "synmap = [(\"car\", [\"automobile\", \"vehicle\"]), (\"big\", [\"large\"])]\nidx = indexDoc 5 \"vehicle review\" (indexDoc 4 \"large house\" (indexDoc 3 \"big truck\" (indexDoc 2 \"shiny automobile\" (indexDoc 1 \"fast car\" emptyIndex))))\n")
+
+(define ;; Case table: (label, Haskell expression, expected value).
+  syn-cases
+  (list
+    (list ;; expandTerm cases: the queried term is expected first, followed by its synonyms in synmap order
+      "expand term with synonyms"
+      "expandTerm synmap \"car\""
+      (list "car" "automobile" "vehicle"))
+    (list
+      "expand single synonym"
+      "expandTerm synmap \"big\""
+      (list "big" "large"))
+    (list "expand unknown term" "expandTerm synmap \"banana\"" (list "banana"))
+    (list ;; synDocs / synRankTfIdf cases: expectations are doc ids matching the term or any of its synonyms
+      "syn docs union"
+      "synDocs synmap \"car\" idx"
+      (list 1 2 5))
+    (list
+      "syn docs single synonym"
+      "synDocs synmap \"big\" idx"
+      (list 3 4))
+    (list ;; "house" has no synmap entry, so only its own document is expected
+      "syn docs no synonyms"
+      "synDocs synmap \"house\" idx"
+      (list 4))
+    (list "syn docs absent" "synDocs synmap \"plane\" idx" (list))
+    (list
+      "syn rank expanded"
+      "synRankTfIdf synmap \"car\" idx"
+      (list 1 2 5))
+    (list
+      "syn rank single"
+      "synRankTfIdf synmap \"big\" idx"
+      (list 3 4))))
+
+(define syn-results ;; results for every syn-cases expression (element 1), from one search-batch call
+  (search-batch syn-setup
+    (map (fn (entry) (nth entry 1)) syn-cases)))
+
+(map-indexed ;; one hk-test per case: label, result at the same position, expected value
+  (fn (pos entry)
+    (hk-test (nth entry 0) (nth syn-results pos)
+      (nth entry 2)))
+  syn-cases)
+
+{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails} ;; final expression: exposes the hk-test-* values
--- a/lib/search/tokenize.sx
+++ b/lib/search/tokenize.sx
@@ -0,0 +1,8 @@
+;; search tokenizer — Haskell source fragment.
+;; normalize (lowercase; every non-alphanumeric char becomes a space, so punctuation also separates words), split on blanks, attach positions.
+;; tokens :: String -> [String]
+;; positioned :: String -> [(String, Int)]  -- 0-based ordinal positions
+
+(define ;; Haskell source text, one definition per \n-separated line; tokens and positioned are built from the helpers before them.
+  search/tokenize-src ;; isBlankCh tests only ' ', which suffices for tokens because normChar first maps every non-alphanumeric char to ' '.
+  "lowerChar c = chr (toLower (ord c))\nnormChar c = if isAlphaNum c then lowerChar c else ' '\nisBlankCh c = c == ' '\ndropBlanks [] = []\ndropBlanks (c:cs) = if isBlankCh c then dropBlanks cs else c:cs\ntakeWord [] = []\ntakeWord (c:cs) = if isBlankCh c then [] else c : takeWord cs\nafterWord [] = []\nafterWord (c:cs) = if isBlankCh c then c:cs else afterWord cs\nsplitWords s = let s2 = dropBlanks s in if null s2 then [] else takeWord s2 : splitWords (afterWord s2)\nappendStr a b = a ++ b\njoinChars cs = foldr appendStr \"\" cs\ntokens s = map joinChars (splitWords (map normChar s))\nposFrom i [] = []\nposFrom i (x:xs) = (x, i) : posFrom (i + 1) xs\npositioned s = posFrom 0 (tokens s)\n")