;; search inverted index — Haskell source fragment (depends on tokenize).
;;
;; Index = [(Term, [(DocId, [Pos])])], sorted by Term; postings sorted by DocId.
;; Data.Map's public API lacks toList/keys/map/filter, so a sorted assoc-list
;; index is used — it is the conceptual `Map Term [(DocId,[Pos])]` and exposes
;; term iteration (allTerms) and df naturally for ranking.
;;
;; Public API:
;;   emptyIndex :: Index
;;   indexDoc   :: DocId -> String -> Index -> Index   (re-index replaces)
;;   lookupTerm :: Term -> Index -> [(DocId, [Pos])]    ([] when the term is absent)
;;   deleteDoc  :: DocId -> Index -> Index
;;   docFreq    :: Term -> Index -> Int                 (length of the posting list)
;;   allTerms   :: Index -> [Term]
;;
;; Internal helpers:
;;   groupTok   folds (Term, Pos) pairs into (Term, [Pos]) groups; groups keep
;;              first-occurrence order and positions keep encounter order.
;;   insPosting inserts (DocId, [Pos]) into a DocId-sorted posting list,
;;              overwriting an existing entry for the same DocId.
;;   insTerm    inserts a posting under its Term, keeping the index Term-sorted.
;;   deleteDoc  filters the DocId out of every posting list and then drops the
;;              terms whose posting list became empty.
;;
;; indexDoc first runs deleteDoc on the incoming index and only then inserts
;; the new postings. Without that step a re-index would overwrite postings only
;; for terms present in the new text, leaving stale postings behind for terms
;; the document no longer contains. The deleteDoc helpers are defined ahead of
;; indexDoc so the fragment stays in define-before-use order.
;;
;; NOTE(review): `positioned` is not defined in this fragment; presumably it is
;; supplied by the tokenize fragment and yields (Term, Pos) pairs — confirm.
(define search/index-src "emptyIndex = []\ngroupBump [] t p = [(t, [p])]\ngroupBump (g:gs) t p = if fst g == t then (t, snd g ++ [p]) : gs else g : groupBump gs t p\ngroupStep acc tp = groupBump acc (fst tp) (snd tp)\ngroupTok pairs = foldl groupStep [] pairs\ninsPosting d ps [] = [(d, ps)]\ninsPosting d ps (q:qs) = if d < fst q then (d, ps) : q : qs else if d == fst q then (d, ps) : qs else q : insPosting d ps qs\ninsTerm t d ps [] = [(t, [(d, ps)])]\ninsTerm t d ps (e:es) = if t < fst e then (t, [(d, ps)]) : e : es else if t == fst e then (fst e, insPosting d ps (snd e)) : es else e : insTerm t d ps es\nindexStep d ix tp = insTerm (fst tp) d (snd tp) ix\npostingKeep d q = fst q /= d\ndropTermDoc d e = (fst e, filter (postingKeep d) (snd e))\nplKeep e = not (null (snd e))\ndeleteDoc d idx = filter plKeep (map (dropTermDoc d) idx)\nindexDoc d text idx = foldl (indexStep d) (deleteDoc d idx) (groupTok (positioned text))\nlookupTerm t idx = case lookup t idx of { Nothing -> []; Just pl -> pl }\ndocFreq t idx = length (lookupTerm t idx)\nallTerms idx = map fst idx\n")