search: Phase 4 federation merge + ACL post-filter + 21 tests
Some checks failed
Test, Build, and Deploy / test-build-deploy (push) Failing after 39s

fedIndex merges per-peer inverted indices (union posting lists per term) after
relabelling local DocIds to global gid = peer*1000 + local — dedupe by
(peer,doc-id) is automatic and positions survive, so ranking runs once over the
merge and interleaves peers by score. ACL is a post-rank filter over an injected
permit predicate (searchTfIdfAcl/topNTfIdfAcl/searchBm25Acl). Roadmap complete,
122/122.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-06-06 20:08:08 +00:00
parent a3f9d4f6c9
commit 77ab827b91
7 changed files with 148 additions and 12 deletions

View File

@@ -0,0 +1,102 @@
;; Phase 4 — federation (merge per-peer indices) + ACL post-filter.
;; Peers (global id = peer*1000 + local; unique only while local ids stay below 1000):
;; peer 1: 1 "alpha beta" 2 "alpha gamma" -> 1001 1002
;; peer 2: 1 "alpha delta" 2 "beta gamma" -> 2001 2002
;; ACL predicates are injected (viewer baked in by the caller), applied post-rank.
;; Setup source text passed to search-batch as its first argument (see
;; fed-results). Presumably it is evaluated ahead of each case expression —
;; confirm against search-batch. It binds:
;;   p1, p2     - per-peer indices built with indexDoc on top of emptyIndex
;;   fed        - fedIndex over [(1, p1), (2, p2)]
;;   permitP1   - permits gids below 2000
;;   permitNone - permits nothing
;;   permitList - permits exactly gids 1002 and 2001
(define
fed-setup
"p1 = indexDoc 2 \"alpha gamma\" (indexDoc 1 \"alpha beta\" emptyIndex)\np2 = indexDoc 2 \"beta gamma\" (indexDoc 1 \"alpha delta\" emptyIndex)\nfed = fedIndex [(1, p1), (2, p2)]\npermitP1 g = g < 2000\npermitNone g = False\npermitList g = elem g [1002, 2001]\n")
;; Federation + ACL test table.
;; Each case is a 3-element list: (name expression expected)
;;   0 - test name reported through hk-test
;;   1 - expression source, evaluated by search-batch with fed-setup
;;   2 - expected result as a list
;; Expected gids follow the header's scheme (peer*1000 + local).
(define
  fed-cases
  (list
    ;; --- federation merge ---
    (list
      "fed merges all docs"
      "sort (allDocs fed)"
      (list 1001 1002 2001 2002))
    (list
      "fed docFreq across peers"
      "[docFreq \"alpha\" fed]"
      (list 3))
    (list "fed docFreq beta" "[docFreq \"beta\" fed]" (list 2))
    (list "fed numDocs" "[numDocs fed]" (list 4))
    (list
      "fed term lookup spans peers"
      "map fst (lookupTerm \"gamma\" fed)"
      (list 1002 2002))
    (list
      "fed preserves positions"
      "lookupTerm \"beta\" fed"
      (list
        (list 1001 (list 1))
        (list 2002 (list 0))))
    ;; --- ranking over the merged index ---
    (list
      "fed rank alpha tie by gid"
      "rankTfIdf [\"alpha\"] fed"
      (list 1001 1002 2001))
    (list
      "fed rank beta"
      "rankTfIdf [\"beta\"] fed"
      (list 1001 2002))
    ;; --- boolean / phrase queries ---
    (list
      "fed boolean and"
      "searchQuery \"alpha AND beta\" fed"
      (list 1001))
    (list
      "fed boolean or"
      "searchQuery \"delta OR barks\" fed"
      (list 2001))
    (list
      "fed phrase within peer1"
      "searchQuery \"\\\"alpha beta\\\"\" fed"
      (list 1001))
    (list
      "fed phrase within peer2"
      "searchQuery \"\\\"beta gamma\\\"\" fed"
      (list 2002))
    (list
      "fed phrase peer2 alpha delta"
      "searchQuery \"\\\"alpha delta\\\"\" fed"
      (list 2001))
    ;; --- degenerate federations ---
    (list "fed empty peer list" "allDocs (fedIndex [])" (list))
    (list
      "fed single relabelled peer"
      "rankTfIdf [\"alpha\"] (fedIndex [(5, p1)])"
      (list 5001 5002))
    ;; --- ACL post-filter ---
    (list
      "acl peer1 only"
      "aclFilter permitP1 (rankTfIdf [\"alpha\"] fed)"
      (list 1001 1002))
    (list
      "acl allowlist preserves rank order"
      "aclFilter permitList (rankTfIdf [\"alpha\"] fed)"
      (list 1002 2001))
    (list
      "acl topN after filter"
      "topNTfIdfAcl 1 permitP1 [\"alpha\"] fed"
      (list 1001))
    ;; The permitP1 case above cannot tell filter-then-truncate from
    ;; truncate-then-filter, because the top hit (1001) is permitted.
    ;; Here the top hit is denied by permitList: truncating first would
    ;; yield an empty list, filtering first yields the next permitted hit.
    (list
      "acl topN truncates after filtering"
      "topNTfIdfAcl 1 permitList [\"alpha\"] fed"
      (list 1002))
    (list
      "acl denies all"
      "aclFilter permitNone (rankTfIdf [\"alpha\"] fed)"
      (list))
    (list
      "acl on bm25"
      "searchBm25Acl permitP1 1.5 0.75 [\"alpha\"] fed"
      (list 1001 1002))
    (list
      "acl end-to-end tfidf"
      "searchTfIdfAcl permitP1 [\"alpha\"] fed"
      (list 1001 1002))))
;; Collect the expression source (element 1) of every case and evaluate the
;; whole batch through search-batch, with fed-setup as its first argument.
;; Results are later read back by case index.
(define fed-results
  (search-batch
    fed-setup
    (map (fn (row) (nth row 1)) fed-cases)))
;; Walk the cases with their index and hand hk-test the case name (element 0),
;; the batch result at the same index, and the expected value (element 2).
(map-indexed
  (fn (idx row)
    (hk-test
      (nth row 0)
      (nth fed-results idx)
      (nth row 2)))
  fed-cases)
;; Final expression of the file: a summary map built from hk-test-fail,
;; hk-test-pass and hk-test-fails (all defined outside this file).
;; NOTE(review): presumably fail/pass counters and a list of failure details
;; maintained by hk-test — confirm against the harness.
{:fail hk-test-fail :pass hk-test-pass :fails hk-test-fails}