Fix cache listing to include files from new structure

- Update list_all() to scan cache_dir for legacy files directly
  (old files are stored as CACHE_DIR/{hash}, not CACHE_DIR/legacy/)
- Update cache listing endpoints to use cache_manager.list_all()
  instead of iterating CACHE_DIR.iterdir() directly
- This ensures uploaded files appear in the cache UI regardless
  of whether they're in the old or new cache structure

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-08 01:37:17 +00:00
parent a97c6309d5
commit 034c7542c4
2 changed files with 88 additions and 69 deletions

View File

@@ -278,12 +278,30 @@ class L1CacheManager:
def list_all(self) -> List[CachedFile]: def list_all(self) -> List[CachedFile]:
"""List all cached files.""" """List all cached files."""
files = [] files = []
seen_hashes = set()
# New cache structure entries
for entry in self.cache.list_entries(): for entry in self.cache.list_entries():
files.append(CachedFile.from_cache_entry(entry)) files.append(CachedFile.from_cache_entry(entry))
if entry.content_hash:
seen_hashes.add(entry.content_hash)
# Legacy files stored directly in cache_dir (old structure)
# These are files named by content_hash directly in CACHE_DIR
for f in self.cache_dir.iterdir():
# Skip directories and special files
if not f.is_file():
continue
# Skip metadata/auxiliary files
if f.suffix in ('.json', '.mp4'):
continue
# Skip if name doesn't look like a hash (64 hex chars)
if len(f.name) != 64 or not all(c in '0123456789abcdef' for c in f.name):
continue
# Skip if already seen via new cache
if f.name in seen_hashes:
continue
# Include legacy files
for f in self.legacy_dir.iterdir():
if f.is_file():
files.append(CachedFile( files.append(CachedFile(
node_id=f.name, node_id=f.name,
content_hash=f.name, content_hash=f.name,
@@ -292,6 +310,7 @@ class L1CacheManager:
node_type="legacy", node_type="legacy",
created_at=f.stat().st_mtime, created_at=f.stat().st_mtime,
)) ))
seen_hashes.add(f.name)
return files return files

View File

@@ -1348,13 +1348,14 @@ async def list_cache(
# Get hashes owned by/associated with this user # Get hashes owned by/associated with this user
user_hashes = get_user_cache_hashes(current_user) user_hashes = get_user_cache_hashes(current_user)
# Get cache items that belong to the user # Get cache items that belong to the user (from cache_manager)
cache_items = [] cache_items = []
if CACHE_DIR.exists(): for cached_file in cache_manager.list_all():
for f in CACHE_DIR.iterdir(): content_hash = cached_file.content_hash
if f.is_file() and not f.name.endswith('.provenance.json') and not f.name.endswith('.meta.json') and not f.name.endswith('.mp4'): if content_hash not in user_hashes:
if f.name in user_hashes: continue
meta = load_cache_meta(f.name)
meta = load_cache_meta(content_hash)
# Apply folder filter # Apply folder filter
if folder: if folder:
@@ -1374,11 +1375,10 @@ async def list_cache(
if tag not in meta.get("tags", []): if tag not in meta.get("tags", []):
continue continue
stat = f.stat()
cache_items.append({ cache_items.append({
"hash": f.name, "hash": content_hash,
"size": stat.st_size, "size": cached_file.size_bytes,
"mtime": stat.st_mtime, "mtime": cached_file.created_at,
"meta": meta "meta": meta
}) })
@@ -1485,7 +1485,7 @@ async def list_cache(
return HTMLResponse(render_page("Cache", content, current_user, active_tab="cache")) return HTMLResponse(render_page("Cache", content, current_user, active_tab="cache"))
# JSON response for APIs - list all hashes with optional pagination # JSON response for APIs - list all hashes with optional pagination
all_hashes = [f.name for f in CACHE_DIR.iterdir() if f.is_file() and not f.name.endswith('.provenance.json') and not f.name.endswith('.meta.json') and not f.name.endswith('.mp4')] all_hashes = [cf.content_hash for cf in cache_manager.list_all()]
total = len(all_hashes) total = len(all_hashes)
start = (page - 1) * limit start = (page - 1) * limit
end = start + limit end = start + limit
@@ -2620,14 +2620,15 @@ async def ui_cache_list(
# Get hashes owned by/associated with this user # Get hashes owned by/associated with this user
user_hashes = get_user_cache_hashes(current_user) user_hashes = get_user_cache_hashes(current_user)
# Get cache items that belong to the user # Get cache items that belong to the user (from cache_manager)
cache_items = [] cache_items = []
if CACHE_DIR.exists(): for cached_file in cache_manager.list_all():
for f in CACHE_DIR.iterdir(): content_hash = cached_file.content_hash
if f.is_file() and not f.name.endswith('.provenance.json') and not f.name.endswith('.meta.json') and not f.name.endswith('.mp4'): if content_hash not in user_hashes:
if f.name in user_hashes: continue
# Load metadata for filtering # Load metadata for filtering
meta = load_cache_meta(f.name) meta = load_cache_meta(content_hash)
# Apply folder filter # Apply folder filter
if folder: if folder:
@@ -2647,11 +2648,10 @@ async def ui_cache_list(
if tag not in meta.get("tags", []): if tag not in meta.get("tags", []):
continue continue
stat = f.stat()
cache_items.append({ cache_items.append({
"hash": f.name, "hash": content_hash,
"size": stat.st_size, "size": cached_file.size_bytes,
"mtime": stat.st_mtime, "mtime": cached_file.created_at,
"meta": meta "meta": meta
}) })