From 034c7542c4c8440f2a1e8f3dc808aeb8d8214b4b Mon Sep 17 00:00:00 2001
From: gilesb <giles.bradshaw@sigyl.com>
Date: Thu, 8 Jan 2026 01:37:17 +0000
Subject: [PATCH] Fix cache listing to include files from new structure

- Update list_all() to scan cache_dir for legacy files directly
  (old files stored as CACHE_DIR/{hash}, not CACHE_DIR/legacy/)
- Update cache listing endpoints to use cache_manager.list_all()
  instead of iterating CACHE_DIR.iterdir() directly
- This ensures uploaded files appear in the cache UI and in the JSON
  cache listing regardless of whether they're in the old or new cache
  structure

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 cache_manager.py |  41 ++++++++++++-----
 server.py        | 116 +++++++++++++++++++++++------------------------
 2 files changed, 88 insertions(+), 69 deletions(-)

diff --git a/cache_manager.py b/cache_manager.py
index 24c798a..51b8f8a 100644
--- a/cache_manager.py
+++ b/cache_manager.py
@@ -278,20 +278,39 @@ class L1CacheManager:
     def list_all(self) -> List[CachedFile]:
         """List all cached files."""
         files = []
+        seen_hashes = set()
+
+        # New cache structure entries
         for entry in self.cache.list_entries():
             files.append(CachedFile.from_cache_entry(entry))
+            if entry.content_hash:
+                seen_hashes.add(entry.content_hash)
 
-        # Include legacy files
-        for f in self.legacy_dir.iterdir():
-            if f.is_file():
-                files.append(CachedFile(
-                    node_id=f.name,
-                    content_hash=f.name,
-                    path=f,
-                    size_bytes=f.stat().st_size,
-                    node_type="legacy",
-                    created_at=f.stat().st_mtime,
-                ))
+        # Legacy files live directly in cache_dir (old structure), named by
+        # content hash: 64 lowercase hex chars (so *.json/*.mp4 never match).
+        hex_digits = frozenset('0123456789abcdef')
+        for f in self.cache_dir.iterdir():
+            name = f.name
+            if len(name) != 64 or not hex_digits.issuperset(name):
+                continue
+            # Skip if already seen via new cache
+            if name in seen_hashes:
+                continue
+            # Skip directories and special files
+            if not f.is_file():
+                continue
+
+            # Stat once so size and mtime come from the same snapshot
+            stat = f.stat()
+            files.append(CachedFile(
+                node_id=name,
+                content_hash=name,
+                path=f,
+                size_bytes=stat.st_size,
+                node_type="legacy",
+                created_at=stat.st_mtime,
+            ))
+            seen_hashes.add(name)
 
         return files
 
diff --git a/server.py b/server.py
index ef227b7..dab6bd4 100644
--- a/server.py
+++ b/server.py
@@ -1348,39 +1348,39 @@ async def list_cache(
         # Get hashes owned by/associated with this user
         user_hashes = get_user_cache_hashes(current_user)
 
-        # Get cache items that belong to the user
+        # Get cache items that belong to the user (from cache_manager)
         cache_items = []
-        if CACHE_DIR.exists():
-            for f in CACHE_DIR.iterdir():
-                if f.is_file() and not f.name.endswith('.provenance.json') and not f.name.endswith('.meta.json') and not f.name.endswith('.mp4'):
-                    if f.name in user_hashes:
-                        meta = load_cache_meta(f.name)
+        for cached_file in cache_manager.list_all():
+            content_hash = cached_file.content_hash
+            if content_hash not in user_hashes:
+                continue
 
-                        # Apply folder filter
-                        if folder:
-                            item_folder = meta.get("folder", "/")
-                            if folder != "/" and not item_folder.startswith(folder):
-                                continue
-                            if folder == "/" and item_folder != "/":
-                                continue
+            meta = load_cache_meta(content_hash)
 
-                        # Apply collection filter
-                        if collection:
-                            if collection not in meta.get("collections", []):
-                                continue
+            # Apply folder filter
+            if folder:
+                item_folder = meta.get("folder", "/")
+                if folder != "/" and not item_folder.startswith(folder):
+                    continue
+                if folder == "/" and item_folder != "/":
+                    continue
 
-                        # Apply tag filter
-                        if tag:
-                            if tag not in meta.get("tags", []):
-                                continue
+            # Apply collection filter
+            if collection:
+                if collection not in meta.get("collections", []):
+                    continue
 
-                        stat = f.stat()
-                        cache_items.append({
-                            "hash": f.name,
-                            "size": stat.st_size,
-                            "mtime": stat.st_mtime,
-                            "meta": meta
-                        })
+            # Apply tag filter
+            if tag:
+                if tag not in meta.get("tags", []):
+                    continue
+
+            cache_items.append({
+                "hash": content_hash,
+                "size": cached_file.size_bytes,
+                "mtime": cached_file.created_at,
+                "meta": meta
+            })
 
         # Sort by modification time (newest first)
         cache_items.sort(key=lambda x: x["mtime"], reverse=True)
@@ -1485,7 +1485,7 @@ async def list_cache(
         return HTMLResponse(render_page("Cache", content, current_user, active_tab="cache"))
 
     # JSON response for APIs - list all hashes with optional pagination
-    all_hashes = [f.name for f in CACHE_DIR.iterdir() if f.is_file() and not f.name.endswith('.provenance.json') and not f.name.endswith('.meta.json') and not f.name.endswith('.mp4')]
+    all_hashes = [cf.content_hash for cf in cache_manager.list_all()]
     total = len(all_hashes)
     start = (page - 1) * limit
     end = start + limit
@@ -2620,40 +2620,40 @@ async def ui_cache_list(
     # Get hashes owned by/associated with this user
     user_hashes = get_user_cache_hashes(current_user)
 
-    # Get cache items that belong to the user
+    # Get cache items that belong to the user (from cache_manager)
     cache_items = []
-    if CACHE_DIR.exists():
-        for f in CACHE_DIR.iterdir():
-            if f.is_file() and not f.name.endswith('.provenance.json') and not f.name.endswith('.meta.json') and not f.name.endswith('.mp4'):
-                if f.name in user_hashes:
-                    # Load metadata for filtering
-                    meta = load_cache_meta(f.name)
+    for cached_file in cache_manager.list_all():
+        content_hash = cached_file.content_hash
+        if content_hash not in user_hashes:
+            continue
 
-                    # Apply folder filter
-                    if folder:
-                        item_folder = meta.get("folder", "/")
-                        if folder != "/" and not item_folder.startswith(folder):
-                            continue
-                        if folder == "/" and item_folder != "/":
-                            continue
+        # Load metadata for filtering
+        meta = load_cache_meta(content_hash)
 
-                    # Apply collection filter
-                    if collection:
-                        if collection not in meta.get("collections", []):
-                            continue
+        # Apply folder filter
+        if folder:
+            item_folder = meta.get("folder", "/")
+            if folder != "/" and not item_folder.startswith(folder):
+                continue
+            if folder == "/" and item_folder != "/":
+                continue
 
-                    # Apply tag filter
-                    if tag:
-                        if tag not in meta.get("tags", []):
-                            continue
+        # Apply collection filter
+        if collection:
+            if collection not in meta.get("collections", []):
+                continue
 
-                    stat = f.stat()
-                    cache_items.append({
-                        "hash": f.name,
-                        "size": stat.st_size,
-                        "mtime": stat.st_mtime,
-                        "meta": meta
-                    })
+        # Apply tag filter
+        if tag:
+            if tag not in meta.get("tags", []):
+                continue
+
+        cache_items.append({
+            "hash": content_hash,
+            "size": cached_file.size_bytes,
+            "mtime": cached_file.created_at,
+            "meta": meta
+        })
 
     # Sort by modification time (newest first)
     cache_items.sort(key=lambda x: x["mtime"], reverse=True)