Fix cache listing to include files from new structure

- Update list_all() to scan cache_dir for legacy files directly,
  since old files were stored as CACHE_DIR/{hash} rather than under
  CACHE_DIR/legacy/
- Update cache listing endpoints to use cache_manager.list_all()
  instead of iterating CACHE_DIR.iterdir() directly
- This ensures uploaded files appear in the cache UI regardless
  of whether they're in the old or new cache structure

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-08 01:37:17 +00:00
parent a97c6309d5
commit 034c7542c4
2 changed files with 88 additions and 69 deletions

View File

@@ -278,20 +278,39 @@ class L1CacheManager:
def list_all(self) -> List[CachedFile]:
    """List all cached files across every cache layout.

    Collects entries from three places, in order:
      1. the new cache structure (``self.cache.list_entries()``),
      2. the legacy directory (``self.legacy_dir``),
      3. hash-named files stored directly in ``self.cache_dir``
         (the oldest layout, where blobs live at CACHE_DIR/{hash}).

    Entries are de-duplicated by content hash so the same blob is
    reported only once even if it exists in multiple layouts.

    Returns:
        List[CachedFile]: one entry per unique cached file.
    """
    files = []
    seen_hashes = set()
    # New cache structure entries take precedence.
    for entry in self.cache.list_entries():
        files.append(CachedFile.from_cache_entry(entry))
        if entry.content_hash:
            seen_hashes.add(entry.content_hash)
    # Include legacy files. Filenames double as content hashes here
    # (the original code used f.name for both node_id and content_hash),
    # so skip any hash already reported via the new cache structure to
    # avoid listing the same blob twice.
    for f in self.legacy_dir.iterdir():
        if not f.is_file():
            continue
        if f.name in seen_hashes:
            continue
        st = f.stat()  # stat once; reused for both size and mtime
        files.append(CachedFile(
            node_id=f.name,
            content_hash=f.name,
            path=f,
            size_bytes=st.st_size,
            node_type="legacy",
            created_at=st.st_mtime,
        ))
        seen_hashes.add(f.name)
    # Legacy files stored directly in cache_dir (old structure).
    # These are files named by content_hash directly in CACHE_DIR.
    for f in self.cache_dir.iterdir():
        # Skip directories and special files.
        if not f.is_file():
            continue
        # Skip metadata/auxiliary files (e.g. *.meta.json, *.mp4).
        if f.suffix in ('.json', '.mp4'):
            continue
        # Skip if name doesn't look like a hash (64 lowercase hex
        # chars — presumably SHA-256; TODO confirm hash algorithm).
        if len(f.name) != 64 or not all(c in '0123456789abcdef' for c in f.name):
            continue
        # Skip if already seen via the new cache or legacy_dir.
        if f.name in seen_hashes:
            continue
        st = f.stat()  # stat once; reused for both size and mtime
        files.append(CachedFile(
            node_id=f.name,
            content_hash=f.name,
            path=f,
            size_bytes=st.st_size,
            node_type="legacy",
            created_at=st.st_mtime,
        ))
        seen_hashes.add(f.name)
    return files

116
server.py
View File

@@ -1348,39 +1348,39 @@ async def list_cache(
# Get hashes owned by/associated with this user
user_hashes = get_user_cache_hashes(current_user)
# Get cache items that belong to the user
# Get cache items that belong to the user (from cache_manager)
cache_items = []
if CACHE_DIR.exists():
for f in CACHE_DIR.iterdir():
if f.is_file() and not f.name.endswith('.provenance.json') and not f.name.endswith('.meta.json') and not f.name.endswith('.mp4'):
if f.name in user_hashes:
meta = load_cache_meta(f.name)
for cached_file in cache_manager.list_all():
content_hash = cached_file.content_hash
if content_hash not in user_hashes:
continue
# Apply folder filter
if folder:
item_folder = meta.get("folder", "/")
if folder != "/" and not item_folder.startswith(folder):
continue
if folder == "/" and item_folder != "/":
continue
meta = load_cache_meta(content_hash)
# Apply collection filter
if collection:
if collection not in meta.get("collections", []):
continue
# Apply folder filter
if folder:
item_folder = meta.get("folder", "/")
if folder != "/" and not item_folder.startswith(folder):
continue
if folder == "/" and item_folder != "/":
continue
# Apply tag filter
if tag:
if tag not in meta.get("tags", []):
continue
# Apply collection filter
if collection:
if collection not in meta.get("collections", []):
continue
stat = f.stat()
cache_items.append({
"hash": f.name,
"size": stat.st_size,
"mtime": stat.st_mtime,
"meta": meta
})
# Apply tag filter
if tag:
if tag not in meta.get("tags", []):
continue
cache_items.append({
"hash": content_hash,
"size": cached_file.size_bytes,
"mtime": cached_file.created_at,
"meta": meta
})
# Sort by modification time (newest first)
cache_items.sort(key=lambda x: x["mtime"], reverse=True)
@@ -1485,7 +1485,7 @@ async def list_cache(
return HTMLResponse(render_page("Cache", content, current_user, active_tab="cache"))
# JSON response for APIs - list all hashes with optional pagination
all_hashes = [f.name for f in CACHE_DIR.iterdir() if f.is_file() and not f.name.endswith('.provenance.json') and not f.name.endswith('.meta.json') and not f.name.endswith('.mp4')]
all_hashes = [cf.content_hash for cf in cache_manager.list_all()]
total = len(all_hashes)
start = (page - 1) * limit
end = start + limit
@@ -2620,40 +2620,40 @@ async def ui_cache_list(
# Get hashes owned by/associated with this user
user_hashes = get_user_cache_hashes(current_user)
# Get cache items that belong to the user
# Get cache items that belong to the user (from cache_manager)
cache_items = []
if CACHE_DIR.exists():
for f in CACHE_DIR.iterdir():
if f.is_file() and not f.name.endswith('.provenance.json') and not f.name.endswith('.meta.json') and not f.name.endswith('.mp4'):
if f.name in user_hashes:
# Load metadata for filtering
meta = load_cache_meta(f.name)
for cached_file in cache_manager.list_all():
content_hash = cached_file.content_hash
if content_hash not in user_hashes:
continue
# Apply folder filter
if folder:
item_folder = meta.get("folder", "/")
if folder != "/" and not item_folder.startswith(folder):
continue
if folder == "/" and item_folder != "/":
continue
# Load metadata for filtering
meta = load_cache_meta(content_hash)
# Apply collection filter
if collection:
if collection not in meta.get("collections", []):
continue
# Apply folder filter
if folder:
item_folder = meta.get("folder", "/")
if folder != "/" and not item_folder.startswith(folder):
continue
if folder == "/" and item_folder != "/":
continue
# Apply tag filter
if tag:
if tag not in meta.get("tags", []):
continue
# Apply collection filter
if collection:
if collection not in meta.get("collections", []):
continue
stat = f.stat()
cache_items.append({
"hash": f.name,
"size": stat.st_size,
"mtime": stat.st_mtime,
"meta": meta
})
# Apply tag filter
if tag:
if tag not in meta.get("tags", []):
continue
cache_items.append({
"hash": content_hash,
"size": cached_file.size_bytes,
"mtime": cached_file.created_at,
"meta": meta
})
# Sort by modification time (newest first)
cache_items.sort(key=lambda x: x["mtime"], reverse=True)