From 9c158ff8844ec5eeb1b8e10c6d3fcc31e3218453 Mon Sep 17 00:00:00 2001
From: gilesb <giles.bradshaw@sigyl.com>
Date: Fri, 9 Jan 2026 10:17:35 +0000
Subject: [PATCH] Fix media list duplicates and cache browse link

- Database: Use DISTINCT ON to deduplicate items by content_hash,
  keeping the most recently created row for each hash
- Database: Count unique content_hashes in count_user_items
- Server: Fix media card link from /ui/cache to /cache
- Server: Use /raw endpoint for image thumbnails
- Server: Track seen_hashes to skip repeated content_hashes while
  iterating cache items in list_media and ui_media_list

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 database.py | 40 ++++++++++++++++++++++++----------------
 server.py   | 16 ++++++++++++++--
 2 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/database.py b/database.py
index 00fe672..074bb71 100644
--- a/database.py
+++ b/database.py
@@ -924,17 +924,21 @@ async def save_l2_share(
 
 
 async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit: int = 100, offset: int = 0) -> List[dict]:
-    """Get all items for a user, optionally filtered by type."""
+    """Get all items for a user, optionally filtered by type. Deduplicates by content_hash."""
     async with pool.acquire() as conn:
         if item_type:
             rows = await conn.fetch(
                 """
-                SELECT it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at,
-                       ci.ipfs_cid
-                FROM item_types it
-                JOIN cache_items ci ON it.content_hash = ci.content_hash
-                WHERE it.actor_id = $1 AND it.type = $2
-                ORDER BY it.created_at DESC
+                SELECT * FROM (
+                    SELECT DISTINCT ON (it.content_hash)
+                           it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at,
+                           ci.ipfs_cid
+                    FROM item_types it
+                    JOIN cache_items ci ON it.content_hash = ci.content_hash
+                    WHERE it.actor_id = $1 AND it.type = $2
+                    ORDER BY it.content_hash, it.created_at DESC
+                ) deduped
+                ORDER BY created_at DESC
                 LIMIT $3 OFFSET $4
                 """,
                 actor_id, item_type, limit, offset
@@ -942,12 +946,16 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit:
         else:
             rows = await conn.fetch(
                 """
-                SELECT it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at,
-                       ci.ipfs_cid
-                FROM item_types it
-                JOIN cache_items ci ON it.content_hash = ci.content_hash
-                WHERE it.actor_id = $1
-                ORDER BY it.created_at DESC
+                SELECT * FROM (
+                    SELECT DISTINCT ON (it.content_hash)
+                           it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at,
+                           ci.ipfs_cid
+                    FROM item_types it
+                    JOIN cache_items ci ON it.content_hash = ci.content_hash
+                    WHERE it.actor_id = $1
+                    ORDER BY it.content_hash, it.created_at DESC
+                ) deduped
+                ORDER BY created_at DESC
                 LIMIT $2 OFFSET $3
                 """,
                 actor_id, limit, offset
@@ -968,15 +976,15 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit:
 
 
 async def count_user_items(actor_id: str, item_type: Optional[str] = None) -> int:
-    """Count items for a user."""
+    """Count unique items (by content_hash) for a user."""
     async with pool.acquire() as conn:
         if item_type:
             return await conn.fetchval(
-                "SELECT COUNT(*) FROM item_types WHERE actor_id = $1 AND type = $2",
+                "SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1 AND type = $2",
                 actor_id, item_type
             )
         else:
             return await conn.fetchval(
-                "SELECT COUNT(*) FROM item_types WHERE actor_id = $1",
+                "SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1",
                 actor_id
             )
diff --git a/server.py b/server.py
index c4e3b28..496976d 100644
--- a/server.py
+++ b/server.py
@@ -2395,11 +2395,17 @@ async def list_media(
 
         # Get cache items that belong to the user (from cache_manager)
         cache_items = []
+        seen_hashes = set()  # Deduplicate by content_hash
         for cached_file in cache_manager.list_all():
             content_hash = cached_file.content_hash
             if content_hash not in user_hashes:
                 continue
 
+            # Skip duplicates (same content from multiple runs)
+            if content_hash in seen_hashes:
+                continue
+            seen_hashes.add(content_hash)
+
             # Skip recipes - they have their own section
             if cached_file.node_type == "recipe":
                 continue
@@ -3812,11 +3818,17 @@ async def ui_media_list(
 
     # Get cache items that belong to the user (from cache_manager)
     cache_items = []
+    seen_hashes = set()  # Deduplicate by content_hash
     for cached_file in cache_manager.list_all():
         content_hash = cached_file.content_hash
         if content_hash not in user_hashes:
             continue
 
+        # Skip duplicates (same content from multiple runs)
+        if content_hash in seen_hashes:
+            continue
+        seen_hashes.add(content_hash)
+
         # Skip recipes - they have their own section
         if cached_file.node_type == "recipe":
             continue
@@ -3895,7 +3907,7 @@ async def ui_media_list(
             size_str = f"{size} bytes"
 
         html_parts.append(f'''
-        <a href="/ui/cache/{content_hash}" class="block">
+        <a href="/cache/{content_hash}" class="block">
         <div class="bg-dark-700 rounded-lg p-4 hover:bg-dark-600 transition-colors">
             <div class="flex items-center justify-between gap-2 mb-3">
                 <div class="flex items-center gap-2">
@@ -3913,7 +3925,7 @@ async def ui_media_list(
             video_src = video_src_for_request(content_hash, request)
             html_parts.append(f'<video src="{video_src}" controls muted loop playsinline class="max-h-32 rounded"></video>')
         elif media_type == "image":
-            html_parts.append(f'<img src="/cache/{content_hash}" alt="{content_hash[:16]}" class="max-h-32 rounded object-contain">')
+            html_parts.append(f'<img src="/cache/{content_hash}/raw" alt="{content_hash[:16]}" class="max-h-32 rounded object-contain">')
         else:
             html_parts.append('<p class="text-gray-400 text-sm py-4">Unknown file type</p>')