From 9c158ff8844ec5eeb1b8e10c6d3fcc31e3218453 Mon Sep 17 00:00:00 2001 From: gilesb Date: Fri, 9 Jan 2026 10:17:35 +0000 Subject: [PATCH] Fix media list duplicates and cache browse link - Database: Use DISTINCT ON to deduplicate items by content_hash - Database: Count unique content_hashes in count_user_items - Server: Fix media card link from /ui/cache to /cache - Server: Use /raw endpoint for image thumbnails - Server: Add seen_hashes dedup in media list iteration Co-Authored-By: Claude Opus 4.5 --- database.py | 40 ++++++++++++++++++++++++---------------- server.py | 16 ++++++++++++++-- 2 files changed, 38 insertions(+), 18 deletions(-) diff --git a/database.py b/database.py index 00fe672..074bb71 100644 --- a/database.py +++ b/database.py @@ -924,17 +924,21 @@ async def save_l2_share( async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit: int = 100, offset: int = 0) -> List[dict]: - """Get all items for a user, optionally filtered by type.""" + """Get all items for a user, optionally filtered by type. Deduplicates by content_hash.""" async with pool.acquire() as conn: if item_type: rows = await conn.fetch( """ - SELECT it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at, - ci.ipfs_cid - FROM item_types it - JOIN cache_items ci ON it.content_hash = ci.content_hash - WHERE it.actor_id = $1 AND it.type = $2 - ORDER BY it.created_at DESC + SELECT * FROM ( + SELECT DISTINCT ON (it.content_hash) + it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at, + ci.ipfs_cid + FROM item_types it + JOIN cache_items ci ON it.content_hash = ci.content_hash + WHERE it.actor_id = $1 AND it.type = $2 + ORDER BY it.content_hash, it.created_at DESC + ) deduped + ORDER BY created_at DESC LIMIT $3 OFFSET $4 """, actor_id, item_type, limit, offset @@ -942,12 +946,16 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit: else: rows = await conn.fetch( """ - SELECT it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at, - ci.ipfs_cid - FROM item_types it - JOIN cache_items ci ON it.content_hash = ci.content_hash - WHERE it.actor_id = $1 - ORDER BY it.created_at DESC + SELECT * FROM ( + SELECT DISTINCT ON (it.content_hash) + it.content_hash, it.type, it.description, it.filename, it.pinned, it.created_at, + ci.ipfs_cid + FROM item_types it + JOIN cache_items ci ON it.content_hash = ci.content_hash + WHERE it.actor_id = $1 + ORDER BY it.content_hash, it.created_at DESC + ) deduped + ORDER BY created_at DESC LIMIT $2 OFFSET $3 """, actor_id, limit, offset @@ -968,15 +976,15 @@ async def get_user_items(actor_id: str, item_type: Optional[str] = None, limit: async def count_user_items(actor_id: str, item_type: Optional[str] = None) -> int: - """Count items for a user.""" + """Count unique items (by content_hash) for a user.""" async with pool.acquire() as conn: if item_type: return await conn.fetchval( - "SELECT COUNT(*) FROM item_types WHERE actor_id = $1 AND type = $2", + "SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1 AND type = $2", actor_id, item_type ) else: return await conn.fetchval( - "SELECT COUNT(*) FROM item_types WHERE actor_id = $1", + "SELECT COUNT(DISTINCT content_hash) FROM item_types WHERE actor_id = $1", actor_id ) diff --git a/server.py b/server.py index c4e3b28..496976d 100644 --- a/server.py +++ b/server.py @@ -2395,11 +2395,17 @@ async def list_media( # Get cache items that belong to the user (from cache_manager) cache_items = [] + seen_hashes = set() # Deduplicate by content_hash for cached_file in cache_manager.list_all(): content_hash = cached_file.content_hash if content_hash not in user_hashes: continue + # Skip duplicates (same content from multiple runs) + if content_hash in seen_hashes: + continue + seen_hashes.add(content_hash) + # Skip recipes - they have their own section if cached_file.node_type == "recipe": continue @@ -3812,11 +3818,17 @@ async def ui_media_list( # Get cache items that belong to the user (from cache_manager) cache_items = [] + seen_hashes = set() # Deduplicate by content_hash for cached_file in cache_manager.list_all(): content_hash = cached_file.content_hash if content_hash not in user_hashes: continue + # Skip duplicates (same content from multiple runs) + if content_hash in seen_hashes: + continue + seen_hashes.add(content_hash) + # Skip recipes - they have their own section if cached_file.node_type == "recipe": continue @@ -3895,7 +3907,7 @@ async def ui_media_list( size_str = f"{size} bytes" html_parts.append(f''' - +
@@ -3913,7 +3925,7 @@ async def ui_media_list( video_src = video_src_for_request(content_hash, request) html_parts.append(f'') elif media_type == "image": - html_parts.append(f'{content_hash[:16]}') + html_parts.append(f'{content_hash[:16]}') else: html_parts.append('

Unknown file type

')