From f8ec42b44504c6675b186b45eebbe1bc8283291c Mon Sep 17 00:00:00 2001 From: gilesb Date: Thu, 8 Jan 2026 01:21:11 +0000 Subject: [PATCH] Refactor cache access to use cache_manager consistently - Remove symlink hack from cache_file() - no longer needed - Add get_cache_path() helper for content_hash lookups - Update all CACHE_DIR / content_hash patterns to use cache_manager - Fix cache_manager.get_by_content_hash() to check path.exists() - Fix legacy path lookup (cache_dir not legacy_dir) - Update upload endpoint to use cache_manager.put() This ensures cache lookups work correctly for both legacy files (stored directly in CACHE_DIR) and new files (stored in nodes/). Co-Authored-By: Claude Opus 4.5 --- cache_manager.py | 18 +++---- server.py | 138 ++++++++++++++++++----------------------------- 2 files changed, 61 insertions(+), 95 deletions(-) diff --git a/cache_manager.py b/cache_manager.py index 5f4f3fa..24c798a 100644 --- a/cache_manager.py +++ b/cache_manager.py @@ -243,25 +243,25 @@ class L1CacheManager: def get_by_content_hash(self, content_hash: str) -> Optional[Path]: """Get cached file path by content_hash.""" - # Check index first + # Check index first (new cache structure) node_id = self._content_index.get(content_hash) if node_id: path = self.cache.get(node_id) - if path: + if path and path.exists(): return path - # Check legacy directory - legacy_path = self.legacy_dir / content_hash - if legacy_path.exists(): - return legacy_path - - # Scan cache entries (fallback) + # Scan cache entries (fallback for new structure) entry = self.cache.find_by_content_hash(content_hash) - if entry: + if entry and entry.output_path.exists(): self._content_index[content_hash] = entry.node_id self._save_content_index() return entry.output_path + # Check legacy location (files stored directly as CACHE_DIR/{content_hash}) + legacy_path = self.cache_dir / content_hash + if legacy_path.exists() and legacy_path.is_file(): + return legacy_path + return None def has_content(self, content_hash: str) -> bool: diff --git a/server.py b/server.py index 256a15f..c6a4cd1 100644 --- a/server.py +++ b/server.py @@ -161,24 +161,16 @@ def cache_file(source: Path, node_type: str = "output") -> str: Copy file to cache using L1CacheManager, return content hash. Uses artdag's Cache internally for proper tracking. - Also creates a symlink in legacy location for backward compatibility. """ cached = cache_manager.put(source, node_type=node_type) - - # Create symlink in legacy location for backward compatibility - legacy_path = CACHE_DIR / cached.content_hash - if not legacy_path.exists(): - try: - legacy_path.symlink_to(cached.path) - except (OSError, FileExistsError): - # Symlink failed, try copy - import shutil - if not legacy_path.exists(): - shutil.copy2(cached.path, legacy_path) - return cached.content_hash +def get_cache_path(content_hash: str) -> Optional[Path]: + """Get the path for a cached file by content_hash.""" + return cache_manager.get_by_content_hash(content_hash) + + @app.get("/api") async def api_info(): """Server info (JSON).""" @@ -456,14 +448,14 @@ async def run_detail(run_id: str, request: Request): # Build media HTML for input/output media_html = "" - has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists() - has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists() + has_input = run.inputs and cache_manager.has_content(run.inputs[0]) + has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash) if has_input or has_output: media_html = '
' if has_input: input_hash = run.inputs[0] - input_media_type = detect_media_type(CACHE_DIR / input_hash) + input_media_type = detect_media_type(get_cache_path(input_hash)) input_video_src = video_src_for_request(input_hash, request) if input_media_type == "video": input_elem = f'' @@ -480,7 +472,7 @@ async def run_detail(run_id: str, request: Request): ''' if has_output: output_hash = run.output_hash - output_media_type = detect_media_type(CACHE_DIR / output_hash) + output_media_type = detect_media_type(get_cache_path(output_hash)) output_video_src = video_src_for_request(output_hash, request) if output_media_type == "video": output_elem = f'' @@ -684,14 +676,14 @@ async def list_runs(request: Request, page: int = 1, limit: int = 20): ''') # Show input and output thumbnails - has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists() - has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists() + has_input = run.inputs and cache_manager.has_content(run.inputs[0]) + has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash) if has_input or has_output: html_parts.append('
') if has_input: input_hash = run.inputs[0] - input_media_type = detect_media_type(CACHE_DIR / input_hash) + input_media_type = detect_media_type(get_cache_path(input_hash)) html_parts.append(f'''
Input
@@ -705,7 +697,7 @@ async def list_runs(request: Request, page: int = 1, limit: int = 20): if has_output: output_hash = run.output_hash - output_media_type = detect_media_type(CACHE_DIR / output_hash) + output_media_type = detect_media_type(get_cache_path(output_hash)) html_parts.append(f'''
Output
@@ -767,38 +759,22 @@ async def list_runs(request: Request, page: int = 1, limit: int = 20): @app.get("/cache/{content_hash}") async def get_cached(content_hash: str): """Get cached content by hash.""" - # Try cache_manager first (new location) - cache_path = cache_manager.get_by_content_hash(content_hash) - - # Fallback to legacy location + cache_path = get_cache_path(content_hash) if not cache_path: - legacy_path = CACHE_DIR / content_hash - if legacy_path.exists(): - cache_path = legacy_path - - if not cache_path or not cache_path.exists(): raise HTTPException(404, f"Content {content_hash} not in cache") - return FileResponse(cache_path) @app.get("/cache/{content_hash}/mp4") async def get_cached_mp4(content_hash: str): """Get cached content as MP4 (transcodes MKV on first request, caches result).""" - # Try cache_manager first (new location) - cache_path = cache_manager.get_by_content_hash(content_hash) - - # Fallback to legacy location + cache_path = get_cache_path(content_hash) if not cache_path: - legacy_path = CACHE_DIR / content_hash - if legacy_path.exists(): - cache_path = legacy_path - - mp4_path = CACHE_DIR / f"{content_hash}.mp4" - - if not cache_path or not cache_path.exists(): raise HTTPException(404, f"Content {content_hash} not in cache") + # MP4 transcodes stored alongside original in CACHE_DIR + mp4_path = CACHE_DIR / f"{content_hash}.mp4" + # If MP4 already cached, serve it if mp4_path.exists(): return FileResponse(mp4_path, media_type="video/mp4") @@ -856,8 +832,8 @@ async def cache_detail(content_hash: str, request: Request): """View cached content detail. HTML for browsers, JSON for APIs.""" current_user = get_user_from_cookie(request) - cache_path = CACHE_DIR / content_hash - if not cache_path.exists(): + cache_path = get_cache_path(content_hash) + if not cache_path: if wants_html(request): content = f'

Content not found: {content_hash}

' return HTMLResponse(render_page("Not Found", content, current_user, active_tab="cache"), status_code=404) @@ -1001,8 +977,8 @@ async def ui_cache_meta_form(content_hash: str, request: Request): pin_reason = meta.get("pin_reason", "") # Detect media type for publish - cache_path = CACHE_DIR / content_hash - media_type = detect_media_type(cache_path) if cache_path.exists() else "unknown" + cache_path = get_cache_path(content_hash) + media_type = detect_media_type(cache_path) if cache_path else "unknown" asset_type = "video" if media_type == "video" else "image" # Origin radio checked states @@ -1404,8 +1380,8 @@ async def list_cache( html_parts = [] for item in items_page: content_hash = item["hash"] - cache_path = CACHE_DIR / content_hash - media_type = detect_media_type(cache_path) + cache_path = get_cache_path(content_hash) + media_type = detect_media_type(cache_path) if cache_path else "unknown" # Format size size = item["size"] @@ -1509,11 +1485,9 @@ async def discard_cache(content_hash: str, username: str = Depends(get_required_ - Cannot delete inputs/outputs of activities (runs) - Cannot delete pinned items """ - # Check if content exists (in cache_manager or legacy location) + # Check if content exists if not cache_manager.has_content(content_hash): - cache_path = CACHE_DIR / content_hash - if not cache_path.exists(): - raise HTTPException(404, "Content not found") + raise HTTPException(404, "Content not found") # Check ownership user_hashes = get_user_cache_hashes(username) @@ -1535,8 +1509,8 @@ async def discard_cache(content_hash: str, username: str = Depends(get_required_ success, msg = cache_manager.delete_by_content_hash(content_hash) if not success: # Fallback to legacy deletion - cache_path = CACHE_DIR / content_hash - if cache_path.exists(): + cache_path = get_cache_path(content_hash) + if cache_path and cache_path.exists(): cache_path.unlink() # Clean up legacy metadata files @@ -1564,9 +1538,7 @@ async def ui_discard_cache(content_hash: str, request: Request): # Check if content exists if not cache_manager.has_content(content_hash): - cache_path = CACHE_DIR / content_hash - if not cache_path.exists(): - return '
Content not found
' + return '
Content not found
' # Check if pinned (legacy metadata) meta = load_cache_meta(content_hash) @@ -1583,8 +1555,8 @@ async def ui_discard_cache(content_hash: str, request: Request): success, msg = cache_manager.delete_by_content_hash(content_hash) if not success: # Fallback to legacy deletion - cache_path = CACHE_DIR / content_hash - if cache_path.exists(): + cache_path = get_cache_path(content_hash) + if cache_path and cache_path.exists(): cache_path.unlink() # Clean up legacy metadata files @@ -1718,15 +1690,9 @@ async def upload_to_cache(file: UploadFile = File(...), username: str = Depends( tmp.write(content) tmp_path = Path(tmp.name) - # Hash and move to cache - content_hash = file_hash(tmp_path) - cache_path = CACHE_DIR / content_hash - - if not cache_path.exists(): - import shutil - shutil.move(str(tmp_path), cache_path) - else: - tmp_path.unlink() + # Store in cache via cache_manager + cached = cache_manager.put(tmp_path, node_type="upload", move=True) + content_hash = cached.content_hash # Save uploader metadata actor_id = f"@{username}@{L2_DOMAIN}" @@ -1754,8 +1720,8 @@ class PublishRequest(BaseModel): async def get_cache_meta(content_hash: str, username: str = Depends(get_required_user)): """Get metadata for a cached file.""" # Check file exists - cache_path = CACHE_DIR / content_hash - if not cache_path.exists(): + cache_path = get_cache_path(content_hash) + if not cache_path: raise HTTPException(404, "Content not found") # Check ownership @@ -1770,8 +1736,8 @@ async def get_cache_meta(content_hash: str, username: str = Depends(get_required async def update_cache_meta(content_hash: str, update: CacheMetaUpdate, username: str = Depends(get_required_user)): """Update metadata for a cached file.""" # Check file exists - cache_path = CACHE_DIR / content_hash - if not cache_path.exists(): + cache_path = get_cache_path(content_hash) + if not cache_path: raise HTTPException(404, "Content not found") # Check ownership @@ -1819,8 +1785,8 @@ async def publish_cache_to_l2( Requires origin to be set in metadata before publishing. """ # Check file exists - cache_path = CACHE_DIR / content_hash - if not cache_path.exists(): + cache_path = get_cache_path(content_hash) + if not cache_path: raise HTTPException(404, "Content not found") # Check ownership @@ -1907,8 +1873,8 @@ async def republish_cache_to_l2( Only works for already-published items. """ # Check file exists - cache_path = CACHE_DIR / content_hash - if not cache_path.exists(): + cache_path = get_cache_path(content_hash) + if not cache_path: raise HTTPException(404, "Content not found") # Check ownership @@ -2547,8 +2513,8 @@ async def ui_runs(request: Request): ''') # Show input and output side by side - has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists() - has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists() + has_input = run.inputs and cache_manager.has_content(run.inputs[0]) + has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash) if has_input or has_output: html_parts.append('
') @@ -2556,7 +2522,7 @@ async def ui_runs(request: Request): # Input box if has_input: input_hash = run.inputs[0] - input_media_type = detect_media_type(CACHE_DIR / input_hash) + input_media_type = detect_media_type(get_cache_path(input_hash)) html_parts.append(f'''
Input: {input_hash[:16]}...
@@ -2572,7 +2538,7 @@ async def ui_runs(request: Request): # Output box if has_output: output_hash = run.output_hash - output_media_type = detect_media_type(CACHE_DIR / output_hash) + output_media_type = detect_media_type(get_cache_path(output_hash)) html_parts.append(f'''
Output: {output_hash[:16]}...
@@ -2666,8 +2632,8 @@ async def ui_cache_list( for item in cache_items[:50]: # Limit to 50 items content_hash = item["hash"] - cache_path = CACHE_DIR / content_hash - media_type = detect_media_type(cache_path) + cache_path = get_cache_path(content_hash) + media_type = detect_media_type(cache_path) if cache_path else "unknown" # Format size size = item["size"] @@ -2770,15 +2736,15 @@ async def ui_run_partial(run_id: str, request: Request): ''' # Show input and output side by side - has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists() - has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists() + has_input = run.inputs and cache_manager.has_content(run.inputs[0]) + has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash) if has_input or has_output: html += '
' if has_input: input_hash = run.inputs[0] - input_media_type = detect_media_type(CACHE_DIR / input_hash) + input_media_type = detect_media_type(get_cache_path(input_hash)) html += f'''
Input: {input_hash[:16]}...
@@ -2793,7 +2759,7 @@ async def ui_run_partial(run_id: str, request: Request): if has_output: output_hash = run.output_hash - output_media_type = detect_media_type(CACHE_DIR / output_hash) + output_media_type = detect_media_type(get_cache_path(output_hash)) html += f'''
Output: {output_hash[:16]}...