Refactor cache access to use cache_manager consistently

- Remove symlink hack from cache_file() - no longer needed
- Add get_cache_path() helper for content_hash lookups
- Update all CACHE_DIR / content_hash patterns to use cache_manager
- Fix cache_manager.get_by_content_hash() to check path.exists()
- Fix legacy path lookup (cache_dir not legacy_dir)
- Update upload endpoint to use cache_manager.put()

This ensures cache lookups work correctly for both legacy files
(stored directly in CACHE_DIR) and new files (stored in nodes/).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
Commit f8ec42b445 (parent 26768b5942), authored by gilesb on 2026-01-08 01:21:11 +00:00.
2 changed files with 61 additions and 95 deletions.

View File

@@ -243,25 +243,25 @@ class L1CacheManager:
def get_by_content_hash(self, content_hash: str) -> Optional[Path]: def get_by_content_hash(self, content_hash: str) -> Optional[Path]:
"""Get cached file path by content_hash.""" """Get cached file path by content_hash."""
# Check index first # Check index first (new cache structure)
node_id = self._content_index.get(content_hash) node_id = self._content_index.get(content_hash)
if node_id: if node_id:
path = self.cache.get(node_id) path = self.cache.get(node_id)
if path: if path and path.exists():
return path return path
# Check legacy directory # Scan cache entries (fallback for new structure)
legacy_path = self.legacy_dir / content_hash
if legacy_path.exists():
return legacy_path
# Scan cache entries (fallback)
entry = self.cache.find_by_content_hash(content_hash) entry = self.cache.find_by_content_hash(content_hash)
if entry: if entry and entry.output_path.exists():
self._content_index[content_hash] = entry.node_id self._content_index[content_hash] = entry.node_id
self._save_content_index() self._save_content_index()
return entry.output_path return entry.output_path
# Check legacy location (files stored directly as CACHE_DIR/{content_hash})
legacy_path = self.cache_dir / content_hash
if legacy_path.exists() and legacy_path.is_file():
return legacy_path
return None return None
def has_content(self, content_hash: str) -> bool: def has_content(self, content_hash: str) -> bool:

138
server.py
View File

@@ -161,24 +161,16 @@ def cache_file(source: Path, node_type: str = "output") -> str:
Copy file to cache using L1CacheManager, return content hash. Copy file to cache using L1CacheManager, return content hash.
Uses artdag's Cache internally for proper tracking. Uses artdag's Cache internally for proper tracking.
Also creates a symlink in legacy location for backward compatibility.
""" """
cached = cache_manager.put(source, node_type=node_type) cached = cache_manager.put(source, node_type=node_type)
# Create symlink in legacy location for backward compatibility
legacy_path = CACHE_DIR / cached.content_hash
if not legacy_path.exists():
try:
legacy_path.symlink_to(cached.path)
except (OSError, FileExistsError):
# Symlink failed, try copy
import shutil
if not legacy_path.exists():
shutil.copy2(cached.path, legacy_path)
return cached.content_hash return cached.content_hash
def get_cache_path(content_hash: str) -> Optional[Path]:
"""Get the path for a cached file by content_hash."""
return cache_manager.get_by_content_hash(content_hash)
@app.get("/api") @app.get("/api")
async def api_info(): async def api_info():
"""Server info (JSON).""" """Server info (JSON)."""
@@ -456,14 +448,14 @@ async def run_detail(run_id: str, request: Request):
# Build media HTML for input/output # Build media HTML for input/output
media_html = "" media_html = ""
has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists() has_input = run.inputs and cache_manager.has_content(run.inputs[0])
has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists() has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash)
if has_input or has_output: if has_input or has_output:
media_html = '<div class="grid gap-6 md:grid-cols-2 mb-8">' media_html = '<div class="grid gap-6 md:grid-cols-2 mb-8">'
if has_input: if has_input:
input_hash = run.inputs[0] input_hash = run.inputs[0]
input_media_type = detect_media_type(CACHE_DIR / input_hash) input_media_type = detect_media_type(get_cache_path(input_hash))
input_video_src = video_src_for_request(input_hash, request) input_video_src = video_src_for_request(input_hash, request)
if input_media_type == "video": if input_media_type == "video":
input_elem = f'<video src="{input_video_src}" controls muted loop playsinline class="max-w-full max-h-64 rounded-lg"></video>' input_elem = f'<video src="{input_video_src}" controls muted loop playsinline class="max-w-full max-h-64 rounded-lg"></video>'
@@ -480,7 +472,7 @@ async def run_detail(run_id: str, request: Request):
''' '''
if has_output: if has_output:
output_hash = run.output_hash output_hash = run.output_hash
output_media_type = detect_media_type(CACHE_DIR / output_hash) output_media_type = detect_media_type(get_cache_path(output_hash))
output_video_src = video_src_for_request(output_hash, request) output_video_src = video_src_for_request(output_hash, request)
if output_media_type == "video": if output_media_type == "video":
output_elem = f'<video src="{output_video_src}" controls autoplay muted loop playsinline class="max-w-full max-h-64 rounded-lg"></video>' output_elem = f'<video src="{output_video_src}" controls autoplay muted loop playsinline class="max-w-full max-h-64 rounded-lg"></video>'
@@ -684,14 +676,14 @@ async def list_runs(request: Request, page: int = 1, limit: int = 20):
''') ''')
# Show input and output thumbnails # Show input and output thumbnails
has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists() has_input = run.inputs and cache_manager.has_content(run.inputs[0])
has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists() has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash)
if has_input or has_output: if has_input or has_output:
html_parts.append('<div class="grid gap-4 sm:grid-cols-2">') html_parts.append('<div class="grid gap-4 sm:grid-cols-2">')
if has_input: if has_input:
input_hash = run.inputs[0] input_hash = run.inputs[0]
input_media_type = detect_media_type(CACHE_DIR / input_hash) input_media_type = detect_media_type(get_cache_path(input_hash))
html_parts.append(f''' html_parts.append(f'''
<div class="bg-dark-600 rounded-lg p-3"> <div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Input</div> <div class="text-xs text-gray-400 mb-2">Input</div>
@@ -705,7 +697,7 @@ async def list_runs(request: Request, page: int = 1, limit: int = 20):
if has_output: if has_output:
output_hash = run.output_hash output_hash = run.output_hash
output_media_type = detect_media_type(CACHE_DIR / output_hash) output_media_type = detect_media_type(get_cache_path(output_hash))
html_parts.append(f''' html_parts.append(f'''
<div class="bg-dark-600 rounded-lg p-3"> <div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Output</div> <div class="text-xs text-gray-400 mb-2">Output</div>
@@ -767,38 +759,22 @@ async def list_runs(request: Request, page: int = 1, limit: int = 20):
@app.get("/cache/{content_hash}") @app.get("/cache/{content_hash}")
async def get_cached(content_hash: str): async def get_cached(content_hash: str):
"""Get cached content by hash.""" """Get cached content by hash."""
# Try cache_manager first (new location) cache_path = get_cache_path(content_hash)
cache_path = cache_manager.get_by_content_hash(content_hash)
# Fallback to legacy location
if not cache_path: if not cache_path:
legacy_path = CACHE_DIR / content_hash
if legacy_path.exists():
cache_path = legacy_path
if not cache_path or not cache_path.exists():
raise HTTPException(404, f"Content {content_hash} not in cache") raise HTTPException(404, f"Content {content_hash} not in cache")
return FileResponse(cache_path) return FileResponse(cache_path)
@app.get("/cache/{content_hash}/mp4") @app.get("/cache/{content_hash}/mp4")
async def get_cached_mp4(content_hash: str): async def get_cached_mp4(content_hash: str):
"""Get cached content as MP4 (transcodes MKV on first request, caches result).""" """Get cached content as MP4 (transcodes MKV on first request, caches result)."""
# Try cache_manager first (new location) cache_path = get_cache_path(content_hash)
cache_path = cache_manager.get_by_content_hash(content_hash)
# Fallback to legacy location
if not cache_path: if not cache_path:
legacy_path = CACHE_DIR / content_hash
if legacy_path.exists():
cache_path = legacy_path
mp4_path = CACHE_DIR / f"{content_hash}.mp4"
if not cache_path or not cache_path.exists():
raise HTTPException(404, f"Content {content_hash} not in cache") raise HTTPException(404, f"Content {content_hash} not in cache")
# MP4 transcodes stored alongside original in CACHE_DIR
mp4_path = CACHE_DIR / f"{content_hash}.mp4"
# If MP4 already cached, serve it # If MP4 already cached, serve it
if mp4_path.exists(): if mp4_path.exists():
return FileResponse(mp4_path, media_type="video/mp4") return FileResponse(mp4_path, media_type="video/mp4")
@@ -856,8 +832,8 @@ async def cache_detail(content_hash: str, request: Request):
"""View cached content detail. HTML for browsers, JSON for APIs.""" """View cached content detail. HTML for browsers, JSON for APIs."""
current_user = get_user_from_cookie(request) current_user = get_user_from_cookie(request)
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
if not cache_path.exists(): if not cache_path:
if wants_html(request): if wants_html(request):
content = f'<p class="text-red-400">Content not found: {content_hash}</p>' content = f'<p class="text-red-400">Content not found: {content_hash}</p>'
return HTMLResponse(render_page("Not Found", content, current_user, active_tab="cache"), status_code=404) return HTMLResponse(render_page("Not Found", content, current_user, active_tab="cache"), status_code=404)
@@ -1001,8 +977,8 @@ async def ui_cache_meta_form(content_hash: str, request: Request):
pin_reason = meta.get("pin_reason", "") pin_reason = meta.get("pin_reason", "")
# Detect media type for publish # Detect media type for publish
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
media_type = detect_media_type(cache_path) if cache_path.exists() else "unknown" media_type = detect_media_type(cache_path) if cache_path else "unknown"
asset_type = "video" if media_type == "video" else "image" asset_type = "video" if media_type == "video" else "image"
# Origin radio checked states # Origin radio checked states
@@ -1404,8 +1380,8 @@ async def list_cache(
html_parts = [] html_parts = []
for item in items_page: for item in items_page:
content_hash = item["hash"] content_hash = item["hash"]
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
media_type = detect_media_type(cache_path) media_type = detect_media_type(cache_path) if cache_path else "unknown"
# Format size # Format size
size = item["size"] size = item["size"]
@@ -1509,11 +1485,9 @@ async def discard_cache(content_hash: str, username: str = Depends(get_required_
- Cannot delete inputs/outputs of activities (runs) - Cannot delete inputs/outputs of activities (runs)
- Cannot delete pinned items - Cannot delete pinned items
""" """
# Check if content exists (in cache_manager or legacy location) # Check if content exists
if not cache_manager.has_content(content_hash): if not cache_manager.has_content(content_hash):
cache_path = CACHE_DIR / content_hash raise HTTPException(404, "Content not found")
if not cache_path.exists():
raise HTTPException(404, "Content not found")
# Check ownership # Check ownership
user_hashes = get_user_cache_hashes(username) user_hashes = get_user_cache_hashes(username)
@@ -1535,8 +1509,8 @@ async def discard_cache(content_hash: str, username: str = Depends(get_required_
success, msg = cache_manager.delete_by_content_hash(content_hash) success, msg = cache_manager.delete_by_content_hash(content_hash)
if not success: if not success:
# Fallback to legacy deletion # Fallback to legacy deletion
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
if cache_path.exists(): if cache_path and cache_path.exists():
cache_path.unlink() cache_path.unlink()
# Clean up legacy metadata files # Clean up legacy metadata files
@@ -1564,9 +1538,7 @@ async def ui_discard_cache(content_hash: str, request: Request):
# Check if content exists # Check if content exists
if not cache_manager.has_content(content_hash): if not cache_manager.has_content(content_hash):
cache_path = CACHE_DIR / content_hash return '<div class="bg-red-900/50 border border-red-700 text-red-300 px-4 py-3 rounded-lg mb-4">Content not found</div>'
if not cache_path.exists():
return '<div class="bg-red-900/50 border border-red-700 text-red-300 px-4 py-3 rounded-lg mb-4">Content not found</div>'
# Check if pinned (legacy metadata) # Check if pinned (legacy metadata)
meta = load_cache_meta(content_hash) meta = load_cache_meta(content_hash)
@@ -1583,8 +1555,8 @@ async def ui_discard_cache(content_hash: str, request: Request):
success, msg = cache_manager.delete_by_content_hash(content_hash) success, msg = cache_manager.delete_by_content_hash(content_hash)
if not success: if not success:
# Fallback to legacy deletion # Fallback to legacy deletion
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
if cache_path.exists(): if cache_path and cache_path.exists():
cache_path.unlink() cache_path.unlink()
# Clean up legacy metadata files # Clean up legacy metadata files
@@ -1718,15 +1690,9 @@ async def upload_to_cache(file: UploadFile = File(...), username: str = Depends(
tmp.write(content) tmp.write(content)
tmp_path = Path(tmp.name) tmp_path = Path(tmp.name)
# Hash and move to cache # Store in cache via cache_manager
content_hash = file_hash(tmp_path) cached = cache_manager.put(tmp_path, node_type="upload", move=True)
cache_path = CACHE_DIR / content_hash content_hash = cached.content_hash
if not cache_path.exists():
import shutil
shutil.move(str(tmp_path), cache_path)
else:
tmp_path.unlink()
# Save uploader metadata # Save uploader metadata
actor_id = f"@{username}@{L2_DOMAIN}" actor_id = f"@{username}@{L2_DOMAIN}"
@@ -1754,8 +1720,8 @@ class PublishRequest(BaseModel):
async def get_cache_meta(content_hash: str, username: str = Depends(get_required_user)): async def get_cache_meta(content_hash: str, username: str = Depends(get_required_user)):
"""Get metadata for a cached file.""" """Get metadata for a cached file."""
# Check file exists # Check file exists
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
if not cache_path.exists(): if not cache_path:
raise HTTPException(404, "Content not found") raise HTTPException(404, "Content not found")
# Check ownership # Check ownership
@@ -1770,8 +1736,8 @@ async def get_cache_meta(content_hash: str, username: str = Depends(get_required
async def update_cache_meta(content_hash: str, update: CacheMetaUpdate, username: str = Depends(get_required_user)): async def update_cache_meta(content_hash: str, update: CacheMetaUpdate, username: str = Depends(get_required_user)):
"""Update metadata for a cached file.""" """Update metadata for a cached file."""
# Check file exists # Check file exists
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
if not cache_path.exists(): if not cache_path:
raise HTTPException(404, "Content not found") raise HTTPException(404, "Content not found")
# Check ownership # Check ownership
@@ -1819,8 +1785,8 @@ async def publish_cache_to_l2(
Requires origin to be set in metadata before publishing. Requires origin to be set in metadata before publishing.
""" """
# Check file exists # Check file exists
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
if not cache_path.exists(): if not cache_path:
raise HTTPException(404, "Content not found") raise HTTPException(404, "Content not found")
# Check ownership # Check ownership
@@ -1907,8 +1873,8 @@ async def republish_cache_to_l2(
Only works for already-published items. Only works for already-published items.
""" """
# Check file exists # Check file exists
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
if not cache_path.exists(): if not cache_path:
raise HTTPException(404, "Content not found") raise HTTPException(404, "Content not found")
# Check ownership # Check ownership
@@ -2547,8 +2513,8 @@ async def ui_runs(request: Request):
''') ''')
# Show input and output side by side # Show input and output side by side
has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists() has_input = run.inputs and cache_manager.has_content(run.inputs[0])
has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists() has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash)
if has_input or has_output: if has_input or has_output:
html_parts.append('<div class="grid gap-4 sm:grid-cols-2">') html_parts.append('<div class="grid gap-4 sm:grid-cols-2">')
@@ -2556,7 +2522,7 @@ async def ui_runs(request: Request):
# Input box # Input box
if has_input: if has_input:
input_hash = run.inputs[0] input_hash = run.inputs[0]
input_media_type = detect_media_type(CACHE_DIR / input_hash) input_media_type = detect_media_type(get_cache_path(input_hash))
html_parts.append(f''' html_parts.append(f'''
<div class="bg-dark-600 rounded-lg p-3"> <div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Input: {input_hash[:16]}...</div> <div class="text-xs text-gray-400 mb-2">Input: {input_hash[:16]}...</div>
@@ -2572,7 +2538,7 @@ async def ui_runs(request: Request):
# Output box # Output box
if has_output: if has_output:
output_hash = run.output_hash output_hash = run.output_hash
output_media_type = detect_media_type(CACHE_DIR / output_hash) output_media_type = detect_media_type(get_cache_path(output_hash))
html_parts.append(f''' html_parts.append(f'''
<div class="bg-dark-600 rounded-lg p-3"> <div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Output: {output_hash[:16]}...</div> <div class="text-xs text-gray-400 mb-2">Output: {output_hash[:16]}...</div>
@@ -2666,8 +2632,8 @@ async def ui_cache_list(
for item in cache_items[:50]: # Limit to 50 items for item in cache_items[:50]: # Limit to 50 items
content_hash = item["hash"] content_hash = item["hash"]
cache_path = CACHE_DIR / content_hash cache_path = get_cache_path(content_hash)
media_type = detect_media_type(cache_path) media_type = detect_media_type(cache_path) if cache_path else "unknown"
# Format size # Format size
size = item["size"] size = item["size"]
@@ -2770,15 +2736,15 @@ async def ui_run_partial(run_id: str, request: Request):
''' '''
# Show input and output side by side # Show input and output side by side
has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists() has_input = run.inputs and cache_manager.has_content(run.inputs[0])
has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists() has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash)
if has_input or has_output: if has_input or has_output:
html += '<div class="grid gap-4 sm:grid-cols-2">' html += '<div class="grid gap-4 sm:grid-cols-2">'
if has_input: if has_input:
input_hash = run.inputs[0] input_hash = run.inputs[0]
input_media_type = detect_media_type(CACHE_DIR / input_hash) input_media_type = detect_media_type(get_cache_path(input_hash))
html += f''' html += f'''
<div class="bg-dark-600 rounded-lg p-3"> <div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Input: {input_hash[:16]}...</div> <div class="text-xs text-gray-400 mb-2">Input: {input_hash[:16]}...</div>
@@ -2793,7 +2759,7 @@ async def ui_run_partial(run_id: str, request: Request):
if has_output: if has_output:
output_hash = run.output_hash output_hash = run.output_hash
output_media_type = detect_media_type(CACHE_DIR / output_hash) output_media_type = detect_media_type(get_cache_path(output_hash))
html += f''' html += f'''
<div class="bg-dark-600 rounded-lg p-3"> <div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Output: {output_hash[:16]}...</div> <div class="text-xs text-gray-400 mb-2">Output: {output_hash[:16]}...</div>