Refactor cache access to use cache_manager consistently

- Remove symlink hack from cache_file() - no longer needed
- Add get_cache_path() helper for content_hash lookups
- Replace all direct `CACHE_DIR / content_hash` path lookups with cache_manager calls (has_content() / get_cache_path())
- Fix cache_manager.get_by_content_hash() to return a path only if it still exists on disk (path.exists())
- Fix legacy path lookup to use cache_dir (legacy files are stored directly as CACHE_DIR/{content_hash}) instead of legacy_dir
- Update upload endpoint to use cache_manager.put()

This ensures cache lookups work correctly for both legacy files
(stored directly in CACHE_DIR) and new files (stored in nodes/).

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gilesb
2026-01-08 01:21:11 +00:00
parent 26768b5942
commit f8ec42b445
2 changed files with 61 additions and 95 deletions

View File

@@ -243,25 +243,25 @@ class L1CacheManager:
def get_by_content_hash(self, content_hash: str) -> Optional[Path]:
"""Get cached file path by content_hash."""
# Check index first
# Check index first (new cache structure)
node_id = self._content_index.get(content_hash)
if node_id:
path = self.cache.get(node_id)
if path:
if path and path.exists():
return path
# Check legacy directory
legacy_path = self.legacy_dir / content_hash
if legacy_path.exists():
return legacy_path
# Scan cache entries (fallback)
# Scan cache entries (fallback for new structure)
entry = self.cache.find_by_content_hash(content_hash)
if entry:
if entry and entry.output_path.exists():
self._content_index[content_hash] = entry.node_id
self._save_content_index()
return entry.output_path
# Check legacy location (files stored directly as CACHE_DIR/{content_hash})
legacy_path = self.cache_dir / content_hash
if legacy_path.exists() and legacy_path.is_file():
return legacy_path
return None
def has_content(self, content_hash: str) -> bool:

138
server.py
View File

@@ -161,24 +161,16 @@ def cache_file(source: Path, node_type: str = "output") -> str:
Copy file to cache using L1CacheManager, return content hash.
Uses artdag's Cache internally for proper tracking.
Also creates a symlink in legacy location for backward compatibility.
"""
cached = cache_manager.put(source, node_type=node_type)
# Create symlink in legacy location for backward compatibility
legacy_path = CACHE_DIR / cached.content_hash
if not legacy_path.exists():
try:
legacy_path.symlink_to(cached.path)
except (OSError, FileExistsError):
# Symlink failed, try copy
import shutil
if not legacy_path.exists():
shutil.copy2(cached.path, legacy_path)
return cached.content_hash
def get_cache_path(content_hash: str) -> Optional[Path]:
"""Get the path for a cached file by content_hash."""
return cache_manager.get_by_content_hash(content_hash)
@app.get("/api")
async def api_info():
"""Server info (JSON)."""
@@ -456,14 +448,14 @@ async def run_detail(run_id: str, request: Request):
# Build media HTML for input/output
media_html = ""
has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists()
has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists()
has_input = run.inputs and cache_manager.has_content(run.inputs[0])
has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash)
if has_input or has_output:
media_html = '<div class="grid gap-6 md:grid-cols-2 mb-8">'
if has_input:
input_hash = run.inputs[0]
input_media_type = detect_media_type(CACHE_DIR / input_hash)
input_media_type = detect_media_type(get_cache_path(input_hash))
input_video_src = video_src_for_request(input_hash, request)
if input_media_type == "video":
input_elem = f'<video src="{input_video_src}" controls muted loop playsinline class="max-w-full max-h-64 rounded-lg"></video>'
@@ -480,7 +472,7 @@ async def run_detail(run_id: str, request: Request):
'''
if has_output:
output_hash = run.output_hash
output_media_type = detect_media_type(CACHE_DIR / output_hash)
output_media_type = detect_media_type(get_cache_path(output_hash))
output_video_src = video_src_for_request(output_hash, request)
if output_media_type == "video":
output_elem = f'<video src="{output_video_src}" controls autoplay muted loop playsinline class="max-w-full max-h-64 rounded-lg"></video>'
@@ -684,14 +676,14 @@ async def list_runs(request: Request, page: int = 1, limit: int = 20):
''')
# Show input and output thumbnails
has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists()
has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists()
has_input = run.inputs and cache_manager.has_content(run.inputs[0])
has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash)
if has_input or has_output:
html_parts.append('<div class="grid gap-4 sm:grid-cols-2">')
if has_input:
input_hash = run.inputs[0]
input_media_type = detect_media_type(CACHE_DIR / input_hash)
input_media_type = detect_media_type(get_cache_path(input_hash))
html_parts.append(f'''
<div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Input</div>
@@ -705,7 +697,7 @@ async def list_runs(request: Request, page: int = 1, limit: int = 20):
if has_output:
output_hash = run.output_hash
output_media_type = detect_media_type(CACHE_DIR / output_hash)
output_media_type = detect_media_type(get_cache_path(output_hash))
html_parts.append(f'''
<div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Output</div>
@@ -767,38 +759,22 @@ async def list_runs(request: Request, page: int = 1, limit: int = 20):
@app.get("/cache/{content_hash}")
async def get_cached(content_hash: str):
"""Get cached content by hash."""
# Try cache_manager first (new location)
cache_path = cache_manager.get_by_content_hash(content_hash)
# Fallback to legacy location
cache_path = get_cache_path(content_hash)
if not cache_path:
legacy_path = CACHE_DIR / content_hash
if legacy_path.exists():
cache_path = legacy_path
if not cache_path or not cache_path.exists():
raise HTTPException(404, f"Content {content_hash} not in cache")
return FileResponse(cache_path)
@app.get("/cache/{content_hash}/mp4")
async def get_cached_mp4(content_hash: str):
"""Get cached content as MP4 (transcodes MKV on first request, caches result)."""
# Try cache_manager first (new location)
cache_path = cache_manager.get_by_content_hash(content_hash)
# Fallback to legacy location
cache_path = get_cache_path(content_hash)
if not cache_path:
legacy_path = CACHE_DIR / content_hash
if legacy_path.exists():
cache_path = legacy_path
mp4_path = CACHE_DIR / f"{content_hash}.mp4"
if not cache_path or not cache_path.exists():
raise HTTPException(404, f"Content {content_hash} not in cache")
# MP4 transcodes stored alongside original in CACHE_DIR
mp4_path = CACHE_DIR / f"{content_hash}.mp4"
# If MP4 already cached, serve it
if mp4_path.exists():
return FileResponse(mp4_path, media_type="video/mp4")
@@ -856,8 +832,8 @@ async def cache_detail(content_hash: str, request: Request):
"""View cached content detail. HTML for browsers, JSON for APIs."""
current_user = get_user_from_cookie(request)
cache_path = CACHE_DIR / content_hash
if not cache_path.exists():
cache_path = get_cache_path(content_hash)
if not cache_path:
if wants_html(request):
content = f'<p class="text-red-400">Content not found: {content_hash}</p>'
return HTMLResponse(render_page("Not Found", content, current_user, active_tab="cache"), status_code=404)
@@ -1001,8 +977,8 @@ async def ui_cache_meta_form(content_hash: str, request: Request):
pin_reason = meta.get("pin_reason", "")
# Detect media type for publish
cache_path = CACHE_DIR / content_hash
media_type = detect_media_type(cache_path) if cache_path.exists() else "unknown"
cache_path = get_cache_path(content_hash)
media_type = detect_media_type(cache_path) if cache_path else "unknown"
asset_type = "video" if media_type == "video" else "image"
# Origin radio checked states
@@ -1404,8 +1380,8 @@ async def list_cache(
html_parts = []
for item in items_page:
content_hash = item["hash"]
cache_path = CACHE_DIR / content_hash
media_type = detect_media_type(cache_path)
cache_path = get_cache_path(content_hash)
media_type = detect_media_type(cache_path) if cache_path else "unknown"
# Format size
size = item["size"]
@@ -1509,11 +1485,9 @@ async def discard_cache(content_hash: str, username: str = Depends(get_required_
- Cannot delete inputs/outputs of activities (runs)
- Cannot delete pinned items
"""
# Check if content exists (in cache_manager or legacy location)
# Check if content exists
if not cache_manager.has_content(content_hash):
cache_path = CACHE_DIR / content_hash
if not cache_path.exists():
raise HTTPException(404, "Content not found")
raise HTTPException(404, "Content not found")
# Check ownership
user_hashes = get_user_cache_hashes(username)
@@ -1535,8 +1509,8 @@ async def discard_cache(content_hash: str, username: str = Depends(get_required_
success, msg = cache_manager.delete_by_content_hash(content_hash)
if not success:
# Fallback to legacy deletion
cache_path = CACHE_DIR / content_hash
if cache_path.exists():
cache_path = get_cache_path(content_hash)
if cache_path and cache_path.exists():
cache_path.unlink()
# Clean up legacy metadata files
@@ -1564,9 +1538,7 @@ async def ui_discard_cache(content_hash: str, request: Request):
# Check if content exists
if not cache_manager.has_content(content_hash):
cache_path = CACHE_DIR / content_hash
if not cache_path.exists():
return '<div class="bg-red-900/50 border border-red-700 text-red-300 px-4 py-3 rounded-lg mb-4">Content not found</div>'
return '<div class="bg-red-900/50 border border-red-700 text-red-300 px-4 py-3 rounded-lg mb-4">Content not found</div>'
# Check if pinned (legacy metadata)
meta = load_cache_meta(content_hash)
@@ -1583,8 +1555,8 @@ async def ui_discard_cache(content_hash: str, request: Request):
success, msg = cache_manager.delete_by_content_hash(content_hash)
if not success:
# Fallback to legacy deletion
cache_path = CACHE_DIR / content_hash
if cache_path.exists():
cache_path = get_cache_path(content_hash)
if cache_path and cache_path.exists():
cache_path.unlink()
# Clean up legacy metadata files
@@ -1718,15 +1690,9 @@ async def upload_to_cache(file: UploadFile = File(...), username: str = Depends(
tmp.write(content)
tmp_path = Path(tmp.name)
# Hash and move to cache
content_hash = file_hash(tmp_path)
cache_path = CACHE_DIR / content_hash
if not cache_path.exists():
import shutil
shutil.move(str(tmp_path), cache_path)
else:
tmp_path.unlink()
# Store in cache via cache_manager
cached = cache_manager.put(tmp_path, node_type="upload", move=True)
content_hash = cached.content_hash
# Save uploader metadata
actor_id = f"@{username}@{L2_DOMAIN}"
@@ -1754,8 +1720,8 @@ class PublishRequest(BaseModel):
async def get_cache_meta(content_hash: str, username: str = Depends(get_required_user)):
"""Get metadata for a cached file."""
# Check file exists
cache_path = CACHE_DIR / content_hash
if not cache_path.exists():
cache_path = get_cache_path(content_hash)
if not cache_path:
raise HTTPException(404, "Content not found")
# Check ownership
@@ -1770,8 +1736,8 @@ async def get_cache_meta(content_hash: str, username: str = Depends(get_required
async def update_cache_meta(content_hash: str, update: CacheMetaUpdate, username: str = Depends(get_required_user)):
"""Update metadata for a cached file."""
# Check file exists
cache_path = CACHE_DIR / content_hash
if not cache_path.exists():
cache_path = get_cache_path(content_hash)
if not cache_path:
raise HTTPException(404, "Content not found")
# Check ownership
@@ -1819,8 +1785,8 @@ async def publish_cache_to_l2(
Requires origin to be set in metadata before publishing.
"""
# Check file exists
cache_path = CACHE_DIR / content_hash
if not cache_path.exists():
cache_path = get_cache_path(content_hash)
if not cache_path:
raise HTTPException(404, "Content not found")
# Check ownership
@@ -1907,8 +1873,8 @@ async def republish_cache_to_l2(
Only works for already-published items.
"""
# Check file exists
cache_path = CACHE_DIR / content_hash
if not cache_path.exists():
cache_path = get_cache_path(content_hash)
if not cache_path:
raise HTTPException(404, "Content not found")
# Check ownership
@@ -2547,8 +2513,8 @@ async def ui_runs(request: Request):
''')
# Show input and output side by side
has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists()
has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists()
has_input = run.inputs and cache_manager.has_content(run.inputs[0])
has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash)
if has_input or has_output:
html_parts.append('<div class="grid gap-4 sm:grid-cols-2">')
@@ -2556,7 +2522,7 @@ async def ui_runs(request: Request):
# Input box
if has_input:
input_hash = run.inputs[0]
input_media_type = detect_media_type(CACHE_DIR / input_hash)
input_media_type = detect_media_type(get_cache_path(input_hash))
html_parts.append(f'''
<div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Input: {input_hash[:16]}...</div>
@@ -2572,7 +2538,7 @@ async def ui_runs(request: Request):
# Output box
if has_output:
output_hash = run.output_hash
output_media_type = detect_media_type(CACHE_DIR / output_hash)
output_media_type = detect_media_type(get_cache_path(output_hash))
html_parts.append(f'''
<div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Output: {output_hash[:16]}...</div>
@@ -2666,8 +2632,8 @@ async def ui_cache_list(
for item in cache_items[:50]: # Limit to 50 items
content_hash = item["hash"]
cache_path = CACHE_DIR / content_hash
media_type = detect_media_type(cache_path)
cache_path = get_cache_path(content_hash)
media_type = detect_media_type(cache_path) if cache_path else "unknown"
# Format size
size = item["size"]
@@ -2770,15 +2736,15 @@ async def ui_run_partial(run_id: str, request: Request):
'''
# Show input and output side by side
has_input = run.inputs and (CACHE_DIR / run.inputs[0]).exists()
has_output = run.status == "completed" and run.output_hash and (CACHE_DIR / run.output_hash).exists()
has_input = run.inputs and cache_manager.has_content(run.inputs[0])
has_output = run.status == "completed" and run.output_hash and cache_manager.has_content(run.output_hash)
if has_input or has_output:
html += '<div class="grid gap-4 sm:grid-cols-2">'
if has_input:
input_hash = run.inputs[0]
input_media_type = detect_media_type(CACHE_DIR / input_hash)
input_media_type = detect_media_type(get_cache_path(input_hash))
html += f'''
<div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Input: {input_hash[:16]}...</div>
@@ -2793,7 +2759,7 @@ async def ui_run_partial(run_id: str, request: Request):
if has_output:
output_hash = run.output_hash
output_media_type = detect_media_type(CACHE_DIR / output_hash)
output_media_type = detect_media_type(get_cache_path(output_hash))
html += f'''
<div class="bg-dark-600 rounded-lg p-3">
<div class="text-xs text-gray-400 mb-2">Output: {output_hash[:16]}...</div>