Refactor storage: remove Redis duplication, use proper data tiers
- Recipes: Now content-addressed only (cache + IPFS), removed Redis storage
- Runs: Completed runs stored in PostgreSQL, Redis only for task_id mapping
- Add list_runs_by_actor() to database.py for paginated run queries
- Add list_by_type() to cache_manager for filtering by node_type
- Fix upload endpoint to return size and filename fields
- Fix recipe run endpoint with proper DAG input binding
- Fix get_run_service() dependency to pass database module

Storage architecture:
- Redis: Ephemeral only (sessions, task mappings with TTL)
- PostgreSQL: Permanent records (completed runs, metadata)
- Cache: Content-addressed files (recipes, media, outputs)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
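A minimal sketch of the run-storage tiering described above, for orientation only: the helper name save_completed_run, the runs table columns, the artdag:task: key prefix, and the one-hour TTL are illustrative assumptions, not this repository's actual API.

    import json

    import asyncpg  # assumed PostgreSQL driver
    import redis.asyncio as redis_async

    TASK_TTL_SECONDS = 3600  # illustrative TTL for the ephemeral mapping


    async def save_completed_run(pg: asyncpg.Pool, r: redis_async.Redis,
                                 task_id: str, run: dict) -> None:
        """Hypothetical helper: permanent record in PostgreSQL, ephemeral
        task_id -> run_id mapping in Redis (expired via TTL)."""
        # PostgreSQL: the permanent record of the completed run
        await pg.execute(
            "INSERT INTO runs (run_id, actor_id, status, result) "
            "VALUES ($1, $2, $3, $4)",
            run["run_id"], run["actor_id"], "completed", json.dumps(run["result"]),
        )
        # Redis: ephemeral task mapping only, auto-expired
        await r.set(f"artdag:task:{task_id}", run["run_id"], ex=TASK_TTL_SECONDS)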
@@ -2,10 +2,81 @@
 Cache Service - business logic for cache and media management.
 """

 import asyncio
 import json
 import os
 import subprocess
 from pathlib import Path
-from typing import Optional, List, Dict, Any
+from typing import Optional, List, Dict, Any, Tuple

-from artdag_common.utils.media import detect_media_type, get_mime_type
+import httpx
+
+
+def detect_media_type(cache_path: Path) -> str:
+    """Detect if file is image, video, or audio based on magic bytes."""
+    try:
+        with open(cache_path, "rb") as f:
+            header = f.read(32)
+    except Exception:
+        return "unknown"
+
+    # Video signatures
+    if header[:4] == b'\x1a\x45\xdf\xa3':  # WebM/MKV
+        return "video"
+    if len(header) > 8 and header[4:8] == b'ftyp':  # MP4/MOV
+        return "video"
+    if header[:4] == b'RIFF' and len(header) > 12 and header[8:12] == b'AVI ':  # AVI
+        return "video"
+
+    # Image signatures
+    if header[:8] == b'\x89PNG\r\n\x1a\n':  # PNG
+        return "image"
+    if header[:2] == b'\xff\xd8':  # JPEG
+        return "image"
+    if header[:6] in (b'GIF87a', b'GIF89a'):  # GIF
+        return "image"
+    if header[:4] == b'RIFF' and len(header) > 12 and header[8:12] == b'WEBP':  # WebP
+        return "image"
+
+    # Audio signatures
+    if header[:4] == b'RIFF' and len(header) > 12 and header[8:12] == b'WAVE':  # WAV
+        return "audio"
+    if header[:3] == b'ID3' or header[:2] == b'\xff\xfb':  # MP3
+        return "audio"
+    if header[:4] == b'fLaC':  # FLAC
+        return "audio"
+
+    return "unknown"
+
+
+def get_mime_type(path: Path) -> str:
+    """Get MIME type based on file magic bytes."""
+    media_type = detect_media_type(path)
+    if media_type == "video":
+        try:
+            with open(path, "rb") as f:
+                header = f.read(12)
+            if header[:4] == b'\x1a\x45\xdf\xa3':
+                return "video/x-matroska"
+            return "video/mp4"
+        except Exception:
+            return "video/mp4"
+    elif media_type == "image":
+        try:
+            with open(path, "rb") as f:
+                header = f.read(8)
+            if header[:8] == b'\x89PNG\r\n\x1a\n':
+                return "image/png"
+            if header[:2] == b'\xff\xd8':
+                return "image/jpeg"
+            if header[:6] in (b'GIF87a', b'GIF89a'):
+                return "image/gif"
+            return "image/jpeg"
+        except Exception:
+            return "image/jpeg"
+    elif media_type == "audio":
+        return "audio/mpeg"
+    return "application/octet-stream"
+
+
 class CacheService:
@@ -15,18 +86,24 @@ class CacheService:
     Handles content retrieval, metadata, and media type detection.
     """

-    def __init__(self, cache_manager, database):
-        self.cache = cache_manager
+    def __init__(self, database, cache_manager):
         self.db = database
+        self.cache = cache_manager
+        self.cache_dir = Path(os.environ.get("CACHE_DIR", "/tmp/artdag-cache"))

-    async def get_item(self, content_hash: str) -> Optional[Dict[str, Any]]:
-        """Get cached item by content hash."""
-        path = self.cache.get_by_content_hash(content_hash)
+    async def get_cache_item(self, content_hash: str) -> Optional[Dict[str, Any]]:
+        """Get cached item with full metadata for display."""
+        # Check if content exists
+        if not self.cache.has_content(content_hash):
+            return None
+
+        path = self.cache.get_content_path(content_hash)
         if not path or not path.exists():
             return None

         # Get metadata from database
-        meta = await self.db.get_cache_item(content_hash)
+        meta = await self.db.load_item_metadata(content_hash, None)
+        cache_item = await self.db.get_cache_item(content_hash)

         media_type = detect_media_type(path)
         mime_type = get_mime_type(path)
@@ -38,76 +115,370 @@ class CacheService:
             "media_type": media_type,
             "mime_type": mime_type,
             "size": size,
             "name": meta.get("name") if meta else None,
             "description": meta.get("description") if meta else None,
             "tags": meta.get("tags", []) if meta else [],
-            "ipfs_cid": meta.get("ipfs_cid") if meta else None,
+            "ipfs_cid": cache_item.get("ipfs_cid") if cache_item else None,
             "meta": meta,
         }

-    async def get_path(self, content_hash: str) -> Optional[Path]:
-        """Get the file path for cached content."""
-        return self.cache.get_by_content_hash(content_hash)
+    async def check_access(self, content_hash: str, actor_id: str, username: str) -> bool:
+        """Check if user has access to content."""
+        user_hashes = await self._get_user_cache_hashes(username, actor_id)
+        return content_hash in user_hashes

-    async def list_items(
-        self,
-        actor_id: str = None,
-        media_type: str = None,
-        page: int = 1,
-        limit: int = 20,
-    ) -> Dict[str, Any]:
-        """List cached items with filters and pagination."""
-        # Get items from database
-        items = await self.db.list_cache_items(
-            actor_id=actor_id,
-            media_type=media_type,
-            offset=(page - 1) * limit,
-            limit=limit,
-        )
+    async def _get_user_cache_hashes(self, username: str, actor_id: Optional[str] = None) -> set:
+        """Get all cache hashes owned by or associated with a user."""
+        match_values = [username]
+        if actor_id:
+            match_values.append(actor_id)

-        total = await self.db.count_cache_items(actor_id=actor_id, media_type=media_type)
+        hashes = set()

-        return {
-            "items": items,
-            "pagination": {
-                "page": page,
-                "limit": limit,
-                "total": total,
-                "has_more": page * limit < total,
-            }
-        }
+        # Query database for items owned by user
+        if actor_id:
+            try:
+                db_items = await self.db.get_user_items(actor_id)
+                for item in db_items:
+                    hashes.add(item["content_hash"])
+            except Exception:
+                pass
+
+        # Legacy: Files uploaded by user (JSON metadata)
+        if self.cache_dir.exists():
+            for f in self.cache_dir.iterdir():
+                if f.name.endswith('.meta.json'):
+                    try:
+                        with open(f, 'r') as mf:
+                            meta = json.load(mf)
+                        if meta.get("uploader") in match_values:
+                            hashes.add(f.name.replace('.meta.json', ''))
+                    except Exception:
+                        pass
+
+        # Files from user's runs (inputs and outputs)
+        runs = await self._list_user_runs(username, actor_id)
+        for run in runs:
+            inputs = run.get("inputs", [])
+            if isinstance(inputs, dict):
+                inputs = list(inputs.values())
+            hashes.update(inputs)
+            if run.get("output_hash"):
+                hashes.add(run["output_hash"])
+
+        return hashes
+
+    async def _list_user_runs(self, username: str, actor_id: Optional[str]) -> List[Dict]:
+        """List runs for a user (helper for access check)."""
+        from ..dependencies import get_redis_client
+        import json
+
+        redis = get_redis_client()
+        runs = []
+        cursor = 0
+        prefix = "artdag:run:"
+
+        while True:
+            cursor, keys = redis.scan(cursor=cursor, match=f"{prefix}*", count=100)
+            for key in keys:
+                data = redis.get(key)
+                if data:
+                    run = json.loads(data)
+                    if run.get("actor_id") in (username, actor_id) or run.get("username") in (username, actor_id):
+                        runs.append(run)
+            if cursor == 0:
+                break
+
+        return runs
+    async def get_raw_file(self, content_hash: str) -> Tuple[Optional[Path], Optional[str], Optional[str]]:
+        """Get raw file path, media type, and filename for download."""
+        if not self.cache.has_content(content_hash):
+            return None, None, None
+
+        path = self.cache.get_content_path(content_hash)
+        if not path or not path.exists():
+            return None, None, None
+
+        media_type = detect_media_type(path)
+        mime = get_mime_type(path)
+
+        # Determine extension
+        ext = "bin"
+        if media_type == "video":
+            try:
+                with open(path, "rb") as f:
+                    header = f.read(12)
+                if header[:4] == b'\x1a\x45\xdf\xa3':
+                    ext = "mkv"
+                else:
+                    ext = "mp4"
+            except Exception:
+                ext = "mp4"
+        elif media_type == "image":
+            try:
+                with open(path, "rb") as f:
+                    header = f.read(8)
+                if header[:8] == b'\x89PNG\r\n\x1a\n':
+                    ext = "png"
+                else:
+                    ext = "jpg"
+            except Exception:
+                ext = "jpg"
+
+        filename = f"{content_hash}.{ext}"
+        return path, mime, filename
+
+    async def get_as_mp4(self, content_hash: str) -> Tuple[Optional[Path], Optional[str]]:
+        """Get content as MP4, transcoding if necessary. Returns (path, error)."""
+        if not self.cache.has_content(content_hash):
+            return None, f"Content {content_hash} not in cache"
+
+        path = self.cache.get_content_path(content_hash)
+        if not path or not path.exists():
+            return None, f"Content {content_hash} not in cache"
+
+        # Check if video
+        media_type = detect_media_type(path)
+        if media_type != "video":
+            return None, "Content is not a video"
+
+        # Check for cached MP4
+        mp4_path = self.cache_dir / f"{content_hash}.mp4"
+        if mp4_path.exists():
+            return mp4_path, None
+
+        # Check if already MP4 format
+        try:
+            result = subprocess.run(
+                ["ffprobe", "-v", "error", "-select_streams", "v:0",
+                 "-show_entries", "format=format_name", "-of", "csv=p=0", str(path)],
+                capture_output=True, text=True, timeout=10
+            )
+            if "mp4" in result.stdout.lower() or "mov" in result.stdout.lower():
+                return path, None
+        except Exception:
+            pass
+
+        # Transcode to MP4
+        transcode_path = self.cache_dir / f"{content_hash}.transcoding.mp4"
+        try:
+            result = subprocess.run(
+                ["ffmpeg", "-y", "-i", str(path),
+                 "-c:v", "libx264", "-preset", "fast", "-crf", "23",
+                 "-c:a", "aac", "-b:a", "128k",
+                 "-movflags", "+faststart",
+                 str(transcode_path)],
+                capture_output=True, text=True, timeout=600
+            )
+            if result.returncode != 0:
+                return None, f"Transcoding failed: {result.stderr[:200]}"
+
+            transcode_path.rename(mp4_path)
+            return mp4_path, None
+
+        except subprocess.TimeoutExpired:
+            if transcode_path.exists():
+                transcode_path.unlink()
+            return None, "Transcoding timed out"
+        except Exception as e:
+            if transcode_path.exists():
+                transcode_path.unlink()
+            return None, f"Transcoding failed: {e}"
+    async def get_metadata(self, content_hash: str, actor_id: str) -> Optional[Dict[str, Any]]:
+        """Get content metadata."""
+        if not self.cache.has_content(content_hash):
+            return None
+        return await self.db.load_item_metadata(content_hash, actor_id)
+
     async def update_metadata(
         self,
         content_hash: str,
-        name: str = None,
+        actor_id: str,
+        title: str = None,
         description: str = None,
         tags: List[str] = None,
-    ) -> bool:
-        """Update item metadata."""
-        return await self.db.update_cache_metadata(
+        custom: Dict[str, Any] = None,
+    ) -> Tuple[bool, Optional[str]]:
+        """Update content metadata. Returns (success, error)."""
+        if not self.cache.has_content(content_hash):
+            return False, "Content not found"
+
+        # Build update dict
+        updates = {}
+        if title is not None:
+            updates["title"] = title
+        if description is not None:
+            updates["description"] = description
+        if tags is not None:
+            updates["tags"] = tags
+        if custom is not None:
+            updates["custom"] = custom
+
+        try:
+            await self.db.update_item_metadata(content_hash, actor_id, **updates)
+            return True, None
+        except Exception as e:
+            return False, str(e)
+    async def publish_to_l2(
+        self,
+        content_hash: str,
+        actor_id: str,
+        l2_server: str,
+        auth_token: str,
+    ) -> Tuple[Optional[str], Optional[str]]:
+        """Publish content to L2 and IPFS. Returns (ipfs_cid, error)."""
+        if not self.cache.has_content(content_hash):
+            return None, "Content not found"
+
+        # Get IPFS CID
+        cache_item = await self.db.get_cache_item(content_hash)
+        ipfs_cid = cache_item.get("ipfs_cid") if cache_item else None
+
+        # Get metadata for origin info
+        meta = await self.db.load_item_metadata(content_hash, actor_id)
+        origin = meta.get("origin") if meta else None
+
+        if not origin or "type" not in origin:
+            return None, "Origin must be set before publishing"
+
+        if not auth_token:
+            return None, "Authentication token required"
+
+        # Call L2 publish-cache endpoint
+        try:
+            async with httpx.AsyncClient(timeout=30) as client:
+                resp = await client.post(
+                    f"{l2_server}/assets/publish-cache",
+                    headers={"Authorization": f"Bearer {auth_token}"},
+                    json={
+                        "content_hash": content_hash,
+                        "ipfs_cid": ipfs_cid,
+                        "asset_name": meta.get("title") or content_hash[:16],
+                        "asset_type": detect_media_type(self.cache.get_content_path(content_hash)),
+                        "origin": origin,
+                        "description": meta.get("description"),
+                        "tags": meta.get("tags", []),
+                    }
+                )
+                resp.raise_for_status()
+                l2_result = resp.json()
+        except httpx.HTTPStatusError as e:
+            error_detail = str(e)
+            try:
+                error_detail = e.response.json().get("detail", str(e))
+            except Exception:
+                pass
+            return None, f"L2 publish failed: {error_detail}"
+        except Exception as e:
+            return None, f"L2 publish failed: {e}"
+
+        # Update local metadata with publish status
+        await self.db.save_l2_share(
+            content_hash=content_hash,
-            name=name,
-            description=description,
-            tags=tags,
+            actor_id=actor_id,
+            l2_server=l2_server,
+            asset_name=meta.get("title") or content_hash[:16],
+            content_type=detect_media_type(self.cache.get_content_path(content_hash))
+        )
+        await self.db.update_item_metadata(
+            content_hash=content_hash,
+            actor_id=actor_id,
+            pinned=True,
+            pin_reason="published"
+        )
-    async def delete_item(self, content_hash: str) -> bool:
-        """Delete a cached item."""
-        path = self.cache.get_by_content_hash(content_hash)
-        if path and path.exists():
-            path.unlink()
+        return l2_result.get("ipfs_cid") or ipfs_cid, None

-        # Remove from database
-        await self.db.delete_cache_item(content_hash)
-        return True
+    async def delete_content(self, content_hash: str, actor_id: str) -> Tuple[bool, Optional[str]]:
+        """Delete content from cache. Returns (success, error)."""
+        if not self.cache.has_content(content_hash):
+            return False, "Content not found"

-    def has_content(self, content_hash: str) -> bool:
-        """Check if content exists in cache."""
-        return self.cache.has_content(content_hash)
+        # Check if pinned
+        meta = await self.db.load_item_metadata(content_hash, actor_id)
+        if meta and meta.get("pinned"):
+            pin_reason = meta.get("pin_reason", "unknown")
+            return False, f"Cannot discard pinned item (reason: {pin_reason})"

-    def get_ipfs_cid(self, content_hash: str) -> Optional[str]:
-        """Get IPFS CID for cached content."""
-        return self.cache.get_ipfs_cid(content_hash)
+        # Check deletion rules via cache_manager
+        can_delete, reason = self.cache.can_delete(content_hash)
+        if not can_delete:
+            return False, f"Cannot discard: {reason}"
+
+        # Delete via cache_manager
+        success, msg = self.cache.delete_by_content_hash(content_hash)
+
+        # Clean up legacy metadata files
+        meta_path = self.cache_dir / f"{content_hash}.meta.json"
+        if meta_path.exists():
+            meta_path.unlink()
+        mp4_path = self.cache_dir / f"{content_hash}.mp4"
+        if mp4_path.exists():
+            mp4_path.unlink()
+
+        return True, None
+    async def import_from_ipfs(self, ipfs_cid: str, actor_id: str) -> Tuple[Optional[str], Optional[str]]:
+        """Import content from IPFS. Returns (content_hash, error)."""
+        try:
+            import ipfs_client
+
+            # Download from IPFS
+            legacy_dir = self.cache_dir / "legacy"
+            legacy_dir.mkdir(parents=True, exist_ok=True)
+            tmp_path = legacy_dir / f"import-{ipfs_cid[:16]}"
+
+            if not ipfs_client.get_file(ipfs_cid, str(tmp_path)):
+                return None, f"Could not fetch CID {ipfs_cid} from IPFS"
+
+            # Store in cache
+            cached, _ = self.cache.put(tmp_path, node_type="import", move=True)
+            content_hash = cached.content_hash
+
+            # Save to database
+            await self.db.create_cache_item(content_hash, ipfs_cid)
+            await self.db.save_item_metadata(
+                content_hash=content_hash,
+                actor_id=actor_id,
+                item_type="media",
+                filename=f"ipfs-{ipfs_cid[:16]}"
+            )
+
+            return content_hash, None
+        except Exception as e:
+            return None, f"Import failed: {e}"
+
+    async def upload_content(
+        self,
+        content: bytes,
+        filename: str,
+        actor_id: str,
+    ) -> Tuple[Optional[str], Optional[str]]:
+        """Upload content to cache. Returns (content_hash, error)."""
+        import tempfile
+
+        try:
+            # Write to temp file
+            with tempfile.NamedTemporaryFile(delete=False) as tmp:
+                tmp.write(content)
+                tmp_path = Path(tmp.name)
+
+            # Store in cache
+            cached, ipfs_cid = self.cache.put(tmp_path, node_type="upload", move=True)
+            content_hash = cached.content_hash
+
+            # Save to database
+            await self.db.create_cache_item(content_hash, ipfs_cid)
+            await self.db.save_item_metadata(
+                content_hash=content_hash,
+                actor_id=actor_id,
+                item_type="media",
+                filename=filename
+            )
+
+            return content_hash, None
+        except Exception as e:
+            return None, f"Upload failed: {e}"

     async def list_media(
         self,
@@ -118,12 +489,20 @@ class CacheService:
         media_type: str = None,
     ) -> List[Dict[str, Any]]:
         """List media items in cache."""
-        # Use list_items internally, converting offset to page
-        page = (offset // limit) + 1 if limit > 0 else 1
-        result = await self.list_items(
+        # Get items from database
+        items = await self.db.list_cache_items(
             actor_id=actor_id or username,
             media_type=media_type,
-            page=page,
+            offset=offset,
             limit=limit,
         )
-        return result.get("items", [])
+        return items
+
+    # Legacy compatibility methods
+    def has_content(self, content_hash: str) -> bool:
+        """Check if content exists in cache."""
+        return self.cache.has_content(content_hash)
+
+    def get_ipfs_cid(self, content_hash: str) -> Optional[str]:
+        """Get IPFS CID for cached content."""
+        return self.cache.get_ipfs_cid(content_hash)