Refactor storage: remove Redis duplication, use proper data tiers

- Recipes: Now content-addressed only (cache + IPFS), removed Redis storage
- Runs: Completed runs stored in PostgreSQL, Redis only for task_id mapping
- Add list_runs_by_actor() to database.py for paginated run queries
- Add list_by_type() to cache_manager for filtering by node_type
- Fix upload endpoint to return size and filename fields
- Fix recipe run endpoint with proper DAG input binding
- Fix get_run_service() dependency to pass database module

Storage architecture:
- Redis: Ephemeral only (sessions, task mappings with TTL)
- PostgreSQL: Permanent records (completed runs, metadata)
- Cache: Content-addressed files (recipes, media, outputs)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-01-11 14:05:31 +00:00
parent 8591faf0fc
commit 854396680f
8 changed files with 965 additions and 264 deletions

View File

@@ -1,9 +1,14 @@
"""
Recipe Service - business logic for recipe management.
Recipes are content-addressed YAML files stored in the cache (and IPFS).
The recipe ID is the content hash of the YAML file.
"""
from typing import Optional, List, Dict, Any
import json
import tempfile
from pathlib import Path
from typing import Optional, List, Dict, Any, Tuple
import yaml
@@ -11,49 +16,54 @@ class RecipeService:
"""
Service for managing recipes.
Handles recipe parsing, validation, and DAG building.
Recipes are stored in the content-addressed cache, not Redis.
"""
def __init__(self, redis, cache):
# Redis kept for compatibility but not used for recipe storage
self.redis = redis
self.cache = cache
self.recipe_prefix = "recipe:"
async def get_recipe(self, recipe_id: str) -> Optional[Dict[str, Any]]:
    """Get a recipe by ID (content hash).

    The recipe is read from the content-addressed cache only; Redis is not
    consulted.

    Args:
        recipe_id: Content hash identifying the recipe file in the cache.

    Returns:
        The parsed YAML document, or ``None`` when the cache has no file for
        this hash. When the document is a mapping it is annotated with
        ``recipe_id`` and, if the cache reports one, ``ipfs_cid``.
    """
    # Get from cache (content-addressed storage)
    path = self.cache.get_by_content_hash(recipe_id)
    if not path or not path.exists():
        return None

    with open(path) as f:
        recipe_data = yaml.safe_load(f)

    # Only a mapping can carry the extra keys; any other YAML value is
    # returned untouched.
    if isinstance(recipe_data, dict):
        # Add the recipe_id to the data for convenience
        recipe_data["recipe_id"] = recipe_id

        # Get IPFS CID if available
        ipfs_cid = self.cache.get_ipfs_cid(recipe_id)
        if ipfs_cid:
            recipe_data["ipfs_cid"] = ipfs_cid

    return recipe_data
async def list_recipes(self, actor_id: Optional[str] = None, offset: int = 0, limit: int = 20) -> list:
    """
    List available recipes, sorted by name, with pagination.

    Note: This scans the cache for recipe files. For production,
    you might want a database index of recipes by owner.

    Args:
        actor_id: When given, only recipes whose ``uploader`` field equals
            this value are returned.
        offset: Index of the first recipe to return after sorting.
        limit: Maximum number of recipes to return.

    Returns:
        The requested slice of matching recipe dicts; an empty list when the
        cache cannot enumerate entries by type.
    """
    # Without a type index on the cache there is nothing to enumerate.
    if not hasattr(self.cache, 'list_by_type'):
        return []

    recipes = []
    for content_hash in self.cache.list_by_type('recipe'):
        recipe = await self.get_recipe(content_hash)
        # Skip missing entries and YAML documents that are not mappings;
        # they have no owner or name to filter and sort on.
        if not recipe or not isinstance(recipe, dict):
            continue
        # Filter by actor if specified
        if actor_id is None or recipe.get("uploader") == actor_id:
            recipes.append(recipe)

    # Sort by name; a missing or null name sorts as the empty string so that
    # mixed None/str values cannot raise TypeError.
    recipes.sort(key=lambda r: str(r.get("name") or ""))

    # Paginate
    return recipes[offset:offset + limit]
async def save_recipe(self, recipe_id: str, recipe_data: Dict[str, Any]) -> None:
    """Write ``recipe_data`` to Redis as JSON under the prefixed recipe key.

    NOTE(review): the class docstring says recipes live in the
    content-addressed cache, not Redis - confirm this writer is still needed.
    """
    redis_key = f"{self.recipe_prefix}{recipe_id}"
    payload = json.dumps(recipe_data)
    self.redis.set(redis_key, payload)
async def upload_recipe(
    self,
    yaml_content: str,
    uploader: str,
    name: Optional[str] = None,
    description: Optional[str] = None,
) -> Tuple[Optional[str], Optional[str]]:
    """
    Upload a recipe from YAML content.

    The recipe is stored in the cache and optionally pinned to IPFS.

    Args:
        yaml_content: Raw YAML text of the recipe; must parse to a mapping.
        uploader: Identifier recorded in the stored recipe's ``uploader`` key.
        name: Optional value that overrides the recipe's ``name`` key.
        description: Optional value that overrides ``description``.

    Returns:
        ``(recipe_id, None)`` on success, where ``recipe_id`` is the content
        hash reported by the cache, or ``(None, error_message)`` on failure.
    """
    # Validate YAML
    try:
        recipe_data = yaml.safe_load(yaml_content)
    except yaml.YAMLError as e:
        return None, f"Invalid YAML: {e}"

    if not isinstance(recipe_data, dict):
        return None, "Recipe must be a YAML dictionary"

    # Add uploader info to the YAML before storing
    recipe_data["uploader"] = uploader
    if name:
        recipe_data["name"] = name
    if description:
        recipe_data["description"] = description

    # Serialize back to YAML (with added metadata)
    final_yaml = yaml.dump(recipe_data, default_flow_style=False)

    # Write to temp file for caching. The file is created with delete=False
    # so the cache can take it over, which means a failure has to remove it
    # explicitly or it would be left behind.
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".yaml", mode="w") as tmp:
            # Record the path before writing so a failed write is cleaned up too.
            tmp_path = Path(tmp.name)
            tmp.write(final_yaml)

        # Store in cache (content-addressed; the second element of the result
        # is the IPFS CID, which is not needed here).
        cached, _ipfs_cid = self.cache.put(tmp_path, node_type="recipe", move=True)
        return cached.content_hash, None
    except Exception as e:
        if tmp_path is not None:
            try:
                tmp_path.unlink()
            except OSError:
                # Already moved or removed by the cache; nothing left to clean up.
                pass
        return None, f"Failed to cache recipe: {e}"
async def delete_recipe(self, recipe_id: str, actor_id: Optional[str] = None) -> Tuple[bool, Optional[str]]:
    """
    Delete a recipe.

    Note: This only removes from local cache. IPFS copies persist.

    Args:
        recipe_id: Content hash of the recipe to delete.
        actor_id: When given, the delete is refused if the recipe records a
            different ``uploader``. Recipes with no recorded uploader are not
            protected by this check.

    Returns:
        ``(success, error_message)``; ``error_message`` is ``None`` on success.
    """
    # Get recipe to check ownership
    recipe = await self.get_recipe(recipe_id)
    if not recipe:
        return False, "Recipe not found"

    # Check ownership if actor_id provided
    if actor_id:
        # A cached YAML document that is not a mapping has no uploader field;
        # treat it like a recipe without an owner instead of raising.
        recipe_owner = recipe.get("uploader") if isinstance(recipe, dict) else None
        if recipe_owner and recipe_owner != actor_id:
            return False, "Cannot delete: you don't own this recipe"

    # Delete from cache
    try:
        if hasattr(self.cache, 'delete_by_content_hash'):
            success, msg = self.cache.delete_by_content_hash(recipe_id)
            if not success:
                return False, msg
        else:
            # Fallback: get path and delete directly
            path = self.cache.get_by_content_hash(recipe_id)
            if path and path.exists():
                path.unlink()

        return True, None
    except Exception as e:
        return False, f"Failed to delete: {e}"
def parse_yaml(self, yaml_content: str) -> Dict[str, Any]:
"""Parse recipe YAML content."""