Fix completed runs not appearing in list + add purge-failed endpoint

- Update save_run_cache to also update actor_id, recipe, inputs on conflict
- Add logging for actor_id when saving runs to run_cache
- Add admin endpoint DELETE /runs/admin/purge-failed to delete all failed runs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-02 23:24:39 +00:00
parent 581da68b3b
commit d20eef76ad
24 changed files with 1671 additions and 453 deletions

View File

@@ -2,9 +2,12 @@
#
# Tasks:
# 1. run_stream - Execute a streaming S-expression recipe
# 2. upload_to_ipfs - Background IPFS upload for media files
from .streaming import run_stream
from .ipfs_upload import upload_to_ipfs
__all__ = [
"run_stream",
"upload_to_ipfs",
]

83
tasks/ipfs_upload.py Normal file
View File

@@ -0,0 +1,83 @@
"""
Background IPFS upload task.
Uploads files to IPFS in the background after initial local storage.
This allows fast uploads while still getting IPFS CIDs eventually.
"""
import logging
import os
import sys
from pathlib import Path
from typing import Optional
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from celery_app import app
import ipfs_client
logger = logging.getLogger(__name__)
@app.task(bind=True, max_retries=3, default_retry_delay=60)
def upload_to_ipfs(self, local_cid: str, actor_id: str) -> Optional[str]:
"""
Upload a locally cached file to IPFS in the background.
Args:
local_cid: The local content hash of the file
actor_id: The user who uploaded the file
Returns:
IPFS CID if successful, None if failed
"""
from cache_manager import get_cache_manager
import asyncio
import database
logger.info(f"Background IPFS upload starting for {local_cid[:16]}...")
try:
cache_mgr = get_cache_manager()
# Get the file path from local cache
file_path = cache_mgr.get_by_cid(local_cid)
if not file_path or not file_path.exists():
logger.error(f"File not found for local CID {local_cid[:16]}...")
return None
# Upload to IPFS
logger.info(f"Uploading {file_path} to IPFS...")
ipfs_cid = ipfs_client.add_file(file_path)
if not ipfs_cid:
logger.error(f"IPFS upload failed for {local_cid[:16]}...")
raise self.retry(exc=Exception("IPFS upload failed"))
logger.info(f"IPFS upload successful: {local_cid[:16]}... -> {ipfs_cid[:16]}...")
# Update database with IPFS CID
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
# Initialize database pool if needed
loop.run_until_complete(database.init_pool())
# Update cache_items table
loop.run_until_complete(
database.update_cache_item_ipfs_cid(local_cid, ipfs_cid)
)
# Create index from IPFS CID to local cache
cache_mgr._set_content_index(ipfs_cid, local_cid)
logger.info(f"Database updated with IPFS CID for {local_cid[:16]}...")
finally:
loop.close()
return ipfs_cid
except Exception as e:
logger.error(f"Background IPFS upload error: {e}")
raise self.retry(exc=e)

View File

@@ -24,6 +24,11 @@ from cache_manager import get_cache_manager
logger = logging.getLogger(__name__)
# Module-level event loop for database operations
_resolve_loop = None
_db_initialized = False
def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
"""
Resolve an asset reference (CID or friendly name) to a file path.
@@ -35,6 +40,7 @@ def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
Returns:
Path to the asset file, or None if not found
"""
global _resolve_loop, _db_initialized
cache_mgr = get_cache_manager()
# Try as direct CID first
@@ -46,15 +52,22 @@ def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
# Try as friendly name if actor_id provided
if actor_id:
import asyncio
import database
from database import resolve_friendly_name
try:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
cid = loop.run_until_complete(resolve_friendly_name(actor_id, ref))
finally:
loop.close()
# Reuse event loop for database operations
if _resolve_loop is None or _resolve_loop.is_closed():
_resolve_loop = asyncio.new_event_loop()
asyncio.set_event_loop(_resolve_loop)
_db_initialized = False
# Initialize database pool once per loop
if not _db_initialized:
_resolve_loop.run_until_complete(database.init_db())
_db_initialized = True
cid = _resolve_loop.run_until_complete(resolve_friendly_name(actor_id, ref))
if cid:
path = cache_mgr.get_by_cid(cid)
@@ -173,6 +186,7 @@ def create_cid_primitives(actor_id: Optional[str] = None):
@app.task(bind=True, name='tasks.run_stream')
def run_stream(
self,
run_id: str,
recipe_sexp: str,
output_name: str = "output.mp4",
duration: Optional[float] = None,
@@ -185,6 +199,7 @@ def run_stream(
Execute a streaming S-expression recipe.
Args:
run_id: The run ID for database tracking
recipe_sexp: The recipe S-expression content
output_name: Name for the output file
duration: Optional duration override (seconds)
@@ -197,7 +212,7 @@ def run_stream(
Dict with output_cid, output_path, and status
"""
task_id = self.request.id
logger.info(f"Starting stream task {task_id}")
logger.info(f"Starting stream task {task_id} for run {run_id}")
self.update_state(state='INITIALIZING', meta={'progress': 0})
@@ -237,8 +252,8 @@ def run_stream(
# Import the streaming interpreter
from streaming.stream_sexp_generic import StreamInterpreter
# Create interpreter
interp = StreamInterpreter(str(recipe_path))
# Create interpreter (pass actor_id for friendly name resolution)
interp = StreamInterpreter(str(recipe_path), actor_id=actor_id)
# Set primitive library directory explicitly
interp.primitive_lib_dir = sexp_effects_dir / "primitive_libs"
@@ -258,8 +273,17 @@ def run_stream(
logger.info(f"Rendering to {output_path}")
interp.run(duration=duration, output=str(output_path))
# Check for interpreter errors
if interp.errors:
error_msg = f"Rendering failed with {len(interp.errors)} errors: {interp.errors[0]}"
raise RuntimeError(error_msg)
self.update_state(state='CACHING', meta={'progress': 90})
# Validate output file (must be > 1KB to have actual frames)
if output_path.exists() and output_path.stat().st_size < 1024:
raise RuntimeError(f"Output file is too small ({output_path.stat().st_size} bytes) - rendering likely failed")
# Store output in cache
if output_path.exists():
cache_mgr = get_cache_manager()
@@ -271,16 +295,73 @@ def run_stream(
logger.info(f"Stream output cached: CID={cached_file.cid}, IPFS={ipfs_cid}")
# Save to database
import asyncio
import database
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
# Initialize database pool if needed
if database.pool is None:
loop.run_until_complete(database.init_db())
# Get recipe CID from pending_run
pending = loop.run_until_complete(database.get_pending_run(run_id))
recipe_cid = pending.get("recipe", "streaming") if pending else "streaming"
# Save to run_cache for completed runs
logger.info(f"Saving run {run_id} to run_cache with actor_id={actor_id}")
loop.run_until_complete(database.save_run_cache(
run_id=run_id,
output_cid=cached_file.cid,
recipe=recipe_cid,
inputs=[],
ipfs_cid=ipfs_cid,
actor_id=actor_id,
))
# Update pending run status
loop.run_until_complete(database.update_pending_run_status(
run_id=run_id,
status="completed",
))
logger.info(f"Saved run {run_id} to database with actor_id={actor_id}")
except Exception as db_err:
logger.warning(f"Failed to save run to database: {db_err}")
finally:
loop.close()
return {
"status": "completed",
"run_id": run_id,
"task_id": task_id,
"output_cid": cached_file.cid,
"ipfs_cid": ipfs_cid,
"output_path": str(cached_file.path),
}
else:
# Update pending run status to failed
import asyncio
import database
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
if database.pool is None:
loop.run_until_complete(database.init_db())
loop.run_until_complete(database.update_pending_run_status(
run_id=run_id,
status="failed",
error="Output file not created",
))
except Exception as db_err:
logger.warning(f"Failed to update run status: {db_err}")
finally:
loop.close()
return {
"status": "failed",
"run_id": run_id,
"task_id": task_id,
"error": "Output file not created",
}
@@ -290,8 +371,28 @@ def run_stream(
import traceback
traceback.print_exc()
# Update pending run status to failed
import asyncio
import database
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
if database.pool is None:
loop.run_until_complete(database.init_db())
loop.run_until_complete(database.update_pending_run_status(
run_id=run_id,
status="failed",
error=str(e),
))
except Exception as db_err:
logger.warning(f"Failed to update run status: {db_err}")
finally:
loop.close()
return {
"status": "failed",
"run_id": run_id,
"task_id": task_id,
"error": str(e),
}