Fix completed runs not appearing in list + add purge-failed endpoint

- Update save_run_cache to also update actor_id, recipe, and inputs on conflict
- Add logging for actor_id when saving runs to run_cache
- Add admin endpoint DELETE /runs/admin/purge-failed to delete all failed runs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-02 23:24:39 +00:00
parent 581da68b3b
commit d20eef76ad
24 changed files with 1671 additions and 453 deletions

View File

@@ -24,6 +24,11 @@ from cache_manager import get_cache_manager
logger = logging.getLogger(__name__)
# Module-level event loop for database operations
_resolve_loop = None
_db_initialized = False
def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
"""
Resolve an asset reference (CID or friendly name) to a file path.
@@ -35,6 +40,7 @@ def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
Returns:
Path to the asset file, or None if not found
"""
global _resolve_loop, _db_initialized
cache_mgr = get_cache_manager()
# Try as direct CID first
@@ -46,15 +52,22 @@ def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
# Try as friendly name if actor_id provided
if actor_id:
import asyncio
import database
from database import resolve_friendly_name
try:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
cid = loop.run_until_complete(resolve_friendly_name(actor_id, ref))
finally:
loop.close()
# Reuse event loop for database operations
if _resolve_loop is None or _resolve_loop.is_closed():
_resolve_loop = asyncio.new_event_loop()
asyncio.set_event_loop(_resolve_loop)
_db_initialized = False
# Initialize database pool once per loop
if not _db_initialized:
_resolve_loop.run_until_complete(database.init_db())
_db_initialized = True
cid = _resolve_loop.run_until_complete(resolve_friendly_name(actor_id, ref))
if cid:
path = cache_mgr.get_by_cid(cid)
@@ -173,6 +186,7 @@ def create_cid_primitives(actor_id: Optional[str] = None):
@app.task(bind=True, name='tasks.run_stream')
def run_stream(
self,
run_id: str,
recipe_sexp: str,
output_name: str = "output.mp4",
duration: Optional[float] = None,
@@ -185,6 +199,7 @@ def run_stream(
Execute a streaming S-expression recipe.
Args:
run_id: The run ID for database tracking
recipe_sexp: The recipe S-expression content
output_name: Name for the output file
duration: Optional duration override (seconds)
@@ -197,7 +212,7 @@ def run_stream(
Dict with output_cid, output_path, and status
"""
task_id = self.request.id
logger.info(f"Starting stream task {task_id}")
logger.info(f"Starting stream task {task_id} for run {run_id}")
self.update_state(state='INITIALIZING', meta={'progress': 0})
@@ -237,8 +252,8 @@ def run_stream(
# Import the streaming interpreter
from streaming.stream_sexp_generic import StreamInterpreter
# Create interpreter
interp = StreamInterpreter(str(recipe_path))
# Create interpreter (pass actor_id for friendly name resolution)
interp = StreamInterpreter(str(recipe_path), actor_id=actor_id)
# Set primitive library directory explicitly
interp.primitive_lib_dir = sexp_effects_dir / "primitive_libs"
@@ -258,8 +273,17 @@ def run_stream(
logger.info(f"Rendering to {output_path}")
interp.run(duration=duration, output=str(output_path))
# Check for interpreter errors
if interp.errors:
error_msg = f"Rendering failed with {len(interp.errors)} errors: {interp.errors[0]}"
raise RuntimeError(error_msg)
self.update_state(state='CACHING', meta={'progress': 90})
# Validate output file (must be > 1KB to have actual frames)
if output_path.exists() and output_path.stat().st_size < 1024:
raise RuntimeError(f"Output file is too small ({output_path.stat().st_size} bytes) - rendering likely failed")
# Store output in cache
if output_path.exists():
cache_mgr = get_cache_manager()
@@ -271,16 +295,73 @@ def run_stream(
logger.info(f"Stream output cached: CID={cached_file.cid}, IPFS={ipfs_cid}")
# Save to database
import asyncio
import database
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
# Initialize database pool if needed
if database.pool is None:
loop.run_until_complete(database.init_db())
# Get recipe CID from pending_run
pending = loop.run_until_complete(database.get_pending_run(run_id))
recipe_cid = pending.get("recipe", "streaming") if pending else "streaming"
# Save to run_cache for completed runs
logger.info(f"Saving run {run_id} to run_cache with actor_id={actor_id}")
loop.run_until_complete(database.save_run_cache(
run_id=run_id,
output_cid=cached_file.cid,
recipe=recipe_cid,
inputs=[],
ipfs_cid=ipfs_cid,
actor_id=actor_id,
))
# Update pending run status
loop.run_until_complete(database.update_pending_run_status(
run_id=run_id,
status="completed",
))
logger.info(f"Saved run {run_id} to database with actor_id={actor_id}")
except Exception as db_err:
logger.warning(f"Failed to save run to database: {db_err}")
finally:
loop.close()
return {
"status": "completed",
"run_id": run_id,
"task_id": task_id,
"output_cid": cached_file.cid,
"ipfs_cid": ipfs_cid,
"output_path": str(cached_file.path),
}
else:
# Update pending run status to failed
import asyncio
import database
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
if database.pool is None:
loop.run_until_complete(database.init_db())
loop.run_until_complete(database.update_pending_run_status(
run_id=run_id,
status="failed",
error="Output file not created",
))
except Exception as db_err:
logger.warning(f"Failed to update run status: {db_err}")
finally:
loop.close()
return {
"status": "failed",
"run_id": run_id,
"task_id": task_id,
"error": "Output file not created",
}
@@ -290,8 +371,28 @@ def run_stream(
import traceback
traceback.print_exc()
# Update pending run status to failed
import asyncio
import database
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
if database.pool is None:
loop.run_until_complete(database.init_db())
loop.run_until_complete(database.update_pending_run_status(
run_id=run_id,
status="failed",
error=str(e),
))
except Exception as db_err:
logger.warning(f"Failed to update run status: {db_err}")
finally:
loop.close()
return {
"status": "failed",
"run_id": run_id,
"task_id": task_id,
"error": str(e),
}