Fix completed runs not appearing in list + add purge-failed endpoint

- Update save_run_cache to also update actor_id, recipe, inputs on conflict
- Add logging for actor_id when saving runs to run_cache
- Add admin endpoint DELETE /runs/admin/purge-failed to delete all failed runs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-02-02 23:24:39 +00:00
parent 581da68b3b
commit d20eef76ad
24 changed files with 1671 additions and 453 deletions
--- a/tasks/__init__.py
+++ b/tasks/__init__.py
@@ -2,9 +2,12 @@
 #
 # Tasks:
 # 1. run_stream - Execute a streaming S-expression recipe
+# 2. upload_to_ipfs - Background IPFS upload for media files

 from .streaming import run_stream
+from .ipfs_upload import upload_to_ipfs

 __all__ = [
    "run_stream",
+    "upload_to_ipfs",
 ]
--- a/tasks/ipfs_upload.py
+++ b/tasks/ipfs_upload.py
@@ -0,0 +1,83 @@
+"""
+Background IPFS upload task.
+
+Uploads files to IPFS in the background after initial local storage.
+This allows fast uploads while still getting IPFS CIDs eventually.
+"""
+
+import logging
+import os
+import sys
+from pathlib import Path
+from typing import Optional
+
+# Add parent directory to path for imports
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from celery_app import app
+import ipfs_client
+
+logger = logging.getLogger(__name__)
+
+
+@app.task(bind=True, max_retries=3, default_retry_delay=60)
+def upload_to_ipfs(self, local_cid: str, actor_id: str) -> Optional[str]:
+    """
+    Upload a locally cached file to IPFS in the background.
+
+    Looks up the file for ``local_cid`` in the local cache, adds it to IPFS,
+    stores the resulting IPFS CID in the database and indexes the IPFS CID
+    back to the local cache entry.
+
+    Args:
+        local_cid: The local content hash of the file
+        actor_id: The user who uploaded the file (not read by this task body)
+
+    Returns:
+        IPFS CID if successful, None if the file is missing from the local
+        cache (that case is not retried)
+
+    Raises:
+        Whatever ``self.retry`` raises. Every failure, including an empty
+        result from ``ipfs_client.add_file``, goes through exactly one
+        ``self.retry`` call in the handler at the bottom of the function.
+    """
+    from cache_manager import get_cache_manager
+    import asyncio
+    import database
+
+    logger.info(f"Background IPFS upload starting for {local_cid[:16]}...")
+
+    try:
+        cache_mgr = get_cache_manager()
+
+        # Get the file path from local cache
+        file_path = cache_mgr.get_by_cid(local_cid)
+        if not file_path or not file_path.exists():
+            logger.error(f"File not found for local CID {local_cid[:16]}...")
+            return None
+
+        # Upload to IPFS
+        logger.info(f"Uploading {file_path} to IPFS...")
+        ipfs_cid = ipfs_client.add_file(file_path)
+
+        if not ipfs_cid:
+            logger.error(f"IPFS upload failed for {local_cid[:16]}...")
+            # Raise a plain exception instead of calling self.retry() here:
+            # self.retry() itself raises, and that exception would be caught
+            # by the `except Exception` below, triggering a second
+            # self.retry() call for the same failure.
+            raise Exception("IPFS upload failed")
+
+        logger.info(f"IPFS upload successful: {local_cid[:16]}... -> {ipfs_cid[:16]}...")
+
+        # Update database with IPFS CID on a private event loop, since this
+        # task function is synchronous and the database API is async.
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        try:
+            # Initialize database pool if needed
+            # NOTE(review): tasks/streaming.py calls database.init_db() for
+            # the same purpose - confirm init_pool() is the intended name.
+            loop.run_until_complete(database.init_pool())
+
+            # Update cache_items table
+            loop.run_until_complete(
+                database.update_cache_item_ipfs_cid(local_cid, ipfs_cid)
+            )
+
+            # Create index from IPFS CID to local cache
+            cache_mgr._set_content_index(ipfs_cid, local_cid)
+
+            logger.info(f"Database updated with IPFS CID for {local_cid[:16]}...")
+        finally:
+            loop.close()
+
+        return ipfs_cid
+
+    except Exception as e:
+        # Single retry point for every failure raised above.
+        logger.error(f"Background IPFS upload error: {e}")
+        raise self.retry(exc=e)
--- a/tasks/streaming.py
+++ b/tasks/streaming.py
@@ -24,6 +24,11 @@ from cache_manager import get_cache_manager
 logger = logging.getLogger(__name__)


+# Module-level event loop for database operations
+_resolve_loop = None
+_db_initialized = False
+
+
 def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
    """
    Resolve an asset reference (CID or friendly name) to a file path.
@@ -35,6 +40,7 @@ def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
    Returns:
        Path to the asset file, or None if not found
    """
+    global _resolve_loop, _db_initialized
    cache_mgr = get_cache_manager()

    # Try as direct CID first
@@ -46,15 +52,22 @@ def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
    # Try as friendly name if actor_id provided
    if actor_id:
        import asyncio
+        import database
        from database import resolve_friendly_name

        try:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            try:
-                cid = loop.run_until_complete(resolve_friendly_name(actor_id, ref))
-            finally:
-                loop.close()
+            # Reuse event loop for database operations
+            if _resolve_loop is None or _resolve_loop.is_closed():
+                _resolve_loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(_resolve_loop)
+                _db_initialized = False
+
+            # Initialize database pool once per loop
+            if not _db_initialized:
+                _resolve_loop.run_until_complete(database.init_db())
+                _db_initialized = True
+
+            cid = _resolve_loop.run_until_complete(resolve_friendly_name(actor_id, ref))

            if cid:
                path = cache_mgr.get_by_cid(cid)
@@ -173,6 +186,7 @@ def create_cid_primitives(actor_id: Optional[str] = None):
@app.task(bind=True, name='tasks.run_stream')
 def run_stream(
    self,
+    run_id: str,
    recipe_sexp: str,
    output_name: str = "output.mp4",
    duration: Optional[float] = None,
@@ -185,6 +199,7 @@ def run_stream(
    Execute a streaming S-expression recipe.

    Args:
+        run_id: The run ID for database tracking
        recipe_sexp: The recipe S-expression content
        output_name: Name for the output file
        duration: Optional duration override (seconds)
@@ -197,7 +212,7 @@ def run_stream(
        Dict with output_cid, output_path, and status
    """
    task_id = self.request.id
-    logger.info(f"Starting stream task {task_id}")
+    logger.info(f"Starting stream task {task_id} for run {run_id}")

    self.update_state(state='INITIALIZING', meta={'progress': 0})

@@ -237,8 +252,8 @@ def run_stream(
        # Import the streaming interpreter
        from streaming.stream_sexp_generic import StreamInterpreter

-        # Create interpreter
-        interp = StreamInterpreter(str(recipe_path))
+        # Create interpreter (pass actor_id for friendly name resolution)
+        interp = StreamInterpreter(str(recipe_path), actor_id=actor_id)

        # Set primitive library directory explicitly
        interp.primitive_lib_dir = sexp_effects_dir / "primitive_libs"
@@ -258,8 +273,17 @@ def run_stream(
        logger.info(f"Rendering to {output_path}")
        interp.run(duration=duration, output=str(output_path))

+        # Check for interpreter errors
+        if interp.errors:
+            error_msg = f"Rendering failed with {len(interp.errors)} errors: {interp.errors[0]}"
+            raise RuntimeError(error_msg)
+
        self.update_state(state='CACHING', meta={'progress': 90})

+        # Validate output file (must be > 1KB to have actual frames)
+        if output_path.exists() and output_path.stat().st_size < 1024:
+            raise RuntimeError(f"Output file is too small ({output_path.stat().st_size} bytes) - rendering likely failed")
+
        # Store output in cache
        if output_path.exists():
            cache_mgr = get_cache_manager()
@@ -271,16 +295,73 @@ def run_stream(

            logger.info(f"Stream output cached: CID={cached_file.cid}, IPFS={ipfs_cid}")

+            # Save to database
+            import asyncio
+            import database
+
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            try:
+                # Initialize database pool if needed
+                if database.pool is None:
+                    loop.run_until_complete(database.init_db())
+
+                # Get recipe CID from pending_run
+                pending = loop.run_until_complete(database.get_pending_run(run_id))
+                recipe_cid = pending.get("recipe", "streaming") if pending else "streaming"
+
+                # Save to run_cache for completed runs
+                logger.info(f"Saving run {run_id} to run_cache with actor_id={actor_id}")
+                loop.run_until_complete(database.save_run_cache(
+                    run_id=run_id,
+                    output_cid=cached_file.cid,
+                    recipe=recipe_cid,
+                    inputs=[],
+                    ipfs_cid=ipfs_cid,
+                    actor_id=actor_id,
+                ))
+                # Update pending run status
+                loop.run_until_complete(database.update_pending_run_status(
+                    run_id=run_id,
+                    status="completed",
+                ))
+                logger.info(f"Saved run {run_id} to database with actor_id={actor_id}")
+            except Exception as db_err:
+                logger.warning(f"Failed to save run to database: {db_err}")
+            finally:
+                loop.close()
+
            return {
                "status": "completed",
+                "run_id": run_id,
                "task_id": task_id,
                "output_cid": cached_file.cid,
                "ipfs_cid": ipfs_cid,
                "output_path": str(cached_file.path),
            }
        else:
+            # Update pending run status to failed
+            import asyncio
+            import database
+
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            try:
+                if database.pool is None:
+                    loop.run_until_complete(database.init_db())
+                loop.run_until_complete(database.update_pending_run_status(
+                    run_id=run_id,
+                    status="failed",
+                    error="Output file not created",
+                ))
+            except Exception as db_err:
+                logger.warning(f"Failed to update run status: {db_err}")
+            finally:
+                loop.close()
+
            return {
                "status": "failed",
+                "run_id": run_id,
                "task_id": task_id,
                "error": "Output file not created",
            }
@@ -290,8 +371,28 @@ def run_stream(
        import traceback
        traceback.print_exc()

+        # Update pending run status to failed
+        import asyncio
+        import database
+
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        try:
+            if database.pool is None:
+                loop.run_until_complete(database.init_db())
+            loop.run_until_complete(database.update_pending_run_status(
+                run_id=run_id,
+                status="failed",
+                error=str(e),
+            ))
+        except Exception as db_err:
+            logger.warning(f"Failed to update run status: {db_err}")
+        finally:
+            loop.close()
+
        return {
            "status": "failed",
+            "run_id": run_id,
            "task_id": task_id,
            "error": str(e),
        }