Fix completed runs not appearing in list; add purge-failed admin endpoint

- Update save_run_cache to also update actor_id, recipe, inputs on conflict
- Add logging for actor_id when saving runs to run_cache
- Add admin endpoint DELETE /runs/admin/purge-failed to delete all failed runs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-02 23:24:39 +00:00
parent 581da68b3b
commit d20eef76ad
24 changed files with 1671 additions and 453 deletions

View File

@@ -175,15 +175,17 @@ class L1CacheManager:
# No fallbacks - failures raise exceptions.
def _run_async(self, coro):
"""Run async coroutine from sync context."""
"""Run async coroutine from sync context.
Always creates a fresh event loop to avoid issues with Celery's
prefork workers where loops may be closed by previous tasks.
"""
import asyncio
# Check if we're already in an async context
try:
loop = asyncio.get_running_loop()
# Already in async context - schedule on the running loop
future = asyncio.ensure_future(coro, loop=loop)
# Can't block here, so we need a different approach
# Use a new thread with its own loop
asyncio.get_running_loop()
# We're in an async context - use a thread with its own loop
import threading
result = [None]
error = [None]
@@ -206,13 +208,13 @@ class L1CacheManager:
raise error[0]
return result[0]
except RuntimeError:
# No running loop - safe to use run_until_complete
# No running loop - create a fresh one (don't reuse potentially closed loops)
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
return loop.run_until_complete(coro)
return loop.run_until_complete(coro)
finally:
loop.close()
def _set_content_index(self, cache_id: str, ipfs_cid: str):
"""Set content index entry in database (cache_id -> ipfs_cid)."""
@@ -341,28 +343,38 @@ class L1CacheManager:
cache_id: str = None,
execution_time: float = 0.0,
move: bool = False,
skip_ipfs: bool = False,
) -> tuple[CachedFile, Optional[str]]:
"""
Store a file in the cache and upload to IPFS.
Store a file in the cache and optionally upload to IPFS.
Files are ALWAYS stored by IPFS CID. The cache_id parameter creates
an index from cache_id -> IPFS CID for code-addressed lookups.
Files are stored by IPFS CID when skip_ipfs=False (default), or by
local content hash when skip_ipfs=True. The cache_id parameter creates
an index from cache_id -> CID for code-addressed lookups.
Args:
source_path: Path to file to cache
node_type: Type of node (e.g., "upload", "source", "effect")
node_id: DEPRECATED - ignored, always uses IPFS CID
node_id: DEPRECATED - ignored, always uses CID
cache_id: Optional code-addressed cache ID to index
execution_time: How long the operation took
move: If True, move instead of copy
skip_ipfs: If True, skip IPFS upload and use local hash (faster for large files)
Returns:
Tuple of (CachedFile with both node_id and cid, CID)
Tuple of (CachedFile with both node_id and cid, CID or None if skip_ipfs)
"""
# Upload to IPFS first to get the CID (primary identifier)
cid = ipfs_client.add_file(source_path)
if not cid:
raise RuntimeError(f"IPFS upload failed for {source_path}. IPFS is required.")
if skip_ipfs:
# Use local content hash instead of IPFS CID (much faster)
cid = file_hash(source_path)
ipfs_cid = None
logger.info(f"put: Using local hash (skip_ipfs=True): {cid[:16]}...")
else:
# Upload to IPFS first to get the CID (primary identifier)
cid = ipfs_client.add_file(source_path)
if not cid:
raise RuntimeError(f"IPFS upload failed for {source_path}. IPFS is required.")
ipfs_cid = cid
# Always store by IPFS CID (node_id parameter is deprecated)
node_id = cid
@@ -370,11 +382,12 @@ class L1CacheManager:
# Check if already cached (by node_id)
existing = self.cache.get_entry(node_id)
if existing and existing.output_path.exists():
return CachedFile.from_cache_entry(existing), cid
return CachedFile.from_cache_entry(existing), ipfs_cid
# Compute local hash BEFORE moving the file (for dual-indexing)
# Only needed if we uploaded to IPFS (to map local hash -> IPFS CID)
local_hash = None
if self._is_ipfs_cid(cid):
if not skip_ipfs and self._is_ipfs_cid(cid):
local_hash = file_hash(source_path)
# Store in local cache
@@ -405,9 +418,9 @@ class L1CacheManager:
self._set_content_index(local_hash, cid)
logger.debug(f"Indexed local hash {local_hash[:16]}... -> IPFS {cid}")
logger.info(f"Cached: {cid[:16]}...")
logger.info(f"Cached: {cid[:16]}..." + (" (local only)" if skip_ipfs else " (IPFS)"))
return CachedFile.from_cache_entry(entry), cid
return CachedFile.from_cache_entry(entry), ipfs_cid if not skip_ipfs else None
def get_by_node_id(self, node_id: str) -> Optional[Path]:
"""Get cached file path by node_id."""