Fix completed runs not appearing in list + add purge-failed endpoint

- Update save_run_cache to also update actor_id, recipe, and inputs on conflict
- Add logging for actor_id when saving runs to run_cache
- Add admin endpoint DELETE /runs/admin/purge-failed to delete all failed runs

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-02 23:24:39 +00:00
parent 581da68b3b
commit d20eef76ad
24 changed files with 1671 additions and 453 deletions

View File

@@ -4,6 +4,7 @@ Cache Service - business logic for cache and media management.
import asyncio
import json
import logging
import os
import subprocess
from pathlib import Path
@@ -11,6 +12,8 @@ from typing import Optional, List, Dict, Any, Tuple, TYPE_CHECKING
import httpx
logger = logging.getLogger(__name__)
if TYPE_CHECKING:
from database import Database
from cache_manager import L1CacheManager
@@ -513,7 +516,11 @@ class CacheService:
filename: str,
actor_id: str,
) -> Tuple[Optional[str], Optional[str], Optional[str]]:
"""Upload content to cache. Returns (cid, ipfs_cid, error)."""
"""Upload content to cache. Returns (cid, ipfs_cid, error).
Files are stored locally first for fast response, then uploaded
to IPFS in the background.
"""
import tempfile
try:
@@ -525,21 +532,28 @@ class CacheService:
# Detect media type (video/image/audio) before moving file
media_type = detect_media_type(tmp_path)
# Store in cache (also stores in IPFS)
cached, ipfs_cid = self.cache.put(tmp_path, node_type="upload", move=True)
cid = ipfs_cid or cached.cid # Prefer IPFS CID
# Store locally first (skip_ipfs=True for fast response)
# IPFS upload happens in background
cached, ipfs_cid = self.cache.put(tmp_path, node_type="upload", move=True, skip_ipfs=True)
cid = cached.cid # Use local hash since we skipped IPFS
# Save to database with media category type
# Using media_type ("video", "image", "audio") not mime_type ("video/mp4")
# so list_media filtering works correctly
await self.db.create_cache_item(cid, ipfs_cid)
await self.db.create_cache_item(cid, ipfs_cid) # ipfs_cid is None initially
await self.db.save_item_metadata(
cid=cid,
actor_id=actor_id,
item_type=media_type, # Store media category for filtering
item_type=media_type,
filename=filename
)
# Queue background IPFS upload
try:
from tasks.ipfs_upload import upload_to_ipfs
upload_to_ipfs.delay(cid, actor_id)
logger.info(f"Queued background IPFS upload for {cid[:16]}...")
except Exception as e:
logger.warning(f"Failed to queue IPFS upload (will retry manually): {e}")
return cid, ipfs_cid, None
except Exception as e:
return None, None, f"Upload failed: {e}"

View File

@@ -60,16 +60,40 @@ class RecipeService:
logger = logging.getLogger(__name__)
if is_sexp_format(content):
# Parse S-expression
try:
compiled = compile_string(content)
recipe_data = compiled.to_dict()
recipe_data["sexp"] = content
recipe_data["format"] = "sexp"
logger.info(f"Parsed sexp recipe {recipe_id[:16]}..., keys: {list(recipe_data.keys())}")
except (ParseError, CompileError) as e:
logger.warning(f"Failed to parse sexp recipe {recipe_id[:16]}...: {e}")
return {"error": str(e), "recipe_id": recipe_id}
# Detect if this is a streaming recipe (starts with (stream ...))
def is_streaming_recipe(text):
    # A streaming recipe is identified by its first meaningful line:
    # skip blanks and ';' comment lines, then check whether the first
    # remaining line opens a "(stream" form. Empty/comment-only text
    # is not a streaming recipe.
    meaningful = (
        line.strip()
        for line in text.split('\n')
        if line.strip() and not line.strip().startswith(';')
    )
    first = next(meaningful, None)
    return first is not None and first.startswith('(stream')
if is_streaming_recipe(content):
# Streaming recipes have different format - parse manually
import re
name_match = re.search(r'\(stream\s+"([^"]+)"', content)
recipe_name = name_match.group(1) if name_match else "streaming"
recipe_data = {
"name": recipe_name,
"sexp": content,
"format": "sexp",
"type": "streaming",
"dag": {"nodes": []}, # Streaming recipes don't have traditional DAG
}
logger.info(f"Parsed streaming recipe {recipe_id[:16]}..., name: {recipe_name}")
else:
# Parse traditional (recipe ...) S-expression
try:
compiled = compile_string(content)
recipe_data = compiled.to_dict()
recipe_data["sexp"] = content
recipe_data["format"] = "sexp"
logger.info(f"Parsed sexp recipe {recipe_id[:16]}..., keys: {list(recipe_data.keys())}")
except (ParseError, CompileError) as e:
logger.warning(f"Failed to parse sexp recipe {recipe_id[:16]}...: {e}")
return {"error": str(e), "recipe_id": recipe_id}
else:
# Parse YAML
try:

View File

@@ -128,10 +128,25 @@ class RunService:
# Only return as completed if we have an output
# (runs with no output should be re-executed)
if output_cid:
# Also fetch recipe content from pending_runs for streaming runs
recipe_sexp = None
recipe_name = None
pending = await self.db.get_pending_run(run_id)
if pending:
recipe_sexp = pending.get("dag_json")
# Extract recipe name from streaming recipe content
if recipe_sexp:
import re
name_match = re.search(r'\(stream\s+"([^"]+)"', recipe_sexp)
if name_match:
recipe_name = name_match.group(1)
return {
"run_id": run_id,
"status": "completed",
"recipe": cached.get("recipe"),
"recipe_name": recipe_name,
"inputs": self._ensure_inputs_list(cached.get("inputs")),
"output_cid": output_cid,
"ipfs_cid": cached.get("ipfs_cid"),
@@ -140,6 +155,7 @@ class RunService:
"actor_id": cached.get("actor_id"),
"created_at": cached.get("created_at"),
"completed_at": cached.get("created_at"),
"recipe_sexp": recipe_sexp,
}
# Check database for pending run
@@ -175,6 +191,7 @@ class RunService:
"output_name": pending.get("output_name"),
"created_at": pending.get("created_at"),
"error": pending.get("error"),
"recipe_sexp": pending.get("dag_json"), # Recipe content for streaming runs
}
# If task completed, get result
@@ -209,6 +226,7 @@ class RunService:
"actor_id": pending.get("actor_id"),
"created_at": pending.get("created_at"),
"error": pending.get("error"),
"recipe_sexp": pending.get("dag_json"), # Recipe content for streaming runs
}
# Fallback: Check Redis for backwards compatibility
@@ -714,12 +732,21 @@ class RunService:
"""Get execution plan for a run.
Plans are just node outputs - cached by content hash like everything else.
For streaming runs, returns the recipe content as the plan.
"""
# Get run to find plan_cache_id
run = await self.get_run(run_id)
if not run:
return None
# For streaming runs, return the recipe as the plan
if run.get("recipe") == "streaming" and run.get("recipe_sexp"):
return {
"steps": [{"id": "stream", "type": "STREAM", "name": "Streaming Recipe"}],
"sexp": run.get("recipe_sexp"),
"format": "sexp",
}
# Check plan_cid (stored in database) or plan_cache_id (legacy)
plan_cid = run.get("plan_cid") or run.get("plan_cache_id")
if plan_cid: