Add 3-phase execution with IPFS cache and hash-based task claiming
New files:
- claiming.py - Redis Lua scripts for atomic task claiming
- tasks/analyze.py - Analysis Celery task
- tasks/execute.py - Step execution with IPFS-backed cache
- tasks/orchestrate.py - Plan orchestration (run_plan, run_recipe)
New API endpoints (/api/v2/):
- POST /api/v2/plan - Generate execution plan
- POST /api/v2/execute - Execute a plan
- POST /api/v2/run-recipe - Full 3-phase pipeline
- GET /api/v2/run/{run_id} - Get run status
Features:
- Hash-based task claiming prevents duplicate work
- Parallel execution within dependency levels
- IPFS-backed cache for durability
- Integration with artdag planning module
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
373 lines added — tasks/orchestrate.py (new file)
@@ -0,0 +1,373 @@
|
||||
"""
|
||||
Plan orchestration tasks.
|
||||
|
||||
Coordinates the full 3-phase execution:
|
||||
1. Analyze inputs
|
||||
2. Generate plan
|
||||
3. Execute steps level by level
|
||||
|
||||
Uses IPFS-backed cache for durability.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from celery import current_task, group, chain
|
||||
|
||||
# Import from the Celery app
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from celery_app import app
|
||||
from claiming import get_claimer
|
||||
from cache_manager import get_cache_manager
|
||||
|
||||
# Import artdag modules
|
||||
try:
|
||||
from artdag import Cache
|
||||
from artdag.analysis import Analyzer, AnalysisResult
|
||||
from artdag.planning import RecipePlanner, ExecutionPlan, Recipe
|
||||
except ImportError:
|
||||
Cache = None
|
||||
Analyzer = None
|
||||
AnalysisResult = None
|
||||
RecipePlanner = None
|
||||
ExecutionPlan = None
|
||||
Recipe = None
|
||||
|
||||
from .execute import execute_step
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache directories
|
||||
CACHE_DIR = Path(os.environ.get('CACHE_DIR', '/data/cache'))
|
||||
ANALYSIS_CACHE_DIR = CACHE_DIR / 'analysis'
|
||||
PLAN_CACHE_DIR = CACHE_DIR / 'plans'
|
||||
|
||||
|
||||
@app.task(bind=True, name='tasks.run_plan')
def run_plan(
    self,
    plan_json: str,
    run_id: Optional[str] = None,
) -> dict:
    """
    Execute a complete execution plan.

    Runs steps level by level, with parallel execution (a Celery group)
    within each level. A level only starts once every step of the previous
    level has finished, because later steps consume earlier steps' cache
    ids. Results are stored in the IPFS-backed cache.

    Args:
        plan_json: JSON-serialized ExecutionPlan
        run_id: Optional run ID for tracking

    Returns:
        Dict with execution results: overall status, output cache id /
        path / IPFS CID, cached vs executed counts, and per-step results.
    """
    if ExecutionPlan is None:
        raise ImportError("artdag.planning not available")

    plan = ExecutionPlan.from_json(plan_json)
    cache_mgr = get_cache_manager()

    logger.info(f"Executing plan {plan.plan_id[:16]}... ({len(plan.steps)} steps)")

    # Build initial cache_ids mapping (step_id -> cache_id)
    cache_ids = {step.step_id: step.cache_id for step in plan.steps}

    # Also map input names to their content hashes so steps can resolve
    # raw inputs the same way they resolve upstream step outputs.
    for name, content_hash in plan.input_hashes.items():
        cache_ids[name] = content_hash

    # Group steps by dependency level; steps within a level are independent.
    steps_by_level = plan.get_steps_by_level()
    max_level = max(steps_by_level.keys()) if steps_by_level else 0

    results_by_step = {}
    total_cached = 0
    total_executed = 0

    for level in range(max_level + 1):
        level_steps = steps_by_level.get(level, [])
        if not level_steps:
            continue

        logger.info(f"Executing level {level}: {len(level_steps)} steps")

        # Skip steps whose output already exists in the cache.
        steps_to_run = []
        for step in level_steps:
            cached_path = cache_mgr.get_by_content_hash(step.cache_id)
            if cached_path:
                results_by_step[step.step_id] = {
                    "status": "cached",
                    "cache_id": step.cache_id,
                    "output_path": str(cached_path),
                }
                total_cached += 1
            else:
                steps_to_run.append(step)

        if not steps_to_run:
            logger.info(f"Level {level}: all steps cached")
            continue

        # Snapshot of cache ids visible to this level (inputs + all
        # completed earlier levels); passed by value to each subtask.
        level_cache_ids = dict(cache_ids)

        # Execute the remaining steps of this level in parallel.
        tasks = [
            execute_step.s(step.to_json(), plan.plan_id, level_cache_ids)
            for step in steps_to_run
        ]

        job = group(tasks)
        async_results = job.apply_async()

        # Wait for the whole level before starting the next one.
        # BUGFIX: Celery (>=4.2) raises "Never call result.get() within a
        # task!" unless disable_sync_subtasks=False is passed, so without
        # it every level with uncached steps fails immediately. We opt in
        # deliberately here — this orchestrator must block on its level.
        # NOTE(review): blocking on subtasks can deadlock a fixed-size
        # worker pool; route tasks.run_plan to a dedicated queue/worker,
        # or restructure with chords, to be safe under load.
        try:
            step_results = async_results.get(
                timeout=3600, disable_sync_subtasks=False
            )
        except Exception as e:
            logger.error(f"Level {level} execution failed: {e}")
            return {
                "status": "failed",
                "error": str(e),
                "level": level,
                "results": results_by_step,
                "run_id": run_id,
            }

        # Record results and publish new cache ids to later levels.
        for result in step_results:
            step_id = result.get("step_id")
            cache_id = result.get("cache_id")

            results_by_step[step_id] = result
            cache_ids[step_id] = cache_id

            # "completed_by_other" means another worker claimed and
            # finished the step (hash-based claiming); "cached" means the
            # executor found a cache hit after dispatch. Both count as
            # successfully executed from this run's perspective.
            if result.get("status") in ("completed", "cached", "completed_by_other"):
                total_executed += 1
            elif result.get("status") == "failed":
                logger.error(f"Step {step_id} failed: {result.get('error')}")
                return {
                    "status": "failed",
                    "error": f"Step {step_id} failed: {result.get('error')}",
                    "level": level,
                    "results": results_by_step,
                    "run_id": run_id,
                }

    # Resolve the plan's final output artifact from the cache.
    output_step = plan.get_step(plan.output_step)
    output_cache_id = output_step.cache_id if output_step else None
    output_path = None
    output_ipfs_cid = None

    if output_cache_id:
        output_path = cache_mgr.get_by_content_hash(output_cache_id)
        output_ipfs_cid = cache_mgr.get_ipfs_cid(output_cache_id)

    return {
        "status": "completed",
        "run_id": run_id,
        "plan_id": plan.plan_id,
        "output_cache_id": output_cache_id,
        "output_path": str(output_path) if output_path else None,
        "output_ipfs_cid": output_ipfs_cid,
        "total_steps": len(plan.steps),
        "cached": total_cached,
        "executed": total_executed,
        "results": results_by_step,
    }
|
||||
|
||||
|
||||
@app.task(bind=True, name='tasks.run_recipe')
def run_recipe(
    self,
    recipe_yaml: str,
    input_hashes: Dict[str, str],
    features: List[str] = None,
    run_id: Optional[str] = None,
) -> dict:
    """
    Run a complete recipe through all 3 phases.

    1. Analyze: Extract features from inputs
    2. Plan: Generate execution plan
    3. Execute: Run the plan

    Args:
        recipe_yaml: Recipe YAML content
        input_hashes: Mapping from input name to content hash
        features: Features to extract (default: ["beats", "energy"])
        run_id: Optional run ID for tracking

    Returns:
        Dict with final results
    """
    if RecipePlanner is None or Analyzer is None:
        raise ImportError("artdag modules not available")

    feature_list = features if features is not None else ["beats", "energy"]
    cache_mgr = get_cache_manager()

    logger.info(f"Running recipe with {len(input_hashes)} inputs")

    # --- Phase 1: feature extraction over every resolvable input ---
    logger.info("Phase 1: Analyzing inputs...")

    ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    analyzer = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)

    analysis_results = {}
    for input_name, content_hash in input_hashes.items():
        # Inputs must already be present in the local cache.
        local_path = cache_mgr.get_by_content_hash(content_hash)
        if not local_path:
            logger.warning(f"Input {input_name} ({content_hash[:16]}...) not in cache")
            continue
        try:
            analysis = analyzer.analyze(
                input_hash=content_hash,
                features=feature_list,
                input_path=Path(local_path),
            )
        except Exception as e:
            # Analysis is best-effort; planning proceeds without it.
            logger.warning(f"Analysis failed for {input_name}: {e}")
        else:
            analysis_results[content_hash] = analysis
            logger.info(f"Analyzed {input_name}: tempo={analysis.tempo}, beats={len(analysis.beat_times or [])}")

    logger.info(f"Analyzed {len(analysis_results)} inputs")

    # --- Phase 2: turn recipe + analysis into an execution plan ---
    logger.info("Phase 2: Generating execution plan...")

    recipe = Recipe.from_yaml(recipe_yaml)
    planner = RecipePlanner(use_tree_reduction=True)

    plan = planner.plan(
        recipe=recipe,
        input_hashes=input_hashes,
        analysis=analysis_results,
    )

    logger.info(f"Generated plan with {len(plan.steps)} steps")

    # Persist the plan so it can be inspected after the fact.
    PLAN_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    (PLAN_CACHE_DIR / f"{plan.plan_id}.json").write_text(plan.to_json())

    # --- Phase 3: execute the plan level by level ---
    logger.info("Phase 3: Executing plan...")

    outcome = run_plan(plan.to_json(), run_id=run_id)

    return {
        "status": outcome.get("status"),
        "run_id": run_id,
        "recipe": recipe.name,
        "plan_id": plan.plan_id,
        "output_path": outcome.get("output_path"),
        "output_cache_id": outcome.get("output_cache_id"),
        "output_ipfs_cid": outcome.get("output_ipfs_cid"),
        "analysis_count": len(analysis_results),
        "total_steps": len(plan.steps),
        "cached": outcome.get("cached", 0),
        "executed": outcome.get("executed", 0),
        "error": outcome.get("error"),
    }
|
||||
|
||||
|
||||
@app.task(bind=True, name='tasks.generate_plan')
def generate_plan(
    self,
    recipe_yaml: str,
    input_hashes: Dict[str, str],
    features: List[str] = None,
) -> dict:
    """
    Generate an execution plan without executing it.

    Useful for:
    - Previewing what will be executed
    - Checking cache status
    - Debugging recipe issues

    Args:
        recipe_yaml: Recipe YAML content
        input_hashes: Mapping from input name to content hash
        features: Features to extract for analysis

    Returns:
        Dict with plan details
    """
    if RecipePlanner is None or Analyzer is None:
        raise ImportError("artdag modules not available")

    feature_list = ["beats", "energy"] if features is None else features
    cache_mgr = get_cache_manager()

    # Analyze whichever inputs are present in the cache; the planner can
    # work with a partial analysis mapping.
    ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    analyzer = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)

    analysis_results = {}
    for input_name, content_hash in input_hashes.items():
        local_path = cache_mgr.get_by_content_hash(content_hash)
        if not local_path:
            continue
        try:
            analysis_results[content_hash] = analyzer.analyze(
                input_hash=content_hash,
                features=feature_list,
                input_path=Path(local_path),
            )
        except Exception as e:
            logger.warning(f"Analysis failed for {input_name}: {e}")

    # Build the plan from recipe + analysis.
    recipe = Recipe.from_yaml(recipe_yaml)
    planner = RecipePlanner(use_tree_reduction=True)

    plan = planner.plan(
        recipe=recipe,
        input_hashes=input_hashes,
        analysis=analysis_results,
    )

    # Report per-step cache status so callers can preview the work left.
    steps_status = [
        {
            "step_id": step.step_id,
            "node_type": step.node_type,
            "cache_id": step.cache_id,
            "level": step.level,
            "cached": cache_mgr.has_content(step.cache_id),
        }
        for step in plan.steps
    ]

    cached_count = sum(1 for entry in steps_status if entry["cached"])

    return {
        "status": "planned",
        "recipe": recipe.name,
        "plan_id": plan.plan_id,
        "total_steps": len(plan.steps),
        "cached_steps": cached_count,
        "pending_steps": len(plan.steps) - cached_count,
        "steps": steps_status,
        "plan_json": plan.to_json(),
    }
|
||||
Reference in New Issue
Block a user