Add 3-phase execution with IPFS cache and hash-based task claiming

New files:
- claiming.py - Redis Lua scripts for atomic task claiming
- tasks/analyze.py - Analysis Celery task
- tasks/execute.py - Step execution with IPFS-backed cache
- tasks/orchestrate.py - Plan orchestration (run_plan, run_recipe)

New API endpoints (/api/v2/):
- POST /api/v2/plan - Generate execution plan
- POST /api/v2/execute - Execute a plan
- POST /api/v2/run-recipe - Full 3-phase pipeline
- GET /api/v2/run/{run_id} - Get run status

Features:
- Hash-based task claiming prevents duplicate work
- Parallel execution within dependency levels
- IPFS-backed cache for durability
- Integration with artdag planning module

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-10 11:44:00 +00:00
parent 7d05011daa
commit f7890dd1ad
7 changed files with 1468 additions and 1 deletions
--- a/server.py
+++ b/server.py
@@ -4964,6 +4964,231 @@ async def download_client():
    )


+# ============================================================================
+# 3-Phase Execution API (Analyze → Plan → Execute)
+# ============================================================================
+
+class RecipeRunRequest(BaseModel):
+    """Request to run a recipe with the 3-phase execution model."""
+    recipe_yaml: str  # Recipe YAML content
+    input_hashes: dict  # Mapping from input name to content hash
+    features: Optional[list[str]] = None  # Features to extract (default: beats, energy)
+
+
+class PlanRequest(BaseModel):
+    """Request to generate an execution plan."""
+    recipe_yaml: str
+    input_hashes: dict
+    features: Optional[list[str]] = None
+
+
+class ExecutePlanRequest(BaseModel):
+    """Request to execute a pre-generated plan."""
+    plan_json: str  # JSON-serialized ExecutionPlan
+
+
+@app.post("/api/v2/plan")
+async def generate_plan_endpoint(
+    request: PlanRequest,
+    ctx: UserContext = Depends(get_required_user_context)
+):
+    """
+    Generate an execution plan without executing it.
+
+    Phase 1 (Analyze) + Phase 2 (Plan) of the 3-phase model.
+
+    Returns the plan with cache status for each step.
+    """
+    from tasks.orchestrate import generate_plan
+
+    try:
+        # Submit to Celery
+        task = generate_plan.delay(
+            recipe_yaml=request.recipe_yaml,
+            input_hashes=request.input_hashes,
+            features=request.features,
+        )
+
+        # Wait for result (plan generation is usually fast)
+        result = task.get(timeout=60)
+
+        return {
+            "status": result.get("status"),
+            "recipe": result.get("recipe"),
+            "plan_id": result.get("plan_id"),
+            "total_steps": result.get("total_steps"),
+            "cached_steps": result.get("cached_steps"),
+            "pending_steps": result.get("pending_steps"),
+            "steps": result.get("steps"),
+        }
+    except Exception as e:
+        logger.error(f"Plan generation failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post("/api/v2/execute")
+async def execute_plan_endpoint(
+    request: ExecutePlanRequest,
+    ctx: UserContext = Depends(get_required_user_context)
+):
+    """
+    Execute a pre-generated execution plan.
+
+    Phase 3 (Execute) of the 3-phase model.
+
+    Submits the plan to Celery for parallel execution.
+    """
+    from tasks.orchestrate import run_plan
+
+    run_id = str(uuid.uuid4())
+
+    try:
+        # Submit to Celery (async)
+        task = run_plan.delay(
+            plan_json=request.plan_json,
+            run_id=run_id,
+        )
+
+        return {
+            "status": "submitted",
+            "run_id": run_id,
+            "celery_task_id": task.id,
+        }
+    except Exception as e:
+        logger.error(f"Plan execution failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.post("/api/v2/run-recipe")
+async def run_recipe_endpoint(
+    request: RecipeRunRequest,
+    ctx: UserContext = Depends(get_required_user_context)
+):
+    """
+    Run a complete recipe through all 3 phases.
+
+    1. Analyze: Extract features from inputs
+    2. Plan: Generate execution plan with cache IDs
+    3. Execute: Run steps with parallel execution
+
+    Returns immediately with run_id. Poll /api/v2/run/{run_id} for status.
+    """
+    from tasks.orchestrate import run_recipe
+
+    # Compute run_id from inputs and recipe
+    try:
+        recipe_data = yaml.safe_load(request.recipe_yaml)
+        recipe_name = recipe_data.get("name", "unknown")
+    except Exception:
+        recipe_name = "unknown"
+
+    run_id = compute_run_id(
+        list(request.input_hashes.values()),
+        recipe_name,
+        hashlib.sha3_256(request.recipe_yaml.encode()).hexdigest()
+    )
+
+    # Check if already completed
+    cached = await database.get_run_cache(run_id)
+    if cached:
+        output_hash = cached.get("output_hash")
+        if cache_manager.has_content(output_hash):
+            return {
+                "status": "completed",
+                "run_id": run_id,
+                "output_hash": output_hash,
+                "output_ipfs_cid": cache_manager.get_ipfs_cid(output_hash),
+                "cached": True,
+            }
+
+    # Submit to Celery
+    try:
+        task = run_recipe.delay(
+            recipe_yaml=request.recipe_yaml,
+            input_hashes=request.input_hashes,
+            features=request.features,
+            run_id=run_id,
+        )
+
+        # Store run status in Redis
+        run_data = {
+            "run_id": run_id,
+            "status": "pending",
+            "recipe": recipe_name,
+            "inputs": list(request.input_hashes.values()),
+            "celery_task_id": task.id,
+            "created_at": datetime.now(timezone.utc).isoformat(),
+            "username": ctx.actor_id,
+        }
+        redis_client.setex(
+            f"{RUNS_KEY_PREFIX}{run_id}",
+            86400,  # 24 hour expiry
+            json.dumps(run_data)
+        )
+
+        return {
+            "status": "submitted",
+            "run_id": run_id,
+            "celery_task_id": task.id,
+            "recipe": recipe_name,
+        }
+    except Exception as e:
+        logger.error(f"Recipe run failed: {e}")
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+@app.get("/api/v2/run/{run_id}")
+async def get_run_v2(run_id: str, ctx: UserContext = Depends(get_required_user_context)):
+    """
+    Get status of a 3-phase execution run.
+    """
+    # Check Redis for run status
+    run_data = redis_client.get(f"{RUNS_KEY_PREFIX}{run_id}")
+    if run_data:
+        data = json.loads(run_data)
+
+        # If pending, check Celery task status
+        if data.get("status") == "pending" and data.get("celery_task_id"):
+            from celery.result import AsyncResult
+            result = AsyncResult(data["celery_task_id"])
+
+            if result.ready():
+                if result.successful():
+                    task_result = result.get()
+                    data["status"] = task_result.get("status", "completed")
+                    data["output_hash"] = task_result.get("output_cache_id")
+                    data["output_ipfs_cid"] = task_result.get("output_ipfs_cid")
+                    data["total_steps"] = task_result.get("total_steps")
+                    data["cached"] = task_result.get("cached")
+                    data["executed"] = task_result.get("executed")
+
+                    # Update Redis
+                    redis_client.setex(
+                        f"{RUNS_KEY_PREFIX}{run_id}",
+                        86400,
+                        json.dumps(data)
+                    )
+                else:
+                    data["status"] = "failed"
+                    data["error"] = str(result.result)
+            else:
+                data["celery_status"] = result.status
+
+        return data
+
+    # Check database cache
+    cached = await database.get_run_cache(run_id)
+    if cached:
+        return {
+            "run_id": run_id,
+            "status": "completed",
+            "output_hash": cached.get("output_hash"),
+            "cached": True,
+        }
+
+    raise HTTPException(status_code=404, detail="Run not found")
+
+
 if __name__ == "__main__":
    import uvicorn
    # Workers enabled - cache indexes shared via Redis