Add 3-phase execution with IPFS cache and hash-based task claiming
New files:
- claiming.py - Redis Lua scripts for atomic task claiming
- tasks/analyze.py - Analysis Celery task
- tasks/execute.py - Step execution with IPFS-backed cache
- tasks/orchestrate.py - Plan orchestration (run_plan, run_recipe)
New API endpoints (/api/v2/):
- POST /api/v2/plan - Generate execution plan
- POST /api/v2/execute - Execute a plan
- POST /api/v2/run-recipe - Full 3-phase pipeline
- GET /api/v2/run/{run_id} - Get run status
Features:
- Hash-based task claiming prevents duplicate work
- Parallel execution within dependency levels
- IPFS-backed cache for durability
- Integration with artdag planning module
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
132
tasks/analyze.py
Normal file
132
tasks/analyze.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""
|
||||
Analysis tasks for extracting features from input media.
|
||||
|
||||
Phase 1 of the 3-phase execution model.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from celery import current_task
|
||||
|
||||
# Import from the Celery app
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
from celery_app import app
|
||||
|
||||
# Import artdag analysis module
|
||||
try:
|
||||
from artdag.analysis import Analyzer, AnalysisResult
|
||||
except ImportError:
|
||||
# artdag not installed, will fail at runtime
|
||||
Analyzer = None
|
||||
AnalysisResult = None
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Cache directory for analysis results
|
||||
CACHE_DIR = Path(os.environ.get('CACHE_DIR', '/data/cache'))
|
||||
ANALYSIS_CACHE_DIR = CACHE_DIR / 'analysis'
|
||||
|
||||
|
||||
@app.task(bind=True, name='tasks.analyze_input')
def analyze_input(
    self,
    input_hash: str,
    input_path: str,
    features: List[str],
) -> dict:
    """
    Analyze a single input file.

    Args:
        input_hash: Content hash of the input
        input_path: Path to the input file
        features: List of features to extract

    Returns:
        Dict with analysis results: a "completed" payload containing the
        cache id and serialized result on success, or a "failed" payload
        carrying the error message. Per-file analysis errors are caught
        and reported in the return value rather than raised, so callers
        always receive a dict.

    Raises:
        ImportError: If the optional artdag dependency is not installed.
    """
    if Analyzer is None:
        raise ImportError("artdag.analysis not available")

    # Lazy %-style args avoid formatting work when the log level is disabled.
    logger.info("Analyzing %s... for features: %s", input_hash[:16], features)

    # Create analyzer with caching so repeated analysis of the same content
    # hash is served from disk instead of being recomputed.
    ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    analyzer = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)

    try:
        result = analyzer.analyze(
            input_hash=input_hash,
            features=features,
            input_path=Path(input_path),
        )

        return {
            "status": "completed",
            "input_hash": input_hash,
            "cache_id": result.cache_id,
            "features": features,
            "result": result.to_dict(),
        }

    except Exception as e:
        # logger.exception records the full traceback, which the previous
        # logger.error(f"...") call silently dropped.
        logger.exception("Analysis failed for %s: %s", input_hash, e)
        return {
            "status": "failed",
            "input_hash": input_hash,
            "error": str(e),
        }
@app.task(bind=True, name='tasks.analyze_inputs')
def analyze_inputs(
    self,
    inputs: Dict[str, str],
    features: List[str],
) -> dict:
    """
    Analyze multiple inputs in parallel.

    Args:
        inputs: Dict mapping input_hash to file path
        features: List of features to extract from all inputs

    Returns:
        Dict summarizing the batch: per-hash results, a list of per-input
        errors, and counts of total vs. successful inputs. Status is
        "completed" when every input succeeded, "partial" otherwise.

    Raises:
        ImportError: If the optional artdag dependency is not installed.
    """
    if Analyzer is None:
        raise ImportError("artdag.analysis not available")

    logger.info(f"Analyzing {len(inputs)} inputs for features: {features}")

    # Shared analyzer instance; cache directory is created lazily here.
    ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    engine = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)

    succeeded: Dict[str, dict] = {}
    failures: List[dict] = []

    # Best-effort batch: a failing input is recorded and skipped so the
    # remaining inputs still get analyzed.
    for input_hash, path_str in inputs.items():
        try:
            analysis = engine.analyze(
                input_hash=input_hash,
                features=features,
                input_path=Path(path_str),
            )
            succeeded[input_hash] = analysis.to_dict()
        except Exception as e:
            logger.error(f"Analysis failed for {input_hash}: {e}")
            failures.append({"input_hash": input_hash, "error": str(e)})

    return {
        "status": "partial" if failures else "completed",
        "results": succeeded,
        "errors": failures,
        "total": len(inputs),
        "successful": len(succeeded),
    }