"""
Art DAG Celery Tasks

Distributed rendering tasks for the Art DAG system.
Supports both single-effect runs and multi-step DAG execution.
"""

import asyncio
import json
import logging
import os
import shutil
import subprocess
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Callable, Coroutine, Dict, List, Optional

from celery import Task
from celery_app import app
from cache_manager import file_hash

# Import artdag components
from artdag import DAG, Node, NodeType
from artdag.engine import Engine
from artdag.executor import register_executor, Executor, get_executor
from artdag.nodes.effect import register_effect
import artdag.nodes  # Register all built-in executors (SOURCE, EFFECT, etc.)

# Add effects to path (use env var in Docker, fallback to home dir locally)
EFFECTS_PATH = Path(os.environ.get("EFFECTS_PATH", str(Path.home() / "artdag-effects")))
ARTDAG_PATH = Path(os.environ.get("ARTDAG_PATH", str(Path.home() / "art" / "artdag")))

logger = logging.getLogger(__name__)


def _git_head_commit(repo_path: Path) -> str:
    """Return the HEAD commit hash of the git checkout at *repo_path*.

    Best-effort: returns the literal string "unknown" when git cannot be
    started (missing binary, missing directory, ...) or exits non-zero.
    """
    try:
        result = subprocess.run(
            ["git", "rev-parse", "HEAD"],
            cwd=repo_path,
            capture_output=True,
            text=True,
        )
    except (OSError, subprocess.SubprocessError) as e:
        logger.debug(f"Could not read git commit in {repo_path}: {e}")
        return "unknown"
    if result.returncode == 0:
        return result.stdout.strip()
    return "unknown"


def get_effects_commit() -> str:
    """Get current git commit hash of effects repo."""
    return _git_head_commit(EFFECTS_PATH)


def get_artdag_commit() -> str:
    """Get current git commit hash of artdag repo."""
    return _git_head_commit(ARTDAG_PATH)


def _run_async(coro_factory: Callable[[], Coroutine[Any, Any, Any]]) -> None:
    """Run the coroutine built by *coro_factory* from synchronous task code.

    If the current event loop is already running the coroutine is scheduled
    on it (fire-and-forget); otherwise it is run to completion.  When no
    usable loop is available, asyncio.run() is used instead.

    NOTE: as before, a RuntimeError raised while running the coroutine also
    triggers the asyncio.run() fallback with a fresh coroutine.
    """
    try:
        loop = asyncio.get_event_loop()
        if loop.is_running():
            asyncio.ensure_future(coro_factory())
        else:
            loop.run_until_complete(coro_factory())
    except RuntimeError:
        asyncio.run(coro_factory())


def _is_source_node(node) -> bool:
    """Return True when *node* is a SOURCE node (enum member or its string form)."""
    return node.node_type == NodeType.SOURCE or str(node.node_type) == "SOURCE"


sys.path.insert(0, str(EFFECTS_PATH / "dog"))

# Register the dog effect with the EFFECT executor
# New format uses process() instead of effect_dog()
from effect import process as dog_process


@register_effect("dog")
def _dog_effect(input_path: Path, output_path: Path, config: dict) -> Path:
    """Dog effect wrapper - registered for DAG EFFECT nodes."""
    # Wrap for new whole-video API
    return dog_process([input_path], output_path, config, None)


# Cache directory (shared between server and worker)
CACHE_DIR = Path(os.environ.get("CACHE_DIR", str(Path.home() / ".artdag" / "cache")))


# ============ Executors for Effects ============

@register_executor("effect:dog")
class DogExecutor(Executor):
    """Executor for the dog effect."""

    def execute(self, config: Dict, inputs: List[Path], output_path: Path) -> Path:
        """Run the dog effect on exactly one input and return its output path.

        Raises:
            ValueError: if *inputs* does not contain exactly one path.
        """
        from effect import process as dog_process
        if len(inputs) != 1:
            raise ValueError(f"Dog effect expects 1 input, got {len(inputs)}")
        return dog_process(inputs, output_path, config, None)


@register_executor("effect:identity")
class IdentityExecutor(Executor):
    """Executor for the identity effect (passthrough)."""

    def execute(self, config: Dict, inputs: List[Path], output_path: Path) -> Path:
        """Run the identity effect on exactly one input and return its output path.

        Raises:
            ValueError: if *inputs* does not contain exactly one path.
        """
        from artdag.nodes.effect import effect_identity
        if len(inputs) != 1:
            raise ValueError(f"Identity effect expects 1 input, got {len(inputs)}")
        return effect_identity(inputs[0], output_path, config)


@register_executor(NodeType.SOURCE)
class SourceExecutor(Executor):
    """Executor for SOURCE nodes - loads content from cache by hash."""

    def execute(self, config: Dict, inputs: List[Path], output_path: Path) -> Path:
        """Resolve the node's ``cid`` to a local file path.

        Looks the CID up in the cache first and falls back to fetching it
        from IPFS into ``CACHE_DIR / "ipfs_fetch"``.  *inputs* and
        *output_path* are not used: no transformation takes place.

        Raises:
            ValueError: if the config has no ``cid`` or the content can be
                found neither in the cache nor on IPFS.
        """
        # Source nodes load from cache by cid
        cid = config.get("cid")
        if not cid:
            raise ValueError("SOURCE node requires cid in config")

        # Look up in cache
        from cache_manager import get_cache_manager
        cache_manager = get_cache_manager()
        source_path = cache_manager.get_by_cid(cid)

        if not source_path or not source_path.exists():
            # Not in cache - fetch from IPFS
            logger.info(f"SOURCE {cid[:16]}... not in cache, fetching from IPFS")
            import ipfs_client
            fetch_path = CACHE_DIR / "ipfs_fetch" / cid
            fetch_path.parent.mkdir(parents=True, exist_ok=True)
            if not ipfs_client.get_file(cid, str(fetch_path)):
                raise ValueError(f"Source content not in cache and IPFS fetch failed: {cid}")
            logger.info(f"SOURCE {cid[:16]}... fetched from IPFS to {fetch_path}")
            source_path = fetch_path

        # For source nodes, we just return the path (no transformation)
        # The engine will use this as input to subsequent nodes
        return source_path


class RenderTask(Task):
    """Base task with provenance tracking."""

    def on_success(self, retval, task_id, args, kwargs):
        """Record successful render."""
        logger.info(f"Task {task_id} completed: {retval}")

    def on_failure(self, exc, task_id, args, kwargs, einfo):
        """Record failed render."""
        logger.error(f"Task {task_id} failed: {exc}")


@app.task(base=RenderTask, bind=True)
def render_effect(self, input_hash: str, effect_name: str, output_name: str) -> dict:
    """
    Render an effect on an input asset.

    Args:
        input_hash: SHA3-256 hash of input asset
        effect_name: Name of effect (e.g., "dog", "identity")
        output_name: Name for output asset

    Returns:
        Provenance record with output hash

    Raises:
        ValueError: if the input is not cached, the input or output hash does
            not match the expected value, or the effect name is unknown.
    """
    from cache_manager import get_cache_manager

    # Registry hashes (for effects/infra metadata only)
    REGISTRY = {
        "effect:dog": {
            "hash": "d048fe313433eb4e38f0e24194ffae91b896ca3e6eed3e50b2cc37b7be495555"
        },
        "effect:identity": {
            "hash": "640ea11ee881ebf4101af0a955439105ab11e763682b209e88ea08fc66e1cc03"
        },
        "infra:artdag": {
            "hash": "96a5972de216aee12ec794dcad5f9360da2e676171eabf24a46dfe1ee5fee4b0"
        },
        "infra:giles-hp": {
            "hash": "964bf6e69dc4e2493f42375013caffe26404ec3cf8eb5d9bc170cd42a361523b"
        },
    }

    # Input comes from cache by hash (supports both legacy and new cache locations)
    cache_manager = get_cache_manager()
    input_path = cache_manager.get_by_cid(input_hash)
    if not input_path or not input_path.exists():
        raise ValueError(f"Input not in cache: {input_hash}")

    output_dir = CACHE_DIR

    # Verify input
    actual_hash = file_hash(input_path)
    if actual_hash != input_hash:
        raise ValueError(f"Input hash mismatch: expected {input_hash}, got {actual_hash}")

    self.update_state(state='RENDERING', meta={'effect': effect_name, 'input': input_hash[:16]})

    # Load and apply effect
    if effect_name == "dog":
        # NOTE(review): the module-level comment says the effects repo now
        # exposes process() instead of effect_dog() - confirm that
        # effect_dog / DOG_HASH are still exported before relying on this path.
        from effect import effect_dog, DOG_HASH
        output_path = output_dir / f"{output_name}.mkv"
        result = effect_dog(input_path, output_path, {})
        expected_hash = DOG_HASH
    elif effect_name == "identity":
        from artdag.nodes.effect import effect_identity
        output_path = output_dir / f"{output_name}{input_path.suffix}"
        result = effect_identity(input_path, output_path, {})
        expected_hash = input_hash
    else:
        raise ValueError(f"Unknown effect: {effect_name}")

    # Verify output
    output_cid = file_hash(result)
    if output_cid != expected_hash:
        raise ValueError(f"Output hash mismatch: expected {expected_hash}, got {output_cid}")

    # Build effect info based on source
    if effect_name == "identity":
        # Identity is from artdag package on GitHub
        artdag_commit = get_artdag_commit()
        effect_info = {
            "name": f"effect:{effect_name}",
            "cid": REGISTRY[f"effect:{effect_name}"]["hash"],
            "repo": "github",
            "repo_commit": artdag_commit,
            "repo_url": f"https://github.com/gilesbradshaw/art-dag/blob/{artdag_commit}/artdag/nodes/effect.py"
        }
    else:
        # Other effects from rose-ash effects repo
        effects_commit = get_effects_commit()
        effect_info = {
            "name": f"effect:{effect_name}",
            "cid": REGISTRY[f"effect:{effect_name}"]["hash"],
            "repo": "rose-ash",
            "repo_commit": effects_commit,
            "repo_url": f"https://git.rose-ash.com/art-dag/effects/src/commit/{effects_commit}/{effect_name}"
        }

    # Build provenance
    provenance = {
        "task_id": self.request.id,
        "rendered_at": datetime.now(timezone.utc).isoformat(),
        "rendered_by": "@giles@artdag.rose-ash.com",
        "output": {
            "name": output_name,
            "cid": output_cid,
        },
        "inputs": [
            {"cid": input_hash}
        ],
        "effects": [effect_info],
        "infrastructure": {
            "software": {"name": "infra:artdag", "cid": REGISTRY["infra:artdag"]["hash"]},
            "hardware": {"name": "infra:giles-hp", "cid": REGISTRY["infra:giles-hp"]["hash"]}
        }
    }

    # Store provenance on IPFS
    import ipfs_client
    provenance_cid = ipfs_client.add_json(provenance)
    if provenance_cid:
        provenance["provenance_cid"] = provenance_cid
        logger.info(f"Stored provenance on IPFS: {provenance_cid}")
    else:
        logger.warning("Failed to store provenance on IPFS")

    return provenance


@app.task(base=RenderTask, bind=True)
def execute_dag(self, dag_json: str, run_id: Optional[str] = None) -> dict:
    """
    Execute a multi-step DAG.

    Args:
        dag_json: Serialized DAG as JSON string
        run_id: Optional run ID for tracking

    Returns:
        Execution result with output hash and node results

    Raises:
        ValueError: if the DAG JSON cannot be parsed or fails validation.
        RuntimeError: if the engine reports an unsuccessful execution.
    """
    from cache_manager import get_cache_manager

    # Parse DAG
    try:
        dag = DAG.from_json(dag_json)
    except Exception as e:
        raise ValueError(f"Invalid DAG JSON: {e}") from e

    # Validate DAG
    errors = dag.validate()
    if errors:
        raise ValueError(f"Invalid DAG: {errors}")

    # Create engine with cache directory
    engine = Engine(CACHE_DIR / "nodes")

    # Set up progress callback
    def progress_callback(progress):
        self.update_state(
            state='EXECUTING',
            meta={
                'node_id': progress.node_id,
                'node_type': progress.node_type,
                'status': progress.status,
                'progress': progress.progress,
                'message': progress.message,
            }
        )
        logger.info(f"DAG progress: {progress.node_id} - {progress.status} - {progress.message}")

    engine.set_progress_callback(progress_callback)

    # Execute DAG
    self.update_state(state='EXECUTING', meta={'status': 'starting', 'nodes': len(dag.nodes)})
    result = engine.execute(dag)

    if not result.success:
        raise RuntimeError(f"DAG execution failed: {result.error}")

    # Index all node outputs by cid and upload to IPFS
    cache_manager = get_cache_manager()
    output_cid = None
    output_ipfs_cid = None
    node_hashes = {}     # node_id -> cid mapping
    node_ipfs_cids = {}  # node_id -> ipfs_cid mapping

    # Process all node results (intermediates + output)
    for node_id, node_path in result.node_results.items():
        if not (node_path and Path(node_path).exists()):
            continue
        node = dag.nodes.get(node_id)

        # Skip SOURCE nodes - they're already in cache
        if node and _is_source_node(node):
            cid = node.config.get("cid")
            if cid:
                node_hashes[node_id] = cid
            continue

        # Determine node type for cache metadata
        node_type_str = str(node.node_type) if node else "intermediate"
        if "effect" in node_type_str.lower():
            cache_node_type = "effect_output"
        else:
            cache_node_type = "dag_intermediate"

        # Store in cache_manager (stored by IPFS CID, indexed by node_id)
        _, content_cid = cache_manager.put(
            Path(node_path),
            node_type=cache_node_type,
            cache_id=node_id,
        )
        # content_cid is always IPFS CID now (IPFS failures are fatal)
        node_hashes[node_id] = content_cid
        node_ipfs_cids[node_id] = content_cid
        logger.info(f"Cached node {node_id}: IPFS CID {content_cid}")

    # CIDs of all SOURCE nodes, in DAG order (used for the run record,
    # activity tracking and provenance)
    source_cids = [
        node.config.get("cid")
        for node in dag.nodes.values()
        if _is_source_node(node) and node.config.get("cid")
    ]

    # Get output hash from the output node
    # Use the same identifier that's in the cache index (IPFS CID if available)
    if result.output_path and result.output_path.exists():
        local_hash = file_hash(result.output_path)
        output_ipfs_cid = node_ipfs_cids.get(dag.output_id)
        # Use IPFS CID as primary identifier if available, otherwise local hash
        # This must match what's in the content_index from cache_manager.put()
        output_cid = node_hashes.get(dag.output_id, local_hash)

        # Store output in database (for L2 to query IPFS CID)
        import database

        # Store plan (DAG) to IPFS and local cache
        plan_cid = None
        try:
            import ipfs_client
            dag_dict = json.loads(dag_json)
            plan_cid = ipfs_client.add_json(dag_dict)
            if plan_cid:
                logger.info(f"Stored plan to IPFS: {plan_cid}")
                # Also store locally so it can be retrieved without IPFS
                # Store directly in cache_dir (get_by_cid checks cache_dir/cid)
                plan_path = CACHE_DIR / plan_cid
                CACHE_DIR.mkdir(parents=True, exist_ok=True)
                with open(plan_path, "w") as f:
                    json.dump(dag_dict, f, indent=2)
        except Exception as e:
            logger.warning(f"Failed to store plan to IPFS: {e}")

        async def save_to_db():
            if database.pool is None:
                await database.init_db()
            await database.create_cache_item(output_cid, output_ipfs_cid)

            # Also save the run result
            if not run_id:
                return

            # Get actor_id and recipe from pending_runs (saved when run started)
            actor_id = None
            recipe_name = "dag"
            pending = await database.get_pending_run(run_id)
            if pending:
                actor_id = pending.get("actor_id")
                recipe_name = pending.get("recipe") or "dag"

            await database.save_run_cache(
                run_id=run_id,
                output_cid=output_cid,
                recipe=recipe_name,
                inputs=list(source_cids),
                ipfs_cid=output_ipfs_cid,
                actor_id=actor_id,
                plan_cid=plan_cid,
            )

            # Save output as media for the user
            if actor_id:
                await database.save_item_metadata(
                    cid=output_cid,
                    actor_id=actor_id,
                    item_type="media",
                    description=f"Output from recipe: {recipe_name}",
                    source_type="recipe",
                    source_note=f"run_id: {run_id}",
                )

            # Clean up pending run
            if pending:
                await database.complete_pending_run(run_id)

        _run_async(save_to_db)

        # Record activity for deletion tracking
        intermediate_hashes = [
            node_hashes[node_id]
            for node_id, node in dag.nodes.items()
            if not _is_source_node(node)
            and node_id != dag.output_id
            and node_id in node_hashes
        ]

        if source_cids:
            from artdag.activities import Activity

            activity = Activity(
                activity_id=run_id or f"dag-{output_cid[:16]}",
                input_ids=sorted(source_cids),
                output_id=output_cid,
                intermediate_ids=intermediate_hashes,
                created_at=datetime.now(timezone.utc).timestamp(),
                status="completed",
            )
            cache_manager.activity_store.add(activity)

    # Build provenance
    provenance = {
        "task_id": self.request.id,
        "run_id": run_id,
        "rendered_at": datetime.now(timezone.utc).isoformat(),
        "output": {
            "cid": output_cid,
            "ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
        },
        "inputs": [{"cid": cid} for cid in source_cids],
        "dag": dag_json,  # Full DAG definition
        "nodes": {
            node_id: {
                "cid": node_hashes.get(node_id),
                "ipfs_cid": node_ipfs_cids.get(node_id),
            }
            for node_id in dag.nodes.keys()
            if node_id in node_hashes
        },
        "execution": {
            "execution_time": result.execution_time,
            "nodes_executed": result.nodes_executed,
            "nodes_cached": result.nodes_cached,
        }
    }

    # Store provenance on IPFS
    import ipfs_client
    provenance_cid = ipfs_client.add_json(provenance)
    if provenance_cid:
        provenance["provenance_cid"] = provenance_cid
        logger.info(f"Stored DAG provenance on IPFS: {provenance_cid}")
    else:
        logger.warning("Failed to store DAG provenance on IPFS")

    # Build result
    return {
        "success": True,
        "run_id": run_id,
        "output_cid": output_cid,
        "output_ipfs_cid": node_ipfs_cids.get(dag.output_id) if dag.output_id else None,
        "output_path": str(result.output_path) if result.output_path else None,
        "execution_time": result.execution_time,
        "nodes_executed": result.nodes_executed,
        "nodes_cached": result.nodes_cached,
        "node_results": {
            node_id: str(path) for node_id, path in result.node_results.items()
        },
        "node_hashes": node_hashes,        # node_id -> cid
        "node_ipfs_cids": node_ipfs_cids,  # node_id -> ipfs_cid
        "provenance_cid": provenance_cid,
    }


@app.task(base=RenderTask, bind=True)
def execute_recipe(self, recipe_sexp: str, input_hashes: Dict[str, str], run_id: Optional[str] = None) -> dict:
    """
    Execute an S-expression recipe.

    The recipe S-expression unfolds into a plan S-expression with code-addressed
    cache IDs computed before execution. Each plan node gets a deterministic hash
    "bucket" based on the computation definition (Merkle tree), not the results.

    Phases:
    1. Parse: compile_string(recipe_sexp) -> CompiledRecipe
    2. Analyze: Extract and run analysis nodes from recipe
    3. Plan: create_plan(compiled, inputs) -> ExecutionPlanSexp with cache IDs
    4. Store: plan.to_string() -> store as S-expression
    5. Execute: Run steps level-by-level, checking cache by cache_id
    6. Return: Include plan_sexp in result

    Args:
        recipe_sexp: Recipe as S-expression string
        input_hashes: Mapping from input name to content hash (CID)
        run_id: Optional run ID for tracking

    Returns:
        Execution result with output CID, plan S-expression, and node results.
        Step failures and missing output are reported as a dict with
        ``"success": False`` rather than raised.

    Raises:
        ImportError: if the artdag S-expression modules are not available.
        ValueError: if the recipe cannot be parsed or compiled.
    """
    from cache_manager import get_cache_manager
    import ipfs_client

    # Try to import S-expression modules
    try:
        from artdag.sexp import compile_string, CompileError, ParseError
        from artdag.sexp.planner import create_plan, ExecutionPlanSexp, PlanStep
    except ImportError as e:
        raise ImportError(f"S-expression modules not available: {e}") from e

    cache_manager = get_cache_manager()

    logger.info(f"Executing recipe with {len(input_hashes)} inputs, run_id={run_id}")

    # ============ Phase 1: Parse ============
    self.update_state(state='PARSING', meta={'status': 'parsing recipe'})
    logger.info("Phase 1: Parsing recipe S-expression...")

    try:
        compiled = compile_string(recipe_sexp)
    except (ParseError, CompileError) as e:
        raise ValueError(f"Recipe parse error: {e}") from e

    recipe_name = compiled.name or "unnamed"
    logger.info(f"Parsed recipe: {recipe_name}")

    # ============ Phase 2: Analysis ============
    self.update_state(state='ANALYZING', meta={'status': 'running analysis'})
    logger.info("Phase 2: Running analysis nodes...")

    # NOTE(review): analysis_results is only counted in the log below; it is
    # not passed to create_plan() - confirm whether the planner should see it.
    analysis_results = {}

    # Extract analysis nodes from compiled recipe
    for node in compiled.nodes:
        analysis_type = node.get("type", "").upper()
        config = node.get("config", {})

        if not (analysis_type == "ANALYZE" or config.get("analyze")):
            continue

        node_id = node.get("id")
        input_ref = config.get("input") or config.get("source")
        feature = config.get("feature") or config.get("analyze")

        # Resolve input reference to CID
        cid = input_hashes.get(input_ref)
        if not cid:
            logger.warning(f"Analysis node {node_id}: input '{input_ref}' not in input_hashes")
            continue

        # Get input file path
        input_path = cache_manager.get_by_cid(cid)
        if not input_path:
            logger.warning(f"Analysis node {node_id}: content {cid[:16]}... not in cache")
            continue

        # Run analysis (best-effort: a failing analysis does not abort the run)
        try:
            from artdag.analysis import Analyzer
            analysis_dir = CACHE_DIR / "analysis"
            analysis_dir.mkdir(parents=True, exist_ok=True)
            analyzer = Analyzer(cache_dir=analysis_dir)

            features = [feature] if feature else ["beats", "energy"]
            analysis = analyzer.analyze(
                input_hash=cid,
                features=features,
                input_path=Path(input_path),
            )
            analysis_results[node_id] = analysis
            analysis_results[cid] = analysis
            logger.info(f"Analysis {node_id}: feature={feature}")
        except Exception as e:
            logger.warning(f"Analysis failed for {node_id}: {e}")

    logger.info(f"Completed {len(analysis_results)} analysis results")

    # ============ Phase 3: Generate Plan ============
    self.update_state(state='PLANNING', meta={'status': 'generating plan'})
    logger.info("Phase 3: Generating execution plan with code-addressed cache IDs...")

    plan = create_plan(compiled, inputs=input_hashes)
    logger.info(f"Generated plan with {len(plan.steps)} steps, plan_id={plan.plan_id[:16]}...")

    # ============ Phase 4: Store Plan as S-expression ============
    plan_sexp = plan.to_string(pretty=True)
    plan_cid = None

    try:
        plan_cid = ipfs_client.add_string(plan_sexp)
        if plan_cid:
            logger.info(f"Stored plan to IPFS: {plan_cid}")
            # Also store locally for fast retrieval
            plan_path = CACHE_DIR / plan_cid
            CACHE_DIR.mkdir(parents=True, exist_ok=True)
            plan_path.write_text(plan_sexp)

            # Save plan_cid to database immediately so it's available even if run fails
            if run_id:
                import database

                async def save_plan_cid():
                    if database.pool is None:
                        await database.init_db()
                    await database.update_pending_run_plan(run_id, plan_cid)

                _run_async(save_plan_cid)
                logger.info(f"Saved plan_cid to pending run: {run_id}")
    except Exception as e:
        logger.warning(f"Failed to store plan to IPFS: {e}")

    # ============ Phase 5: Execute Steps Level-by-Level ============
    self.update_state(state='EXECUTING', meta={'status': 'executing steps', 'total_steps': len(plan.steps)})
    logger.info("Phase 5: Executing plan steps...")

    # Group steps by level and index them by step_id (first occurrence wins,
    # matching a front-to-back scan of plan.steps)
    steps_by_level: Dict[int, List[PlanStep]] = {}
    steps_by_id = {}
    for step in plan.steps:
        steps_by_level.setdefault(step.level, []).append(step)
        steps_by_id.setdefault(step.step_id, step)

    max_level = max(steps_by_level.keys()) if steps_by_level else 0

    step_results = {}      # step_id -> {"status", "path", "cid", "ipfs_cid"}
    cache_id_to_path = {}  # cache_id -> output path (for resolving inputs)
    total_cached = 0
    total_executed = 0

    # Map input names to their cache_ids (inputs are their own cache_ids)
    for cid in input_hashes.values():
        cache_id_to_path[cid] = cache_manager.get_by_cid(cid)

    for level in range(max_level + 1):
        level_steps = steps_by_level.get(level, [])
        if not level_steps:
            continue

        logger.info(f"Executing level {level}: {len(level_steps)} steps")

        for step in level_steps:
            self.update_state(
                state='EXECUTING',
                meta={
                    'step_id': step.step_id,
                    'step_type': step.node_type,
                    'level': level,
                    'cache_id': step.cache_id[:16],
                }
            )

            # Check if cached using code-addressed cache_id
            cached_path = cache_manager.get_by_cid(step.cache_id)
            if cached_path and cached_path.exists():
                logger.info(f"Step {step.step_id}: cached at {step.cache_id[:16]}...")
                step_results[step.step_id] = {
                    "status": "cached",
                    "path": str(cached_path),
                    "cache_id": step.cache_id,
                }
                cache_id_to_path[step.cache_id] = cached_path
                total_cached += 1
                continue

            # Execute the step
            try:
                node_type = step.node_type.upper()

                # Resolve input paths from previous step cache_ids
                input_paths = []
                for input_ref in step.inputs:
                    # input_ref is a step_id - find its cache_id and path
                    input_step = steps_by_id.get(input_ref)
                    if input_step:
                        input_path = cache_id_to_path.get(input_step.cache_id)
                        if input_path:
                            input_paths.append(Path(input_path))
                        else:
                            # Check if it's a source input
                            source_cid = step.config.get("cid")
                            if source_cid:
                                input_path = cache_manager.get_by_cid(source_cid)
                                if input_path:
                                    input_paths.append(Path(input_path))
                    else:
                        # Direct CID reference (source node)
                        source_cid = input_hashes.get(input_ref) or step.config.get("cid")
                        if source_cid:
                            input_path = cache_manager.get_by_cid(source_cid)
                            if input_path:
                                input_paths.append(Path(input_path))

                # Handle SOURCE nodes
                if node_type == "SOURCE":
                    source_cid = step.config.get("cid")

                    # If source has :input true, resolve CID from input_hashes
                    if not source_cid and step.config.get("input"):
                        source_name = step.config.get("name", "")
                        # Try various key formats for lookup
                        name_variants = [
                            source_name,
                            source_name.lower().replace(" ", "-"),
                            source_name.lower().replace(" ", "_"),
                            source_name.lower(),
                        ]
                        for variant in name_variants:
                            if variant in input_hashes:
                                source_cid = input_hashes[variant]
                                logger.info(f"Resolved SOURCE '{source_name}' -> {source_cid[:16]}... via '{variant}'")
                                break

                        if not source_cid:
                            raise ValueError(f"SOURCE '{source_name}' not found in input_hashes. Available: {list(input_hashes.keys())}")

                    if not source_cid:
                        raise ValueError(f"SOURCE step has no cid and no :input flag: {step.config}")

                    source_path = cache_manager.get_by_cid(source_cid)
                    if not source_path:
                        raise ValueError(f"Source content not found in cache: {source_cid[:16]}...")

                    step_results[step.step_id] = {
                        "status": "source",
                        "path": str(source_path),
                        "cache_id": step.cache_id,
                        "cid": source_cid,
                    }
                    cache_id_to_path[step.cache_id] = source_path
                    total_cached += 1
                    continue

                # Handle COMPOUND nodes (collapsed effect chains)
                if node_type == "COMPOUND":
                    filter_chain = step.config.get("filter_chain", [])
                    if not filter_chain:
                        raise ValueError("COMPOUND step has empty filter_chain")

                    # Get input path
                    if not input_paths:
                        raise ValueError("COMPOUND step has no inputs")

                    output_dir = CACHE_DIR / "nodes" / step.cache_id
                    final_output = output_dir / "output.mkv"

                    # For COMPOUND with EFFECT filters, run effects sequentially.
                    # Intermediates live in a temp dir that is removed even if
                    # an effect fails part-way through the chain.
                    with tempfile.TemporaryDirectory() as temp_root:
                        current_input = input_paths[0]

                        for i, filter_item in enumerate(filter_chain):
                            if filter_item.get("type", "") != "EFFECT":
                                continue
                            filter_config = filter_item.get("config", {})
                            effect_name = filter_config.get("effect")
                            if not effect_name:
                                continue
                            effect_cid = filter_config.get("cid")

                            # Try specific executor first, fall back to generic EFFECT executor
                            effect_executor = get_executor(f"effect:{effect_name}") or get_executor("EFFECT")
                            if not effect_executor:
                                raise ValueError(f"COMPOUND: No executor for effect {effect_name}")

                            temp_output = Path(temp_root) / f"compound_{i}_{effect_name}.mkv"
                            logger.info(f"COMPOUND: Running effect {effect_name} (cid={effect_cid[:16] if effect_cid else 'built-in'}...) step {i+1}/{len(filter_chain)}")
                            current_input = effect_executor.execute(filter_config, [current_input], temp_output)

                        # Store final result (copied out before the temp dir goes away)
                        output_dir.mkdir(parents=True, exist_ok=True)
                        shutil.copy2(current_input, final_output)

                    # Upload to IPFS (stored by IPFS CID, indexed by cache_id)
                    _, content_cid = cache_manager.put(
                        final_output,
                        node_type="COMPOUND",
                        cache_id=step.cache_id,
                    )

                    step_results[step.step_id] = {
                        "status": "executed",
                        "path": str(final_output),
                        "cache_id": step.cache_id,
                        "cid": content_cid,
                        "filter_count": len(filter_chain),
                    }
                    cache_id_to_path[step.cache_id] = final_output
                    total_executed += 1
                    logger.info(f"COMPOUND step {step.step_id}: {len(filter_chain)} effects -> {content_cid[:16]}...")
                    continue

                # Handle SEQUENCE nodes (concatenate clips)
                if node_type == "SEQUENCE":
                    if len(input_paths) < 2:
                        raise ValueError(f"SEQUENCE requires at least 2 inputs, got {len(input_paths)}")

                    output_dir = CACHE_DIR / "nodes" / step.cache_id
                    output_dir.mkdir(parents=True, exist_ok=True)
                    final_output = output_dir / "output.mkv"

                    with tempfile.TemporaryDirectory() as temp_root:
                        # Create concat list file for FFmpeg
                        concat_list = Path(temp_root) / "concat.txt"
                        with open(concat_list, "w") as f:
                            for inp in input_paths:
                                # Inside a single-quoted FFmpeg string a literal
                                # quote must be written as '\''
                                escaped = str(inp).replace("'", "'\\''")
                                f.write(f"file '{escaped}'\n")

                        # FFmpeg concat demuxer
                        concat_cmd = [
                            "ffmpeg", "-y", "-f", "concat", "-safe", "0",
                            "-i", str(concat_list),
                        ]

                        logger.info(f"SEQUENCE: Concatenating {len(input_paths)} clips")
                        proc = subprocess.run(
                            concat_cmd + ["-c", "copy", str(final_output)],
                            capture_output=True, text=True,
                        )

                        if proc.returncode != 0:
                            # Try with re-encoding if copy fails
                            proc = subprocess.run(
                                concat_cmd + ["-c:v", "libx264", "-c:a", "aac", str(final_output)],
                                capture_output=True, text=True,
                            )
                            if proc.returncode != 0:
                                raise RuntimeError(f"FFmpeg concat failed: {proc.stderr}")

                    # Upload to IPFS (stored by IPFS CID, indexed by cache_id)
                    _, content_cid = cache_manager.put(
                        final_output,
                        node_type="SEQUENCE",
                        cache_id=step.cache_id,
                    )

                    step_results[step.step_id] = {
                        "status": "executed",
                        "path": str(final_output),
                        "cache_id": step.cache_id,
                        "cid": content_cid,
                        "input_count": len(input_paths),
                    }
                    cache_id_to_path[step.cache_id] = final_output
                    total_executed += 1
                    logger.info(f"SEQUENCE step {step.step_id}: {len(input_paths)} clips -> {content_cid[:16]}...")
                    continue

                # Handle EFFECT nodes
                if node_type == "EFFECT":
                    effect_name = step.config.get("effect")
                    if not effect_name:
                        raise ValueError(f"EFFECT node missing 'effect' in config: {step.config}")

                    # Try specific executor first (e.g., effect:dog)
                    executor = get_executor(f"effect:{effect_name}")
                    logger.info(f"EFFECT: get_executor('effect:{effect_name}') = {executor}")
                    if not executor:
                        # Fall back to generic EFFECT executor (handles IPFS effects)
                        executor = get_executor("EFFECT")
                        logger.info(f"EFFECT: Fallback get_executor('EFFECT') = {executor}")

                    if not executor:
                        raise ValueError(f"No executor for effect: {effect_name}")

                    if len(input_paths) != 1:
                        raise ValueError(f"EFFECT expects 1 input, got {len(input_paths)}")

                    output_dir = CACHE_DIR / "nodes" / step.cache_id
                    output_dir.mkdir(parents=True, exist_ok=True)
                    output_path = output_dir / "output.mkv"

                    effect_cid = step.config.get("cid")
                    logger.info(f"EFFECT: Running {effect_name} (cid={effect_cid[:16] if effect_cid else 'built-in'}...)")
                    result_path = executor.execute(step.config, input_paths, output_path)

                    _, content_cid = cache_manager.put(
                        result_path,
                        node_type="EFFECT",
                        cache_id=step.cache_id,
                    )

                    step_results[step.step_id] = {
                        "status": "executed",
                        "path": str(result_path),
                        "cache_id": step.cache_id,
                        "cid": content_cid,
                        "effect": effect_name,
                    }
                    cache_id_to_path[step.cache_id] = result_path
                    total_executed += 1
                    logger.info(f"EFFECT step {step.step_id}: {effect_name} -> {content_cid[:16]}...")
                    continue

                # Fallback: try to get executor for unknown node types
                executor = get_executor(node_type) or get_executor(step.node_type)
                if not executor:
                    raise ValueError(f"No executor for node type: {step.node_type}")

                # Determine output path
                output_dir = CACHE_DIR / "nodes" / step.cache_id
                output_dir.mkdir(parents=True, exist_ok=True)
                output_path = output_dir / "output.mkv"

                # Execute
                logger.info(f"Executing step {step.step_id} ({step.node_type}) with {len(input_paths)} inputs")
                result_path = executor.execute(step.config, input_paths, output_path)

                # Store result in cache (by IPFS CID, indexed by cache_id)
                _, content_cid = cache_manager.put(
                    result_path,
                    node_type=step.node_type,
                    cache_id=step.cache_id,
                )

                step_results[step.step_id] = {
                    "status": "executed",
                    "path": str(result_path),
                    "cache_id": step.cache_id,
                    "cid": content_cid,
                    "ipfs_cid": content_cid if content_cid.startswith(("Qm", "bafy")) else None,
                }
                cache_id_to_path[step.cache_id] = result_path
                total_executed += 1

                logger.info(f"Step {step.step_id}: executed -> {content_cid[:16]}...")

            except Exception as e:
                # A failing step ends the run; report it instead of raising so
                # the caller still receives the plan and partial results.
                logger.exception(f"Step {step.step_id} failed: {e}")
                return {
                    "success": False,
                    "run_id": run_id,
                    "error": f"Step {step.step_id} failed: {e}",
                    "step_results": step_results,
                    "plan_cid": plan_cid,
                    "plan_sexp": plan_sexp,
                }

    # Get output from final step
    output_step = steps_by_id.get(plan.output_step_id)
    output_result = step_results.get(output_step.step_id, {}) if output_step else {}
    output_ipfs_cid = output_result.get("cid")  # cid should be IPFS CID now
    output_path = output_result.get("path")

    # Fail if no output was produced (checked before the IPFS upload so a
    # missing output is not misreported as an upload failure)
    if not output_path:
        logger.error(f"Recipe produced no output! output_step={plan.output_step_id}, result={output_result if output_step else 'no output step'}")
        return {
            "success": False,
            "run_id": run_id,
            "error": "Recipe produced no output",
            "plan_cid": plan_cid,
            "plan_sexp": plan_sexp,
            "step_results": step_results,
            "total_steps": len(plan.steps),
            "cached": total_cached,
            "executed": total_executed,
        }

    # Upload final output to IPFS if not already there
    if not output_ipfs_cid:
        output_path_obj = Path(output_path)
        if output_path_obj.exists():
            logger.info(f"Uploading final output to IPFS: {output_path}")
            output_ipfs_cid = ipfs_client.add_file(str(output_path_obj))
            if output_ipfs_cid:
                logger.info(f"Uploaded output to IPFS: {output_ipfs_cid}")
            else:
                logger.error(f"Failed to upload output to IPFS: {output_path}")

    # Use IPFS CID as the primary output identifier
    output_cid = output_ipfs_cid

    # Fail if output couldn't be uploaded to IPFS
    if not output_ipfs_cid:
        logger.error(f"Recipe failed: Could not upload output to IPFS! output_cid={output_cid}, output_path={output_path}")
        return {
            "success": False,
            "run_id": run_id,
            "error": "Failed to upload output to IPFS",
            "plan_cid": plan_cid,
            "plan_sexp": plan_sexp,
            "output_cid": output_cid,
            "output_path": output_path,
            "step_results": step_results,
            "total_steps": len(plan.steps),
            "cached": total_cached,
            "executed": total_executed,
        }

    # ============ Phase 6: Store Results ============
    logger.info("Phase 6: Storing results...")

    # Store in database
    import database

    async def save_to_db():
        if database.pool is None:
            await database.init_db()

        # Get actor_id from pending run
        actor_id = None
        pending = await database.get_pending_run(run_id) if run_id else None
        if pending:
            actor_id = pending.get("actor_id")

        await database.save_run_cache(
            run_id=run_id,
            output_cid=output_cid,
            recipe=recipe_name,
            inputs=list(input_hashes.values()),
            ipfs_cid=output_ipfs_cid,
            actor_id=actor_id,
            plan_cid=plan_cid,
        )

        # Save output as media for user
        if actor_id and output_cid:
            await database.save_item_metadata(
                cid=output_cid,
                actor_id=actor_id,
                item_type="media",
                description=f"Output from recipe: {recipe_name}",
                source_type="recipe",
                source_note=f"run_id: {run_id}",
            )

        # Complete pending run
        if pending and run_id:
            await database.complete_pending_run(run_id)

    _run_async(save_to_db)

    # Build and store provenance
    provenance = {
        "task_id": self.request.id,
        "run_id": run_id,
        "rendered_at": datetime.now(timezone.utc).isoformat(),
        "recipe": recipe_name,
        "recipe_sexp": recipe_sexp,
        "plan_sexp": plan_sexp,
        "plan_cid": plan_cid,
        "output": {
            "cid": output_cid,
            "ipfs_cid": output_ipfs_cid,
        },
        "inputs": input_hashes,
        "steps": {
            step_id: {
                "cache_id": step_result.get("cache_id"),
                "cid": step_result.get("cid"),
                "status": step_result.get("status"),
            }
            for step_id, step_result in step_results.items()
        },
        "execution": {
            "total_steps": len(plan.steps),
            "cached": total_cached,
            "executed": total_executed,
        }
    }

    provenance_cid = ipfs_client.add_json(provenance)
    if provenance_cid:
        logger.info(f"Stored provenance on IPFS: {provenance_cid}")

    logger.info(f"Recipe execution complete: output={output_cid[:16] if output_cid else 'none'}...")

    return {
        "success": True,
        "run_id": run_id,
        "recipe": recipe_name,
        "plan_cid": plan_cid,
        "plan_sexp": plan_sexp,
        "output_cid": output_cid,
        "output_ipfs_cid": output_ipfs_cid,
        "output_path": output_path,
        "total_steps": len(plan.steps),
        "cached": total_cached,
        "executed": total_executed,
        "step_results": step_results,
        "provenance_cid": provenance_cid,
    }


def build_effect_dag(input_hashes: List[str], effect_name: str) -> DAG:
    """
    Build a simple DAG for applying an effect to inputs.

    Args:
        input_hashes: List of input content hashes
        effect_name: Name of effect to apply (e.g., "dog", "identity")

    Returns:
        DAG ready for execution
    """
    dag = DAG()

    # Add source nodes for each input
    source_ids = []
    for i, cid in enumerate(input_hashes):
        source_node = Node(
            node_type=NodeType.SOURCE,
            config={"cid": cid},
            name=f"source_{i}",
        )
        dag.add_node(source_node)
        source_ids.append(source_node.node_id)

    # Add effect node
    effect_node = Node(
        node_type=f"effect:{effect_name}",
        config={},
        inputs=source_ids,
        name=f"effect_{effect_name}",
    )
    dag.add_node(effect_node)
    dag.set_output(effect_node.node_id)

    return dag