Replace batch DAG system with streaming architecture

- Remove legacy_tasks.py, hybrid_state.py, render.py
- Remove old task modules (analyze, execute, execute_sexp, orchestrate)
- Add streaming interpreter from test repo
- Add sexp_effects with primitives and video effects
- Add streaming Celery task with CID-based asset resolution
- Support both CID and friendly name references for assets
- Add .dockerignore to prevent local clones from conflicting

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
giles
2026-02-02 19:10:11 +00:00
parent 270eeb3fcf
commit bb458aa924
107 changed files with 15830 additions and 3211 deletions

View File

@@ -1,26 +1,10 @@
# art-celery/tasks - Celery tasks for streaming video rendering
#
# Tasks:
# 1. run_stream - Execute a streaming S-expression recipe
#
# NOTE: the legacy 3-phase task modules (analyze, execute, orchestrate,
# execute_sexp) were removed in the streaming-architecture change; importing
# them here would raise ImportError at worker startup.

from .streaming import run_stream

__all__ = [
    "run_stream",
]

View File

@@ -1,132 +0,0 @@
"""
Analysis tasks for extracting features from input media.
Phase 1 of the 3-phase execution model.
"""
import json
import logging
import os
from pathlib import Path
from typing import Dict, List, Optional
from celery import current_task
# Import from the Celery app
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from celery_app import app
# Import artdag analysis module
try:
from artdag.analysis import Analyzer, AnalysisResult
except ImportError:
# artdag not installed, will fail at runtime
Analyzer = None
AnalysisResult = None
logger = logging.getLogger(__name__)
# Cache directory for analysis results
CACHE_DIR = Path(os.environ.get('CACHE_DIR', str(Path.home() / ".artdag" / "cache")))
ANALYSIS_CACHE_DIR = CACHE_DIR / 'analysis'
@app.task(bind=True, name='tasks.analyze_input')
def analyze_input(
    self,
    input_hash: str,
    input_path: str,
    features: List[str],
) -> dict:
    """
    Extract the requested features from a single input file.

    Args:
        input_hash: Content hash of the input
        input_path: Path to the input file
        features: List of features to extract

    Returns:
        Dict whose ``status`` is "completed" (with ``cache_id`` and the
        analysis payload under ``result``) or "failed" (with ``error``).
    """
    if Analyzer is None:
        raise ImportError("artdag.analysis not available")
    logger.info(f"Analyzing {input_hash[:16]}... for features: {features}")
    # Analysis results are cached on disk, so re-analyzing the same input
    # for the same features is cheap.
    ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    media_analyzer = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)
    try:
        analysis = media_analyzer.analyze(
            input_hash=input_hash,
            features=features,
            input_path=Path(input_path),
        )
        return {
            "status": "completed",
            "input_hash": input_hash,
            "cache_id": analysis.cache_id,
            "features": features,
            "result": analysis.to_dict(),
        }
    except Exception as e:
        logger.error(f"Analysis failed for {input_hash}: {e}")
        return {
            "status": "failed",
            "input_hash": input_hash,
            "error": str(e),
        }
@app.task(bind=True, name='tasks.analyze_inputs')
def analyze_inputs(
    self,
    inputs: Dict[str, str],
    features: List[str],
) -> dict:
    """
    Analyze several inputs with one shared analyzer.

    Args:
        inputs: Dict mapping input_hash to file path
        features: List of features to extract from all inputs

    Returns:
        Dict with per-input ``results``, a list of ``errors``, and a
        ``status`` of "completed" (no failures) or "partial".
    """
    if Analyzer is None:
        raise ImportError("artdag.analysis not available")
    logger.info(f"Analyzing {len(inputs)} inputs for features: {features}")
    ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    shared_analyzer = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)
    succeeded = {}
    failures = []
    for content_hash, media_path in inputs.items():
        # A failure on one input is recorded and does not stop the batch.
        try:
            outcome = shared_analyzer.analyze(
                input_hash=content_hash,
                features=features,
                input_path=Path(media_path),
            )
            succeeded[content_hash] = outcome.to_dict()
        except Exception as e:
            logger.error(f"Analysis failed for {content_hash}: {e}")
            failures.append({"input_hash": content_hash, "error": str(e)})
    return {
        "status": "completed" if not failures else "partial",
        "results": succeeded,
        "errors": failures,
        "total": len(inputs),
        "successful": len(succeeded),
    }

View File

@@ -1,381 +0,0 @@
"""
Step execution task.
Phase 3 of the 3-phase execution model.
Executes individual steps from an execution plan with IPFS-backed caching.
"""
import json
import logging
import os
import socket
from pathlib import Path
from typing import Dict, List, Optional
from celery import current_task
# Import from the Celery app
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from celery_app import app
from claiming import (
get_claimer,
claim_task,
complete_task,
fail_task,
ClaimStatus,
)
from cache_manager import get_cache_manager, L1CacheManager
# Import artdag
try:
from artdag import Cache, NodeType
from artdag.executor import get_executor
from artdag.planning import ExecutionStep
except ImportError:
Cache = None
NodeType = None
get_executor = None
ExecutionStep = None
logger = logging.getLogger(__name__)
def get_worker_id() -> str:
    """Return a unique identity string for this worker (``hostname:pid``)."""
    return f"{socket.gethostname()}:{os.getpid()}"
@app.task(bind=True, name='tasks.execute_step')
def execute_step(
    self,
    step_json: str,
    plan_id: str,
    input_cache_ids: Dict[str, str],
) -> dict:
    """
    Execute a single step from an execution plan.

    Uses hash-based claiming to prevent duplicate work.
    Results are stored in IPFS-backed cache.

    Args:
        step_json: JSON-serialized ExecutionStep
        plan_id: ID of the parent execution plan
        input_cache_ids: Mapping from input step_id to their cache_id

    Returns:
        Dict with execution result. ``status`` is one of: "cached",
        "completed", "completed_by_other", "timeout", "failed".
    """
    if ExecutionStep is None:
        raise ImportError("artdag.planning not available")
    step = ExecutionStep.from_json(step_json)
    worker_id = get_worker_id()
    task_id = self.request.id
    logger.info(f"Executing step {step.step_id} ({step.node_type}) cache_id={step.cache_id[:16]}...")
    # Get L1 cache manager (IPFS-backed)
    cache_mgr = get_cache_manager()
    # Check if already cached (by cache_id as cid) — fast path, no claim needed.
    cached_path = cache_mgr.get_by_cid(step.cache_id)
    if cached_path:
        logger.info(f"Step {step.step_id} already cached at {cached_path}")
        # Mark as cached in claiming system
        claimer = get_claimer()
        claimer.mark_cached(step.cache_id, str(cached_path))
        return {
            "status": "cached",
            "step_id": step.step_id,
            "cache_id": step.cache_id,
            "output_path": str(cached_path),
        }
    # Try to claim the task
    if not claim_task(step.cache_id, worker_id, task_id):
        # Another worker is handling it — block until it resolves (10 min cap).
        logger.info(f"Step {step.step_id} claimed by another worker, waiting...")
        claimer = get_claimer()
        result = claimer.wait_for_completion(step.cache_id, timeout=600)
        if result and result.status == ClaimStatus.COMPLETED:
            return {
                "status": "completed_by_other",
                "step_id": step.step_id,
                "cache_id": step.cache_id,
                "output_path": result.output_path,
            }
        elif result and result.status == ClaimStatus.CACHED:
            return {
                "status": "cached",
                "step_id": step.step_id,
                "cache_id": step.cache_id,
                "output_path": result.output_path,
            }
        elif result and result.status == ClaimStatus.FAILED:
            return {
                "status": "failed",
                "step_id": step.step_id,
                "cache_id": step.cache_id,
                "error": result.error,
            }
        else:
            return {
                "status": "timeout",
                "step_id": step.step_id,
                "cache_id": step.cache_id,
                "error": "Timeout waiting for other worker",
            }
    # We have the claim, update to running
    claimer = get_claimer()
    claimer.update_status(step.cache_id, worker_id, ClaimStatus.RUNNING)
    try:
        # Handle SOURCE nodes: resolve the referenced content from cache,
        # no media processing happens.
        if step.node_type == "SOURCE":
            cid = step.config.get("cid")
            if not cid:
                raise ValueError(f"SOURCE step missing cid")
            # Look up in cache
            path = cache_mgr.get_by_cid(cid)
            if not path:
                raise ValueError(f"SOURCE input not found in cache: {cid[:16]}...")
            output_path = str(path)
            complete_task(step.cache_id, worker_id, output_path)
            return {
                "status": "completed",
                "step_id": step.step_id,
                "cache_id": step.cache_id,
                "output_path": output_path,
            }
        # Handle _LIST virtual nodes: gather member paths; unresolvable
        # members are silently skipped.
        if step.node_type == "_LIST":
            item_paths = []
            for item_id in step.config.get("items", []):
                item_cache_id = input_cache_ids.get(item_id)
                if item_cache_id:
                    path = cache_mgr.get_by_cid(item_cache_id)
                    if path:
                        item_paths.append(str(path))
            # The "output" recorded in the claim is a JSON list, not a file path.
            complete_task(step.cache_id, worker_id, json.dumps(item_paths))
            return {
                "status": "completed",
                "step_id": step.step_id,
                "cache_id": step.cache_id,
                "output_path": None,
                "item_paths": item_paths,
            }
        # Handle COMPOUND nodes (collapsed effect chains) via a single
        # FFmpeg invocation.
        if step.node_type == "COMPOUND":
            filter_chain = step.config.get("filter_chain", [])
            if not filter_chain:
                raise ValueError("COMPOUND step has empty filter_chain")
            # Resolve input paths
            input_paths = []
            for input_step_id in step.input_steps:
                input_cache_id = input_cache_ids.get(input_step_id)
                if not input_cache_id:
                    raise ValueError(f"No cache_id for input step: {input_step_id}")
                path = cache_mgr.get_by_cid(input_cache_id)
                if not path:
                    raise ValueError(f"Input not in cache: {input_cache_id[:16]}...")
                input_paths.append(Path(path))
            if not input_paths:
                raise ValueError("COMPOUND step has no inputs")
            # Build FFmpeg filter graph from chain
            import subprocess
            import tempfile
            filters = []
            for filter_item in filter_chain:
                filter_type = filter_item.get("type", "")
                filter_config = filter_item.get("config", {})
                if filter_type == "TRANSFORM":
                    # Map recognized transform keys to FFmpeg eq/hue filters;
                    # unrecognized effect names are silently ignored.
                    effects = filter_config.get("effects", {})
                    for eff_name, eff_value in effects.items():
                        if eff_name == "saturation":
                            filters.append(f"eq=saturation={eff_value}")
                        elif eff_name == "brightness":
                            filters.append(f"eq=brightness={eff_value}")
                        elif eff_name == "contrast":
                            filters.append(f"eq=contrast={eff_value}")
                        elif eff_name == "hue":
                            filters.append(f"hue=h={eff_value}")
                elif filter_type == "RESIZE":
                    width = filter_config.get("width", -1)
                    height = filter_config.get("height", -1)
                    mode = filter_config.get("mode", "fit")
                    if mode == "fit":
                        filters.append(f"scale={width}:{height}:force_original_aspect_ratio=decrease")
                    elif mode == "fill":
                        filters.append(f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}")
                    else:
                        filters.append(f"scale={width}:{height}")
            output_dir = Path(tempfile.mkdtemp())
            output_path = output_dir / f"compound_{step.cache_id[:16]}.mp4"
            # NOTE(review): only input_paths[0] is fed to FFmpeg; extra
            # resolved inputs are unused here — confirm intended.
            cmd = ["ffmpeg", "-y", "-i", str(input_paths[0])]
            # Handle segment timing (-ss/-t placed after -i: applied as
            # output options, i.e. accurate but slower seek).
            for filter_item in filter_chain:
                if filter_item.get("type") == "SEGMENT":
                    seg_config = filter_item.get("config", {})
                    if "start" in seg_config:
                        cmd.extend(["-ss", str(seg_config["start"])])
                    if "end" in seg_config:
                        duration = seg_config["end"] - seg_config.get("start", 0)
                        cmd.extend(["-t", str(duration)])
                    elif "duration" in seg_config:
                        cmd.extend(["-t", str(seg_config["duration"])])
            if filters:
                cmd.extend(["-vf", ",".join(filters)])
            cmd.extend(["-c:v", "libx264", "-c:a", "aac", str(output_path)])
            logger.info(f"Running COMPOUND FFmpeg: {' '.join(cmd)}")
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode != 0:
                raise RuntimeError(f"FFmpeg failed: {result.stderr}")
            cached_file, ipfs_cid = cache_mgr.put(
                source_path=output_path,
                node_type="COMPOUND",
                node_id=step.cache_id,
            )
            logger.info(f"COMPOUND step {step.step_id} completed with {len(filter_chain)} filters")
            complete_task(step.cache_id, worker_id, str(cached_file.path))
            import shutil
            if output_dir.exists():
                shutil.rmtree(output_dir, ignore_errors=True)
            return {
                "status": "completed",
                "step_id": step.step_id,
                "cache_id": step.cache_id,
                "output_path": str(cached_file.path),
                "cid": cached_file.cid,
                "ipfs_cid": ipfs_cid,
                "filter_count": len(filter_chain),
            }
        # Get executor for this node type; fall back to the raw string when
        # the name is not a NodeType enum member.
        try:
            node_type = NodeType[step.node_type]
        except KeyError:
            node_type = step.node_type
        executor = get_executor(node_type)
        if executor is None:
            raise ValueError(f"No executor for node type: {step.node_type}")
        # Resolve input paths from cache
        input_paths = []
        for input_step_id in step.input_steps:
            input_cache_id = input_cache_ids.get(input_step_id)
            if not input_cache_id:
                raise ValueError(f"No cache_id for input step: {input_step_id}")
            path = cache_mgr.get_by_cid(input_cache_id)
            if not path:
                raise ValueError(f"Input not in cache: {input_cache_id[:16]}...")
            input_paths.append(Path(path))
        # Create temp output path
        import tempfile
        output_dir = Path(tempfile.mkdtemp())
        output_path = output_dir / f"output_{step.cache_id[:16]}.mp4"
        # Execute
        logger.info(f"Running executor for {step.node_type} with {len(input_paths)} inputs")
        result_path = executor.execute(step.config, input_paths, output_path)
        # Store in IPFS-backed cache
        cached_file, ipfs_cid = cache_mgr.put(
            source_path=result_path,
            node_type=step.node_type,
            node_id=step.cache_id,
        )
        logger.info(f"Step {step.step_id} completed, IPFS CID: {ipfs_cid}")
        # Mark completed
        complete_task(step.cache_id, worker_id, str(cached_file.path))
        # Build outputs list (for multi-output support)
        outputs = []
        if step.outputs:
            # Use pre-defined outputs from step. All outputs share the same
            # cached file path here — TODO confirm per-output paths.
            for output_def in step.outputs:
                outputs.append({
                    "name": output_def.name,
                    "cache_id": output_def.cache_id,
                    "media_type": output_def.media_type,
                    "index": output_def.index,
                    "path": str(cached_file.path),
                    "cid": cached_file.cid,
                    "ipfs_cid": ipfs_cid,
                })
        else:
            # Single output (backwards compat)
            outputs.append({
                "name": step.name or step.step_id,
                "cache_id": step.cache_id,
                "media_type": "video/mp4",
                "index": 0,
                "path": str(cached_file.path),
                "cid": cached_file.cid,
                "ipfs_cid": ipfs_cid,
            })
        # Cleanup temp
        if output_dir.exists():
            import shutil
            shutil.rmtree(output_dir, ignore_errors=True)
        return {
            "status": "completed",
            "step_id": step.step_id,
            "name": step.name,
            "cache_id": step.cache_id,
            "output_path": str(cached_file.path),
            "cid": cached_file.cid,
            "ipfs_cid": ipfs_cid,
            "outputs": outputs,
        }
    except Exception as e:
        # Release the claim with an error so waiters see FAILED rather
        # than timing out.
        logger.error(f"Step {step.step_id} failed: {e}")
        fail_task(step.cache_id, worker_id, str(e))
        return {
            "status": "failed",
            "step_id": step.step_id,
            "cache_id": step.cache_id,
            "error": str(e),
        }

View File

@@ -1,582 +0,0 @@
"""
S-expression step execution task.
Executes individual steps received as S-expressions.
The S-expression is the canonical format - workers verify
cache_ids by hashing the received S-expression.
"""
import json
import logging
import os
import socket
from pathlib import Path
from typing import Dict, Optional
from celery import current_task
# Import from the Celery app
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from celery_app import app
from claiming import (
get_claimer,
claim_task,
complete_task,
fail_task,
ClaimStatus,
)
from cache_manager import get_cache_manager
# Import artdag S-expression modules
try:
from artdag.sexp import parse, Symbol, Keyword
from artdag import NodeType
from artdag.executor import get_executor
except ImportError:
parse = None
Symbol = None
Keyword = None
NodeType = None
get_executor = None
logger = logging.getLogger(__name__)
def get_worker_id() -> str:
    """Build the worker identity string, ``hostname:pid``."""
    host = socket.gethostname()
    return "{}:{}".format(host, os.getpid())
def sexp_to_config(sexp) -> Dict:
    """
    Convert a parsed step S-expression into a config dict.

    Example:
        Input: (effect :hash "abc123" :inputs ["step1"])
        Output: {"node_type": "EFFECT", "hash": "abc123", "inputs": ["step1"]}
    """
    if not isinstance(sexp, list) or len(sexp) < 1:
        raise ValueError(f"Invalid step S-expression: {sexp}")
    # Head symbol names the node type.
    head = sexp[0]
    node_type = head.name.upper() if isinstance(head, Symbol) else str(head).upper()
    config = {"node_type": node_type}
    pos = 1
    end = len(sexp)
    while pos < end:
        token = sexp[pos]
        if isinstance(token, Keyword) and pos + 1 < end:
            # Keyword followed by its value; kebab-case becomes snake_case.
            config[token.name.replace('-', '_')] = sexp[pos + 1]
            pos += 2
        else:
            # A trailing keyword with no value, or a positional argument:
            # skipped, matching the lenient original behavior.
            pos += 1
    return config
@app.task(bind=True, name='tasks.execute_step_sexp')
def execute_step_sexp(
    self,
    step_sexp: str,
    step_id: str,
    cache_id: str,
    plan_id: str,
    input_cache_ids: Dict[str, str],
) -> dict:
    """
    Execute a single step from an S-expression.

    The step is received as a serialized S-expression string.
    Workers can verify the cache_id by hashing the S-expression.

    Args:
        step_sexp: Serialized S-expression for the step
        step_id: Human-readable step identifier
        cache_id: Expected cache_id (SHA3-256 of step_sexp)
        plan_id: ID of the parent execution plan
        input_cache_ids: Mapping from input step_id to their cache_id

    Returns:
        Dict with execution result. ``status`` is one of: "cached",
        "completed", "completed_by_other", "timeout", "failed".
    """
    if parse is None:
        raise ImportError("artdag.sexp not available")
    worker_id = get_worker_id()
    task_id = self.request.id
    logger.info(f"Executing step {step_id} cache_id={cache_id[:16]}...")
    logger.debug(f"Step S-expression: {step_sexp[:100]}...")
    # Parse the S-expression; a parse failure is reported, not raised.
    try:
        parsed = parse(step_sexp)
        config = sexp_to_config(parsed)
        node_type = config.pop("node_type")
    except Exception as e:
        logger.error(f"Failed to parse step S-expression: {e}")
        return {
            "status": "failed",
            "step_id": step_id,
            "cache_id": cache_id,
            "error": f"Parse error: {e}",
        }
    # Get cache manager
    cache_mgr = get_cache_manager()
    # Check if already cached — fast path that skips claiming entirely.
    cached_path = cache_mgr.get_by_cid(cache_id)
    if cached_path:
        logger.info(f"Step {step_id} already cached at {cached_path}")
        claimer = get_claimer()
        claimer.mark_cached(cache_id, str(cached_path))
        return {
            "status": "cached",
            "step_id": step_id,
            "cache_id": cache_id,
            "output_path": str(cached_path),
        }
    # Try to claim the task; on losing the race, wait for the winner.
    if not claim_task(cache_id, worker_id, task_id):
        logger.info(f"Step {step_id} claimed by another worker, waiting...")
        claimer = get_claimer()
        result = claimer.wait_for_completion(cache_id, timeout=600)
        if result and result.status == ClaimStatus.COMPLETED:
            return {
                "status": "completed_by_other",
                "step_id": step_id,
                "cache_id": cache_id,
                "output_path": result.output_path,
            }
        elif result and result.status == ClaimStatus.CACHED:
            return {
                "status": "cached",
                "step_id": step_id,
                "cache_id": cache_id,
                "output_path": result.output_path,
            }
        elif result and result.status == ClaimStatus.FAILED:
            return {
                "status": "failed",
                "step_id": step_id,
                "cache_id": cache_id,
                "error": result.error,
            }
        else:
            return {
                "status": "timeout",
                "step_id": step_id,
                "cache_id": cache_id,
                "error": "Timeout waiting for other worker",
            }
    # We have the claim, update to running
    claimer = get_claimer()
    claimer.update_status(cache_id, worker_id, ClaimStatus.RUNNING)
    try:
        # Handle SOURCE nodes
        if node_type == "SOURCE":
            # Support both :cid (new IPFS) and :hash (legacy)
            content_id = config.get("cid") or config.get("hash")
            if not content_id:
                raise ValueError("SOURCE step missing :cid or :hash")
            path = cache_mgr.get_by_cid(content_id)
            if not path:
                raise ValueError(f"SOURCE input not found: {content_id[:16]}...")
            output_path = str(path)
            complete_task(cache_id, worker_id, output_path)
            return {
                "status": "completed",
                "step_id": step_id,
                "cache_id": cache_id,
                "output_path": output_path,
            }
        # Handle EFFECT nodes
        if node_type == "EFFECT":
            effect_hash = config.get("cid") or config.get("hash")
            if not effect_hash:
                raise ValueError("EFFECT step missing :cid")
            # Get input paths; an entry maps through input_cache_ids, or is
            # treated as a literal cache id when no mapping exists.
            inputs = config.get("inputs", [])
            input_paths = []
            for inp in inputs:
                inp_cache_id = input_cache_ids.get(inp, inp)
                path = cache_mgr.get_by_cid(inp_cache_id)
                if not path:
                    raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
                input_paths.append(Path(path))
            # Get executor
            try:
                executor = get_executor(NodeType.SOURCE)  # Effects use SOURCE executor for now
            except:
                executor = None
            if executor is None:
                # Fallback: copy input to output (identity-like behavior)
                if input_paths:
                    output_path = str(input_paths[0])
                    complete_task(cache_id, worker_id, output_path)
                    return {
                        "status": "completed",
                        "step_id": step_id,
                        "cache_id": cache_id,
                        "output_path": output_path,
                    }
                raise ValueError(f"No executor for EFFECT and no inputs")
            # NOTE(review): when an executor IS found, this branch does not
            # return — control falls through to the generic executor path
            # below. Confirm that is intended.
        # Handle COMPOUND nodes (collapsed effect chains)
        if node_type == "COMPOUND":
            filter_chain = config.get("filter_chain", [])
            if not filter_chain:
                raise ValueError("COMPOUND step has empty filter_chain")
            # Get input paths
            inputs = config.get("inputs", [])
            input_paths = []
            for inp in inputs:
                inp_cache_id = input_cache_ids.get(inp, inp)
                path = cache_mgr.get_by_cid(inp_cache_id)
                if not path:
                    raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
                input_paths.append(Path(path))
            if not input_paths:
                raise ValueError("COMPOUND step has no inputs")
            # Build FFmpeg filter graph from chain
            filters = []
            for i, filter_item in enumerate(filter_chain):
                filter_type = filter_item.get("type", "")
                filter_config = filter_item.get("config", {})
                if filter_type == "EFFECT":
                    # Effect - for now identity-like, can be extended
                    effect_hash = filter_config.get("cid") or filter_config.get("hash") or filter_config.get("effect")
                    # TODO: resolve effect to actual FFmpeg filter
                    # For now, skip identity-like effects
                    pass
                elif filter_type == "TRANSFORM":
                    # Transform effects map to FFmpeg filters
                    effects = filter_config.get("effects", {})
                    for eff_name, eff_value in effects.items():
                        if eff_name == "saturation":
                            filters.append(f"eq=saturation={eff_value}")
                        elif eff_name == "brightness":
                            filters.append(f"eq=brightness={eff_value}")
                        elif eff_name == "contrast":
                            filters.append(f"eq=contrast={eff_value}")
                        elif eff_name == "hue":
                            filters.append(f"hue=h={eff_value}")
                elif filter_type == "RESIZE":
                    width = filter_config.get("width", -1)
                    height = filter_config.get("height", -1)
                    mode = filter_config.get("mode", "fit")
                    if mode == "fit":
                        filters.append(f"scale={width}:{height}:force_original_aspect_ratio=decrease")
                    elif mode == "fill":
                        filters.append(f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}")
                    else:
                        filters.append(f"scale={width}:{height}")
                elif filter_type == "SEGMENT":
                    # Segment handled via -ss and -t, not filter
                    pass
            # Create temp output
            import tempfile
            import subprocess
            output_dir = Path(tempfile.mkdtemp())
            output_path = output_dir / f"compound_{cache_id[:16]}.mp4"
            # Build FFmpeg command (only the first input is used)
            input_path = input_paths[0]
            cmd = ["ffmpeg", "-y", "-i", str(input_path)]
            # Handle segment timing if present
            for filter_item in filter_chain:
                if filter_item.get("type") == "SEGMENT":
                    seg_config = filter_item.get("config", {})
                    if "start" in seg_config:
                        cmd.extend(["-ss", str(seg_config["start"])])
                    if "end" in seg_config:
                        duration = seg_config["end"] - seg_config.get("start", 0)
                        cmd.extend(["-t", str(duration)])
                    elif "duration" in seg_config:
                        cmd.extend(["-t", str(seg_config["duration"])])
            # Add filter graph if any
            if filters:
                cmd.extend(["-vf", ",".join(filters)])
            # Output options
            cmd.extend(["-c:v", "libx264", "-c:a", "aac", str(output_path)])
            logger.info(f"Running COMPOUND FFmpeg: {' '.join(cmd)}")
            result = subprocess.run(cmd, capture_output=True, text=True)
            if result.returncode != 0:
                raise RuntimeError(f"FFmpeg failed: {result.stderr}")
            # Store in cache
            cached_file, ipfs_cid = cache_mgr.put(
                source_path=output_path,
                node_type="COMPOUND",
                node_id=cache_id,
            )
            logger.info(f"COMPOUND step {step_id} completed with {len(filter_chain)} filters, IPFS CID: {ipfs_cid}")
            complete_task(cache_id, worker_id, str(cached_file.path))
            # Cleanup temp
            if output_dir.exists():
                import shutil
                shutil.rmtree(output_dir, ignore_errors=True)
            return {
                "status": "completed",
                "step_id": step_id,
                "cache_id": cache_id,
                "output_path": str(cached_file.path),
                "cid": cached_file.cid,
                "ipfs_cid": ipfs_cid,
                "filter_count": len(filter_chain),
            }
        # Get executor for other node types; fall back to the raw string
        # when the name is not a NodeType enum member.
        try:
            node_type_enum = NodeType[node_type]
        except (KeyError, TypeError):
            node_type_enum = node_type
        executor = get_executor(node_type_enum)
        if executor is None:
            raise ValueError(f"No executor for node type: {node_type}")
        # Resolve input paths
        inputs = config.pop("inputs", [])
        input_paths = []
        for inp in inputs:
            inp_cache_id = input_cache_ids.get(inp, inp)
            path = cache_mgr.get_by_cid(inp_cache_id)
            if not path:
                raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
            input_paths.append(Path(path))
        # Create temp output
        import tempfile
        output_dir = Path(tempfile.mkdtemp())
        output_path = output_dir / f"output_{cache_id[:16]}.mp4"
        # Execute
        logger.info(f"Running executor for {node_type} with {len(input_paths)} inputs")
        result_path = executor.execute(config, input_paths, output_path)
        # Store in cache
        cached_file, ipfs_cid = cache_mgr.put(
            source_path=result_path,
            node_type=node_type,
            node_id=cache_id,
        )
        logger.info(f"Step {step_id} completed, IPFS CID: {ipfs_cid}")
        complete_task(cache_id, worker_id, str(cached_file.path))
        # Cleanup temp
        if output_dir.exists():
            import shutil
            shutil.rmtree(output_dir, ignore_errors=True)
        return {
            "status": "completed",
            "step_id": step_id,
            "cache_id": cache_id,
            "output_path": str(cached_file.path),
            "cid": cached_file.cid,
            "ipfs_cid": ipfs_cid,
        }
    except Exception as e:
        # Release the claim with an error so waiting workers see FAILED.
        logger.error(f"Step {step_id} failed: {e}")
        fail_task(cache_id, worker_id, str(e))
        return {
            "status": "failed",
            "step_id": step_id,
            "cache_id": cache_id,
            "error": str(e),
        }
@app.task(bind=True, name='tasks.run_plan_sexp')
def run_plan_sexp(
    self,
    plan_sexp: str,
    run_id: Optional[str] = None,
) -> dict:
    """
    Execute a complete S-expression execution plan.

    Walks the parsed plan form, collecting top-level keyword metadata
    (:id, :recipe, :recipe-hash, :output), an (inputs ...) block, and
    (step ...) forms, then hands the reconstructed plan to PlanScheduler.

    Args:
        plan_sexp: Serialized S-expression plan
        run_id: Optional run ID for tracking

    Returns:
        Dict with execution results (status, output cache/path/CID, and
        per-category step counts).
    """
    if parse is None:
        raise ImportError("artdag.sexp not available")
    from artdag.sexp.scheduler import PlanScheduler
    from artdag.sexp.planner import ExecutionPlanSexp, PlanStep
    logger.info(f"Running plan from S-expression (run_id={run_id})")
    # Parse the plan S-expression
    parsed = parse(plan_sexp)
    # Extract plan metadata and steps
    plan_id = None
    recipe_id = None
    recipe_hash = None
    inputs = {}
    steps = []
    output_step_id = None
    # Start at 1: element 0 is the plan head symbol.
    i = 1
    while i < len(parsed):
        item = parsed[i]
        if isinstance(item, Keyword):
            key = item.name
            if i + 1 < len(parsed):
                value = parsed[i + 1]
                if key == "id":
                    plan_id = value
                elif key == "recipe":
                    recipe_id = value
                elif key == "recipe-hash":
                    recipe_hash = value
                elif key == "output":
                    output_step_id = value
                i += 2
            else:
                i += 1
        elif isinstance(item, list) and len(item) > 0:
            head = item[0]
            if isinstance(head, Symbol):
                if head.name == "inputs":
                    # Parse inputs block: each entry is (name value)
                    for j in range(1, len(item)):
                        inp = item[j]
                        if isinstance(inp, list) and len(inp) >= 2:
                            name = inp[0].name if isinstance(inp[0], Symbol) else str(inp[0])
                            value = inp[1]
                            inputs[name] = value
                elif head.name == "step":
                    # Parse step: (step <id> :cache-id <cid> :level <n> (<node ...>))
                    step_id = item[1] if len(item) > 1 else None
                    step_cache_id = None
                    step_level = 0
                    step_node = None
                    j = 2
                    while j < len(item):
                        sub = item[j]
                        if isinstance(sub, Keyword):
                            if sub.name == "cache-id" and j + 1 < len(item):
                                step_cache_id = item[j + 1]
                                j += 2
                            elif sub.name == "level" and j + 1 < len(item):
                                step_level = item[j + 1]
                                j += 2
                            else:
                                j += 1
                        elif isinstance(sub, list):
                            # First nested list is taken as the node form.
                            step_node = sub
                            j += 1
                        else:
                            j += 1
                    # Incomplete step forms are silently dropped.
                    if step_id and step_cache_id and step_node:
                        # Convert step_node to config
                        config = sexp_to_config(step_node)
                        node_type = config.pop("node_type")
                        step_inputs = config.pop("inputs", [])
                        steps.append(PlanStep(
                            step_id=step_id,
                            node_type=node_type,
                            config=config,
                            inputs=step_inputs if isinstance(step_inputs, list) else [],
                            cache_id=step_cache_id,
                            level=step_level,
                        ))
            i += 1
        else:
            i += 1
    # Create plan object; missing metadata falls back to placeholder values,
    # and the output defaults to the last parsed step.
    plan = ExecutionPlanSexp(
        plan_id=plan_id or "unknown",
        recipe_id=recipe_id or "unknown",
        recipe_hash=recipe_hash or "",
        steps=steps,
        output_step_id=output_step_id or (steps[-1].step_id if steps else ""),
        inputs=inputs,
    )
    # Create scheduler and run
    cache_mgr = get_cache_manager()
    scheduler = PlanScheduler(
        cache_manager=cache_mgr,
        celery_app=app,
        execute_task_name='tasks.execute_step_sexp',
    )
    result = scheduler.schedule(plan)
    return {
        "status": result.status,
        "run_id": run_id,
        "plan_id": result.plan_id,
        "output_cache_id": result.output_cache_id,
        "output_path": result.output_path,
        "output_ipfs_cid": result.output_ipfs_cid,
        "steps_completed": result.steps_completed,
        "steps_cached": result.steps_cached,
        "steps_failed": result.steps_failed,
        "error": result.error,
    }

View File

@@ -1,514 +0,0 @@
"""
Plan orchestration tasks.
Coordinates the full 3-phase execution:
1. Analyze inputs
2. Generate plan
3. Execute steps level by level
Uses IPFS-backed cache for durability.
"""
import json
import logging
import os
from pathlib import Path
from typing import Dict, List, Optional
from celery import current_task, group, chain
# Import from the Celery app
import sys
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from celery_app import app
from claiming import get_claimer
from cache_manager import get_cache_manager
# Import artdag modules
try:
from artdag import Cache
from artdag.analysis import Analyzer, AnalysisResult
from artdag.planning import RecipePlanner, ExecutionPlan, Recipe
except ImportError:
Cache = None
Analyzer = None
AnalysisResult = None
RecipePlanner = None
ExecutionPlan = None
Recipe = None
from .execute import execute_step
logger = logging.getLogger(__name__)
# Cache directories
CACHE_DIR = Path(os.environ.get('CACHE_DIR', str(Path.home() / ".artdag" / "cache")))
ANALYSIS_CACHE_DIR = CACHE_DIR / 'analysis'
PLAN_CACHE_DIR = CACHE_DIR / 'plans'
@app.task(bind=True, name='tasks.run_plan')
def run_plan(
    self,
    plan_json: str,
    run_id: Optional[str] = None,
) -> dict:
    """
    Execute a complete execution plan.

    Runs steps level by level, with parallel execution within each level.
    Results are stored in IPFS-backed cache.

    Args:
        plan_json: JSON-serialized ExecutionPlan
        run_id: Optional run ID for tracking

    Returns:
        Dict with execution results; "failed" includes the failing level
        and partial per-step results, "completed" includes the resolved
        output artifact(s).
    """
    if ExecutionPlan is None:
        raise ImportError("artdag.planning not available")
    plan = ExecutionPlan.from_json(plan_json)
    cache_mgr = get_cache_manager()
    logger.info(f"Executing plan {plan.plan_id[:16]}... ({len(plan.steps)} steps)")
    # Build initial cache_ids mapping (step_id -> cache_id)
    cache_ids = {}
    for step in plan.steps:
        cache_ids[step.step_id] = step.cache_id
    # Also map input hashes
    for name, cid in plan.input_hashes.items():
        cache_ids[name] = cid
    # Group steps by level
    steps_by_level = plan.get_steps_by_level()
    max_level = max(steps_by_level.keys()) if steps_by_level else 0
    results_by_step = {}
    total_cached = 0
    total_executed = 0
    for level in range(max_level + 1):
        level_steps = steps_by_level.get(level, [])
        if not level_steps:
            continue
        logger.info(f"Executing level {level}: {len(level_steps)} steps")
        # Check which steps need execution
        steps_to_run = []
        for step in level_steps:
            # Check if cached
            cached_path = cache_mgr.get_by_cid(step.cache_id)
            if cached_path:
                results_by_step[step.step_id] = {
                    "status": "cached",
                    "cache_id": step.cache_id,
                    "output_path": str(cached_path),
                }
                total_cached += 1
            else:
                steps_to_run.append(step)
        if not steps_to_run:
            logger.info(f"Level {level}: all steps cached")
            continue
        # Build input cache_ids for this level (snapshot shared with every
        # task in the group).
        level_cache_ids = dict(cache_ids)
        # Execute steps in parallel
        tasks = [
            execute_step.s(step.to_json(), plan.plan_id, level_cache_ids)
            for step in steps_to_run
        ]
        job = group(tasks)
        async_results = job.apply_async()
        # Wait for completion (1 hour cap per level).
        # NOTE(review): blocking .get() on subtasks inside a Celery task is
        # discouraged (deadlock risk) — confirm worker pool sizing covers it.
        try:
            step_results = async_results.get(timeout=3600)
        except Exception as e:
            logger.error(f"Level {level} execution failed: {e}")
            return {
                "status": "failed",
                "error": str(e),
                "level": level,
                "results": results_by_step,
                "run_id": run_id,
            }
        # Process results
        for result in step_results:
            step_id = result.get("step_id")
            cache_id = result.get("cache_id")
            results_by_step[step_id] = result
            cache_ids[step_id] = cache_id
            if result.get("status") in ("completed", "cached", "completed_by_other"):
                total_executed += 1
            elif result.get("status") == "failed":
                # Abort the whole plan on the first failed step.
                logger.error(f"Step {step_id} failed: {result.get('error')}")
                return {
                    "status": "failed",
                    "error": f"Step {step_id} failed: {result.get('error')}",
                    "level": level,
                    "results": results_by_step,
                    "run_id": run_id,
                }
    # Get final output
    output_step = plan.get_step(plan.output_step)
    output_cache_id = output_step.cache_id if output_step else None
    output_path = None
    output_ipfs_cid = None
    output_name = plan.output_name
    if output_cache_id:
        output_path = cache_mgr.get_by_cid(output_cache_id)
        output_ipfs_cid = cache_mgr.get_ipfs_cid(output_cache_id)
    # Build list of all outputs with their names and artifacts
    all_outputs = []
    for step in plan.steps:
        step_result = results_by_step.get(step.step_id, {})
        step_outputs = step_result.get("outputs", [])
        # If no outputs in result, build from step definition
        if not step_outputs and step.outputs:
            for output_def in step.outputs:
                output_cache_path = cache_mgr.get_by_cid(output_def.cache_id)
                output_ipfs = cache_mgr.get_ipfs_cid(output_def.cache_id) if output_cache_path else None
                all_outputs.append({
                    "name": output_def.name,
                    "step_id": step.step_id,
                    "step_name": step.name,
                    "cache_id": output_def.cache_id,
                    "media_type": output_def.media_type,
                    "path": str(output_cache_path) if output_cache_path else None,
                    "ipfs_cid": output_ipfs,
                    "status": "cached" if output_cache_path else "missing",
                })
        else:
            for output in step_outputs:
                all_outputs.append({
                    **output,
                    "step_id": step.step_id,
                    "step_name": step.name,
                    "status": "completed",
                })
    return {
        "status": "completed",
        "run_id": run_id,
        "plan_id": plan.plan_id,
        "plan_name": plan.name,
        "recipe_name": plan.recipe_name,
        "output_name": output_name,
        "output_cache_id": output_cache_id,
        "output_path": str(output_path) if output_path else None,
        "output_ipfs_cid": output_ipfs_cid,
        "total_steps": len(plan.steps),
        "cached": total_cached,
        "executed": total_executed,
        "results": results_by_step,
        "outputs": all_outputs,
    }
def _extract_analysis_from_recipe(compiled_recipe) -> List[Dict]:
"""
Extract analysis nodes from a compiled recipe.
Finds all (analyze ...) nodes and returns their configurations.
Analysis nodes are identified by type "ANALYZE" or by having
an "analyze" config key.
"""
analysis_nodes = []
nodes = compiled_recipe.nodes
if isinstance(nodes, dict):
nodes = list(nodes.values())
for node in nodes:
node_type = node.get("type", "").upper()
config = node.get("config", {})
# Check if this is an analysis node
if node_type == "ANALYZE" or config.get("analyze"):
analysis_nodes.append({
"node_id": node.get("id"),
"input_ref": config.get("input") or config.get("source"),
"feature": config.get("feature") or config.get("analyze"),
"config": config,
})
return analysis_nodes
@app.task(bind=True, name='tasks.run_recipe')
def run_recipe(
    self,
    recipe_sexp: str,
    input_hashes: Dict[str, str],
    run_id: Optional[str] = None,
) -> dict:
    """
    Run a complete recipe through all phases.
    The recipe S-expression declares what analysis is needed.
    Analysis nodes in the recipe are executed first, then their
    outputs are used to generate the execution plan.
    1. Parse: Compile recipe S-expression
    2. Analyze: Run analysis nodes from recipe
    3. Plan: Generate execution plan using analysis results
    4. Execute: Run the plan
    Args:
        recipe_sexp: Recipe S-expression content
        input_hashes: Mapping from input name to content hash
        run_id: Optional run ID for tracking
    Returns:
        Dict with final results (status, output path/CIDs, step counts,
        analysis count, and the cached plan's content ID). On a parse
        failure returns {"status": "failed", "error": ...} early.
    """
    # Import S-expression compiler
    try:
        from artdag.sexp import compile_string
    except ImportError:
        raise ImportError("artdag.sexp not available")
    # Analyzer is a module-level import guard (None when artdag.analysis
    # failed to import); fail fast before doing any work.
    if Analyzer is None:
        raise ImportError("artdag.analysis not available")
    cache_mgr = get_cache_manager()
    logger.info(f"Running recipe with {len(input_hashes)} inputs")
    # Phase 1: Parse recipe
    logger.info("Phase 1: Parsing recipe S-expression...")
    try:
        compiled = compile_string(recipe_sexp)
    except Exception as e:
        # Parse errors are reported as a failed result, not raised,
        # so Celery records a completed task with a failure payload.
        return {"status": "failed", "error": f"Recipe parse error: {e}"}
    logger.info(f"Parsed recipe: {compiled.name}")
    # Phase 2: Run analysis nodes from recipe
    logger.info("Phase 2: Running analysis from recipe...")
    analysis_nodes = _extract_analysis_from_recipe(compiled)
    logger.info(f"Found {len(analysis_nodes)} analysis nodes in recipe")
    ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    analyzer = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)
    analysis_results = {}
    for analysis_node in analysis_nodes:
        input_ref = analysis_node["input_ref"]
        feature = analysis_node["feature"]
        node_id = analysis_node["node_id"]
        # Resolve input reference to content hash
        cid = input_hashes.get(input_ref)
        if not cid:
            # Missing inputs are skipped, not fatal: the planner may
            # still produce a usable plan without this analysis.
            logger.warning(f"Analysis node {node_id}: input '{input_ref}' not in input_hashes")
            continue
        path = cache_mgr.get_by_cid(cid)
        if not path:
            logger.warning(f"Analysis node {node_id}: content {cid[:16]}... not in cache")
            continue
        try:
            # Run analysis for the specific feature
            features = [feature] if feature else ["beats", "energy"]
            result = analyzer.analyze(
                input_hash=cid,
                features=features,
                input_path=Path(path),
            )
            # Store result keyed by node_id so plan can reference it
            analysis_results[node_id] = result
            # Also store by cid for compatibility
            analysis_results[cid] = result
            logger.info(f"Analysis {node_id}: feature={feature}, tempo={result.tempo}")
        except Exception as e:
            # Best-effort: a single failed analysis does not abort the run.
            logger.warning(f"Analysis failed for {node_id}: {e}")
    logger.info(f"Completed {len(analysis_results)} analysis results")
    # Phase 3: Generate plan
    logger.info("Phase 3: Generating execution plan...")
    # Use the S-expression planner if available
    try:
        from artdag.sexp.planner import create_plan
        plan = create_plan(compiled, inputs=input_hashes)
    except ImportError:
        # Fall back to legacy planner
        if RecipePlanner is None:
            raise ImportError("No planner available")
        recipe = Recipe.from_dict(compiled.to_dict())
        planner = RecipePlanner(use_tree_reduction=True)
        plan = planner.plan(
            recipe=recipe,
            input_hashes=input_hashes,
            analysis=analysis_results,
        )
    logger.info(f"Generated plan with {len(plan.steps)} steps")
    # Save plan as S-expression through cache manager (goes to IPFS)
    import tempfile
    # Prefer the S-expression serialization when the plan supports it;
    # legacy plans only expose to_json().
    plan_content = plan.to_sexp_string() if hasattr(plan, 'to_sexp_string') else plan.to_json()
    plan_suffix = ".sexp" if hasattr(plan, 'to_sexp_string') else ".json"
    with tempfile.NamedTemporaryFile(delete=False, suffix=plan_suffix, mode="w") as tmp:
        tmp.write(plan_content)
        tmp_path = Path(tmp.name)
    # Store in cache (content-addressed, auto-pins to IPFS)
    # Plan is just another node output - no special treatment needed
    cached, plan_ipfs_cid = cache_mgr.put(tmp_path, node_type="plan", move=True)
    plan_cache_id = plan_ipfs_cid or cached.cid  # Prefer IPFS CID
    logger.info(f"Plan cached: cid={plan_cache_id}, ipfs={plan_ipfs_cid}")
    # Phase 4: Execute
    logger.info("Phase 4: Executing plan...")
    # NOTE(review): run_plan is called synchronously in-process here
    # (not dispatched via .delay) — confirm this is intended.
    result = run_plan(plan.to_json(), run_id=run_id)
    return {
        "status": result.get("status"),
        "run_id": run_id,
        "recipe": compiled.name,
        "plan_id": plan.plan_id,
        "plan_cache_id": plan_cache_id,
        "plan_ipfs_cid": plan_ipfs_cid,
        "output_path": result.get("output_path"),
        "output_cache_id": result.get("output_cache_id"),
        "output_ipfs_cid": result.get("output_ipfs_cid"),
        "analysis_count": len(analysis_results),
        "total_steps": len(plan.steps),
        "cached": result.get("cached", 0),
        "executed": result.get("executed", 0),
        "error": result.get("error"),
    }
@app.task(bind=True, name='tasks.generate_plan')
def generate_plan(
    self,
    recipe_sexp: str,
    input_hashes: Dict[str, str],
) -> dict:
    """
    Generate an execution plan without executing it.
    Useful for:
    - Previewing what will be executed
    - Checking cache status
    - Debugging recipe issues
    Args:
        recipe_sexp: Recipe S-expression content
        input_hashes: Mapping from input name to content hash
    Returns:
        Dict with plan details: per-step cache status, cached/pending
        counts, and the full plan JSON. On a parse failure returns
        {"status": "failed", "error": ...} early.
    """
    try:
        from artdag.sexp import compile_string
    except ImportError:
        raise ImportError("artdag.sexp not available")
    # Analyzer is a module-level import guard (None when artdag.analysis
    # failed to import).
    if Analyzer is None:
        raise ImportError("artdag.analysis not available")
    cache_mgr = get_cache_manager()
    # Parse recipe
    try:
        compiled = compile_string(recipe_sexp)
    except Exception as e:
        return {"status": "failed", "error": f"Recipe parse error: {e}"}
    # Extract and run analysis nodes from recipe
    # (mirrors the analysis phase of run_recipe, minus warning logs
    # for unresolvable inputs)
    analysis_nodes = _extract_analysis_from_recipe(compiled)
    ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
    analyzer = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)
    analysis_results = {}
    for analysis_node in analysis_nodes:
        input_ref = analysis_node["input_ref"]
        feature = analysis_node["feature"]
        node_id = analysis_node["node_id"]
        cid = input_hashes.get(input_ref)
        if not cid:
            # Unresolvable input: skip silently; planning continues.
            continue
        path = cache_mgr.get_by_cid(cid)
        if path:
            try:
                features = [feature] if feature else ["beats", "energy"]
                result = analyzer.analyze(
                    input_hash=cid,
                    features=features,
                    input_path=Path(path),
                )
                # Keyed both by node_id (for plan references) and by cid
                # (for compatibility), same as run_recipe.
                analysis_results[node_id] = result
                analysis_results[cid] = result
            except Exception as e:
                logger.warning(f"Analysis failed for {node_id}: {e}")
    # Generate plan: prefer the S-expression planner, fall back to legacy
    try:
        from artdag.sexp.planner import create_plan
        plan = create_plan(compiled, inputs=input_hashes)
    except ImportError:
        if RecipePlanner is None:
            raise ImportError("No planner available")
        recipe = Recipe.from_dict(compiled.to_dict())
        planner = RecipePlanner(use_tree_reduction=True)
        plan = planner.plan(
            recipe=recipe,
            input_hashes=input_hashes,
            analysis=analysis_results,
        )
    # Check cache status for each step
    steps_status = []
    for step in plan.steps:
        cached = cache_mgr.has_content(step.cache_id)
        steps_status.append({
            "step_id": step.step_id,
            "node_type": step.node_type,
            "cache_id": step.cache_id,
            "level": step.level,
            "cached": cached,
        })
    cached_count = sum(1 for s in steps_status if s["cached"])
    return {
        "status": "planned",
        "recipe": compiled.name,
        "plan_id": plan.plan_id,
        "total_steps": len(plan.steps),
        "cached_steps": cached_count,
        "pending_steps": len(plan.steps) - cached_count,
        "steps": steps_status,
        "plan_json": plan.to_json(),
    }

303
tasks/streaming.py Normal file
View File

@@ -0,0 +1,303 @@
"""
Streaming video rendering task.
Executes S-expression recipes for frame-by-frame video processing.
Supports CID and friendly name references for assets.
"""
import hashlib
import logging
import os
import sys
import tempfile
from pathlib import Path
from typing import Dict, Optional
from celery import current_task
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from celery_app import app
from cache_manager import get_cache_manager
logger = logging.getLogger(__name__)
def resolve_asset(ref: str, actor_id: Optional[str] = None) -> Optional[Path]:
    """
    Resolve an asset reference (CID or friendly name) to a file path.

    Resolution order:
      1. Treat ``ref`` as a content ID and look it up in the local cache.
      2. If that fails and ``actor_id`` is given, treat ``ref`` as a
         user-scoped friendly name, resolve it to a CID via the database,
         and look that CID up in the cache.

    Args:
        ref: CID or friendly name (e.g., "my-video" or "QmXyz...")
        actor_id: User ID for friendly name resolution

    Returns:
        Path to the asset file, or None if not found
    """
    cache_mgr = get_cache_manager()

    # Try as direct CID first
    path = cache_mgr.get_by_cid(ref)
    if path and path.exists():
        logger.info(f"Resolved {ref[:16]}... as CID to {path}")
        return path

    # Try as friendly name if actor_id provided
    if actor_id:
        import asyncio
        from database import resolve_friendly_name
        try:
            # asyncio.run creates, runs, and closes a fresh event loop;
            # safe here because a Celery worker task has no running loop.
            cid = asyncio.run(resolve_friendly_name(actor_id, ref))
            if cid:
                path = cache_mgr.get_by_cid(cid)
                if path and path.exists():
                    logger.info(f"Resolved '{ref}' via friendly name to {path}")
                    return path
        except Exception as e:
            # Best-effort: fall through to the final "not found" path.
            logger.warning(f"Failed to resolve friendly name '{ref}': {e}")

    logger.warning(f"Could not resolve asset reference: {ref}")
    return None
class CIDVideoSource:
    """
    Video source that resolves CIDs to file paths.

    Wraps the streaming VideoSource so recipes can reference cached
    assets by CID (or friendly name, via actor_id) instead of file
    paths. The underlying source is opened lazily on first access, so
    construction never touches the cache or filesystem.
    """

    def __init__(self, cid: str, fps: float = 30, actor_id: Optional[str] = None):
        self.cid = cid
        self.fps = fps
        self.actor_id = actor_id
        self._source = None  # opened lazily by _ensure_source

    def _ensure_source(self):
        """Resolve the CID and open the wrapped VideoSource on first use."""
        if self._source is None:
            path = resolve_asset(self.cid, self.actor_id)
            if not path:
                raise ValueError(f"Could not resolve video source: {self.cid}")
            # Import the real implementation from the primitives library.
            # (The earlier duplicate import from streaming.stream_sexp_generic
            # was dead code and could raise a spurious ImportError.)
            from sexp_effects.primitive_libs.streaming import VideoSource
            self._source = VideoSource(str(path), self.fps)

    def read_at(self, t: float):
        """Return the frame at time t (seconds)."""
        self._ensure_source()
        return self._source.read_at(t)

    def read(self):
        """Return the next sequential frame."""
        self._ensure_source()
        return self._source.read()

    @property
    def size(self):
        self._ensure_source()
        return self._source.size

    @property
    def duration(self):
        self._ensure_source()
        # NOTE(review): reaches into the wrapped source's private
        # _duration attribute — VideoSource appears to expose no public
        # duration; confirm against the primitives library.
        return self._source._duration

    def close(self):
        """Release the underlying source if it was ever opened."""
        if self._source:
            self._source.close()
class CIDAudioAnalyzer:
    """
    Audio analyzer addressed by content ID.

    Defers resolving the CID and constructing the wrapped AudioAnalyzer
    until the first query, so building the object is side-effect free.
    """

    def __init__(self, cid: str, actor_id: Optional[str] = None):
        self.cid = cid
        self.actor_id = actor_id
        self._analyzer = None  # created lazily by _ensure_analyzer

    def _ensure_analyzer(self):
        """Resolve the CID and open the wrapped AudioAnalyzer on first use."""
        if self._analyzer is not None:
            return
        path = resolve_asset(self.cid, self.actor_id)
        if not path:
            raise ValueError(f"Could not resolve audio source: {self.cid}")
        from sexp_effects.primitive_libs.streaming import AudioAnalyzer
        self._analyzer = AudioAnalyzer(str(path))

    def get_energy(self, t: float) -> float:
        """Audio energy at time t (seconds)."""
        self._ensure_analyzer()
        return self._analyzer.get_energy(t)

    def get_beat(self, t: float) -> bool:
        """Whether a beat occurs at time t (seconds)."""
        self._ensure_analyzer()
        return self._analyzer.get_beat(t)

    def get_beat_count(self, t: float) -> int:
        """Count of beats up to time t (seconds)."""
        self._ensure_analyzer()
        return self._analyzer.get_beat_count(t)

    @property
    def duration(self):
        self._ensure_analyzer()
        return self._analyzer.duration
def create_cid_primitives(actor_id: Optional[str] = None):
    """
    Build CID-aware replacements for the streaming source primitives.

    The returned callables construct CIDVideoSource / CIDAudioAnalyzer,
    which resolve CIDs (or friendly names scoped to actor_id) lazily.

    Returns:
        Dict mapping primitive names to factory functions.
    """
    return {
        'streaming:make-video-source': (
            lambda cid, fps=30: CIDVideoSource(cid, fps, actor_id)
        ),
        'streaming:make-audio-analyzer': (
            lambda cid: CIDAudioAnalyzer(cid, actor_id)
        ),
    }
@app.task(bind=True, name='tasks.run_stream')
def run_stream(
    self,
    recipe_sexp: str,
    output_name: str = "output.mp4",
    duration: Optional[float] = None,
    fps: Optional[float] = None,
    actor_id: Optional[str] = None,
    sources_sexp: Optional[str] = None,
    audio_sexp: Optional[str] = None,
) -> dict:
    """
    Execute a streaming S-expression recipe and cache the rendered video.

    Renders frame-by-frame via the StreamInterpreter inside a throwaway
    work directory, then stores the output through the cache manager
    (which also pins it to IPFS).

    Args:
        recipe_sexp: The recipe S-expression content
        output_name: Name for the output file
        duration: Optional duration override (seconds)
        fps: Optional FPS override
        actor_id: User ID for friendly name resolution
        sources_sexp: Optional sources config S-expression
        audio_sexp: Optional audio config S-expression

    Returns:
        Dict with status plus, on success, output_cid / ipfs_cid /
        output_path; on failure, an "error" message.
    """
    task_id = self.request.id
    logger.info(f"Starting stream task {task_id}")
    self.update_state(state='INITIALIZING', meta={'progress': 0})

    # Get the app directory for primitive/effect paths
    app_dir = Path(__file__).parent.parent  # celery/
    sexp_effects_dir = app_dir / "sexp_effects"
    effects_dir = app_dir / "effects"
    templates_dir = app_dir / "templates"

    # Create temp directory for work
    work_dir = Path(tempfile.mkdtemp(prefix="stream_"))
    recipe_path = work_dir / "recipe.sexp"
    output_path = work_dir / output_name

    try:
        # Create symlinks to effect directories so relative paths inside
        # recipes keep working. Done inside the try so the work dir is
        # cleaned up even if symlinking fails (previously a failure here
        # leaked the temp directory).
        (work_dir / "sexp_effects").symlink_to(sexp_effects_dir)
        (work_dir / "effects").symlink_to(effects_dir)
        (work_dir / "templates").symlink_to(templates_dir)

        # Write recipe to temp file
        recipe_path.write_text(recipe_sexp)

        # Write optional config files
        sources_path = None
        if sources_sexp:
            sources_path = work_dir / "sources.sexp"
            sources_path.write_text(sources_sexp)
        audio_path = None
        if audio_sexp:
            audio_path = work_dir / "audio.sexp"
            audio_path.write_text(audio_sexp)

        self.update_state(state='RENDERING', meta={'progress': 5})

        # Import the streaming interpreter
        from streaming.stream_sexp_generic import StreamInterpreter

        interp = StreamInterpreter(str(recipe_path))
        # Point the interpreter at the bundled primitive libraries.
        interp.primitive_lib_dir = sexp_effects_dir / "primitive_libs"
        if fps:
            interp.config['fps'] = fps
        if sources_path:
            interp.sources_config = sources_path
        if audio_path:
            interp.audio_config = audio_path

        # Override primitives with CID-aware versions so recipes can
        # reference cached assets by CID or friendly name.
        interp.primitives.update(create_cid_primitives(actor_id))

        logger.info(f"Rendering to {output_path}")
        interp.run(duration=duration, output=str(output_path))

        self.update_state(state='CACHING', meta={'progress': 90})

        if not output_path.exists():
            return {
                "status": "failed",
                "task_id": task_id,
                "error": "Output file not created",
            }

        # Store output in the content-addressed cache (auto-pins to IPFS)
        cache_mgr = get_cache_manager()
        cached_file, ipfs_cid = cache_mgr.put(
            source_path=output_path,
            node_type="STREAM_OUTPUT",
            node_id=f"stream_{task_id}",
        )
        logger.info(f"Stream output cached: CID={cached_file.cid}, IPFS={ipfs_cid}")
        return {
            "status": "completed",
            "task_id": task_id,
            "output_cid": cached_file.cid,
            "ipfs_cid": ipfs_cid,
            "output_path": str(cached_file.path),
        }
    except Exception as e:
        # logger.exception records the traceback in the worker log
        # (replaces the previous print_exc to stderr).
        logger.exception(f"Stream task {task_id} failed: {e}")
        return {
            "status": "failed",
            "task_id": task_id,
            "error": str(e),
        }
    finally:
        # Cleanup temp directory (and the symlinks inside it)
        import shutil
        shutil.rmtree(work_dir, ignore_errors=True)