Add COMPOUND node execution and S-expression API

- Execute COMPOUND nodes with combined FFmpeg filter chain
- Handle TRANSFORM, RESIZE, SEGMENT filters in chain
- Migrate orchestrator to S-expression recipes (remove YAML)
- Update API endpoints to use recipe_sexp parameter
- Extract analysis nodes from recipe for dynamic analysis

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
gilesb
2026-01-12 01:26:26 +00:00
parent 3599f3779b
commit 8e0b473925
4 changed files with 391 additions and 79 deletions
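
For context, a client call against the updated API might look like the sketch below. Only the recipe_sexp/input_hashes/features payload shape and the /api/run/{run_id} polling path appear in this diff; the run route, host, and the example S-expression text are illustrative assumptions.

import requests

payload = {
    # The recipe is now submitted as an S-expression string instead of YAML
    "recipe_sexp": '(recipe "demo" ...)',   # placeholder recipe text
    "input_hashes": {"song": "3fa8c1d2"},   # example content hashes
    "features": ["beats", "energy"],
}

# Hypothetical mount point for run_recipe_endpoint (not shown in this diff)
resp = requests.post("http://localhost:8000/api/recipe/run", json=payload)
run_id = resp.json()["run_id"]

# Polling path confirmed by the endpoint docstring below
status = requests.get(f"http://localhost:8000/api/run/{run_id}").json()
print(status)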

View File

@@ -11,7 +11,6 @@ import uuid
from datetime import datetime, timezone
from typing import Dict, List, Optional
import yaml
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel
@@ -26,9 +25,8 @@ RUNS_KEY_PREFIX = "artdag:run:"
class PlanRequest(BaseModel):
recipe_yaml: str
recipe_sexp: str
input_hashes: Dict[str, str]
features: List[str] = ["beats", "energy"]
class ExecutePlanRequest(BaseModel):
@@ -37,9 +35,8 @@ class ExecutePlanRequest(BaseModel):
class RecipeRunRequest(BaseModel):
recipe_yaml: str
recipe_sexp: str
input_hashes: Dict[str, str]
features: List[str] = ["beats", "energy"]
def compute_run_id(input_hashes: List[str], recipe: str, recipe_hash: Optional[str] = None) -> str:
@@ -68,9 +65,8 @@ async def generate_plan_endpoint(
try:
task = generate_plan.delay(
recipe_yaml=request.recipe_yaml,
recipe_sexp=request.recipe_sexp,
input_hashes=request.input_hashes,
features=request.features,
)
# Wait for result (plan generation is usually fast)
@@ -136,15 +132,16 @@ async def run_recipe_endpoint(
Returns immediately with run_id. Poll /api/run/{run_id} for status.
"""
from tasks.orchestrate import run_recipe
from artdag.sexp import compile_string
import database
redis = get_redis_client()
cache = get_cache_manager()
# Parse recipe name
# Parse recipe name from S-expression
try:
recipe_data = yaml.safe_load(request.recipe_yaml)
recipe_name = recipe_data.get("name", "unknown")
compiled = compile_string(request.recipe_sexp)
recipe_name = compiled.name or "unknown"
except Exception:
recipe_name = "unknown"
@@ -152,7 +149,7 @@ async def run_recipe_endpoint(
run_id = compute_run_id(
list(request.input_hashes.values()),
recipe_name,
hashlib.sha3_256(request.recipe_yaml.encode()).hexdigest()
hashlib.sha3_256(request.recipe_sexp.encode()).hexdigest()
)
# Check if already completed
@@ -171,9 +168,8 @@ async def run_recipe_endpoint(
# Submit to Celery
try:
task = run_recipe.delay(
recipe_yaml=request.recipe_yaml,
recipe_sexp=request.recipe_sexp,
input_hashes=request.input_hashes,
features=request.features,
run_id=run_id,
)
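
The run_id derivation above now keys on the sha3-256 of the raw S-expression text, so any byte-level edit to the recipe yields a distinct run. A minimal sketch of that keying, assuming a simple concatenation (compute_run_id's internals are not shown in this diff):

import hashlib

recipe_sexp = "(recipe demo ...)"                        # illustrative recipe text
input_hashes = {"song": "3fa8c1d2", "clip": "9b02ee41"}  # example content hashes

recipe_hash = hashlib.sha3_256(recipe_sexp.encode()).hexdigest()

# Stand-in for compute_run_id: hash over input hashes plus recipe identity.
# The real function may combine or order these parts differently.
key = "|".join(input_hashes.values()) + "|demo|" + recipe_hash
run_id = hashlib.sha3_256(key.encode()).hexdigest()
print(run_id[:16])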

View File

@@ -178,6 +178,109 @@ def execute_step(
"item_paths": item_paths,
}
# Handle COMPOUND nodes (collapsed effect chains)
if step.node_type == "COMPOUND":
filter_chain = step.config.get("filter_chain", [])
if not filter_chain:
raise ValueError("COMPOUND step has empty filter_chain")
# Resolve input paths
input_paths = []
for input_step_id in step.input_steps:
input_cache_id = input_cache_ids.get(input_step_id)
if not input_cache_id:
raise ValueError(f"No cache_id for input step: {input_step_id}")
path = cache_mgr.get_by_content_hash(input_cache_id)
if not path:
raise ValueError(f"Input not in cache: {input_cache_id[:16]}...")
input_paths.append(Path(path))
if not input_paths:
raise ValueError("COMPOUND step has no inputs")
# Build FFmpeg filter graph from chain
import subprocess
import tempfile
filters = []
for filter_item in filter_chain:
filter_type = filter_item.get("type", "")
filter_config = filter_item.get("config", {})
if filter_type == "TRANSFORM":
effects = filter_config.get("effects", {})
for eff_name, eff_value in effects.items():
if eff_name == "saturation":
filters.append(f"eq=saturation={eff_value}")
elif eff_name == "brightness":
filters.append(f"eq=brightness={eff_value}")
elif eff_name == "contrast":
filters.append(f"eq=contrast={eff_value}")
elif eff_name == "hue":
filters.append(f"hue=h={eff_value}")
elif filter_type == "RESIZE":
width = filter_config.get("width", -1)
height = filter_config.get("height", -1)
mode = filter_config.get("mode", "fit")
if mode == "fit":
filters.append(f"scale={width}:{height}:force_original_aspect_ratio=decrease")
elif mode == "fill":
filters.append(f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}")
else:
filters.append(f"scale={width}:{height}")
output_dir = Path(tempfile.mkdtemp())
output_path = output_dir / f"compound_{step.cache_id[:16]}.mp4"
cmd = ["ffmpeg", "-y", "-i", str(input_paths[0])]
# Handle segment timing
for filter_item in filter_chain:
if filter_item.get("type") == "SEGMENT":
seg_config = filter_item.get("config", {})
if "start" in seg_config:
cmd.extend(["-ss", str(seg_config["start"])])
if "end" in seg_config:
duration = seg_config["end"] - seg_config.get("start", 0)
cmd.extend(["-t", str(duration)])
elif "duration" in seg_config:
cmd.extend(["-t", str(seg_config["duration"])])
if filters:
cmd.extend(["-vf", ",".join(filters)])
cmd.extend(["-c:v", "libx264", "-c:a", "aac", str(output_path)])
logger.info(f"Running COMPOUND FFmpeg: {' '.join(cmd)}")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"FFmpeg failed: {result.stderr}")
cached_file, ipfs_cid = cache_mgr.put(
source_path=output_path,
node_type="COMPOUND",
node_id=step.cache_id,
)
logger.info(f"COMPOUND step {step.step_id} completed with {len(filter_chain)} filters")
complete_task(step.cache_id, worker_id, str(cached_file.path))
import shutil
if output_dir.exists():
shutil.rmtree(output_dir, ignore_errors=True)
return {
"status": "completed",
"step_id": step.step_id,
"cache_id": step.cache_id,
"output_path": str(cached_file.path),
"content_hash": cached_file.content_hash,
"ipfs_cid": ipfs_cid,
"filter_count": len(filter_chain),
}
# Get executor for this node type
try:
node_type = NodeType[step.node_type]
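
To make the chain-to-filtergraph mapping concrete, here is what the branches above produce for a small hypothetical filter_chain (the translations mirror the TRANSFORM and RESIZE cases in the diff):

filter_chain = [
    {"type": "TRANSFORM", "config": {"effects": {"saturation": 1.4, "hue": 15}}},
    {"type": "RESIZE", "config": {"width": 1280, "height": 720, "mode": "fit"}},
]

filters = []
for item in filter_chain:
    cfg = item.get("config", {})
    if item["type"] == "TRANSFORM":
        for name, value in cfg.get("effects", {}).items():
            if name == "saturation":
                filters.append(f"eq=saturation={value}")
            elif name == "hue":
                filters.append(f"hue=h={value}")
    elif item["type"] == "RESIZE" and cfg.get("mode", "fit") == "fit":
        filters.append(f"scale={cfg['width']}:{cfg['height']}:force_original_aspect_ratio=decrease")

print(",".join(filters))
# eq=saturation=1.4,hue=h=15,scale=1280:720:force_original_aspect_ratio=decrease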

View File

@@ -249,6 +249,127 @@ def execute_step_sexp(
}
raise ValueError(f"No executor for EFFECT and no inputs")
# Handle COMPOUND nodes (collapsed effect chains)
if node_type == "COMPOUND":
filter_chain = config.get("filter_chain", [])
if not filter_chain:
raise ValueError("COMPOUND step has empty filter_chain")
# Get input paths
inputs = config.get("inputs", [])
input_paths = []
for inp in inputs:
inp_cache_id = input_cache_ids.get(inp, inp)
path = cache_mgr.get_by_content_hash(inp_cache_id)
if not path:
raise ValueError(f"Input not found: {inp_cache_id[:16]}...")
input_paths.append(Path(path))
if not input_paths:
raise ValueError("COMPOUND step has no inputs")
# Build FFmpeg filter graph from chain
filters = []
for filter_item in filter_chain:
filter_type = filter_item.get("type", "")
filter_config = filter_item.get("config", {})
if filter_type == "EFFECT":
# Effect - for now identity-like, can be extended
effect_hash = filter_config.get("hash") or filter_config.get("effect")
# TODO: resolve effect to actual FFmpeg filter
# For now, skip identity-like effects
pass
elif filter_type == "TRANSFORM":
# Transform effects map to FFmpeg filters
effects = filter_config.get("effects", {})
for eff_name, eff_value in effects.items():
if eff_name == "saturation":
filters.append(f"eq=saturation={eff_value}")
elif eff_name == "brightness":
filters.append(f"eq=brightness={eff_value}")
elif eff_name == "contrast":
filters.append(f"eq=contrast={eff_value}")
elif eff_name == "hue":
filters.append(f"hue=h={eff_value}")
elif filter_type == "RESIZE":
width = filter_config.get("width", -1)
height = filter_config.get("height", -1)
mode = filter_config.get("mode", "fit")
if mode == "fit":
filters.append(f"scale={width}:{height}:force_original_aspect_ratio=decrease")
elif mode == "fill":
filters.append(f"scale={width}:{height}:force_original_aspect_ratio=increase,crop={width}:{height}")
else:
filters.append(f"scale={width}:{height}")
elif filter_type == "SEGMENT":
# Segment handled via -ss and -t, not filter
pass
# Create temp output
import tempfile
import subprocess
output_dir = Path(tempfile.mkdtemp())
output_path = output_dir / f"compound_{cache_id[:16]}.mp4"
# Build FFmpeg command
input_path = input_paths[0]
cmd = ["ffmpeg", "-y", "-i", str(input_path)]
# Handle segment timing if present
for filter_item in filter_chain:
if filter_item.get("type") == "SEGMENT":
seg_config = filter_item.get("config", {})
if "start" in seg_config:
cmd.extend(["-ss", str(seg_config["start"])])
if "end" in seg_config:
duration = seg_config["end"] - seg_config.get("start", 0)
cmd.extend(["-t", str(duration)])
elif "duration" in seg_config:
cmd.extend(["-t", str(seg_config["duration"])])
# Add filter graph if any
if filters:
cmd.extend(["-vf", ",".join(filters)])
# Output options
cmd.extend(["-c:v", "libx264", "-c:a", "aac", str(output_path)])
logger.info(f"Running COMPOUND FFmpeg: {' '.join(cmd)}")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"FFmpeg failed: {result.stderr}")
# Store in cache
cached_file, ipfs_cid = cache_mgr.put(
source_path=output_path,
node_type="COMPOUND",
node_id=cache_id,
)
logger.info(f"COMPOUND step {step_id} completed with {len(filter_chain)} filters, IPFS CID: {ipfs_cid}")
complete_task(cache_id, worker_id, str(cached_file.path))
# Cleanup temp
if output_dir.exists():
import shutil
shutil.rmtree(output_dir, ignore_errors=True)
return {
"status": "completed",
"step_id": step_id,
"cache_id": cache_id,
"output_path": str(cached_file.path),
"content_hash": cached_file.content_hash,
"ipfs_cid": ipfs_cid,
"filter_count": len(filter_chain),
}
# Get executor for other node types
try:
node_type_enum = NodeType[node_type]
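
The SEGMENT case in both executors becomes -ss/-t output options rather than a filter entry. A worked example of the timing translation (the segment values are hypothetical):

seg_config = {"start": 2.0, "end": 5.5}

cmd = ["ffmpeg", "-y", "-i", "input.mp4"]
if "start" in seg_config:
    cmd.extend(["-ss", str(seg_config["start"])])
if "end" in seg_config:
    # -t takes a duration, so convert the absolute end timestamp
    cmd.extend(["-t", str(seg_config["end"] - seg_config.get("start", 0))])
elif "duration" in seg_config:
    cmd.extend(["-t", str(seg_config["duration"])])

print(" ".join(cmd))
# ffmpeg -y -i input.mp4 -ss 2.0 -t 3.5

One design note: because -ss is appended after -i here, FFmpeg performs output seeking (decoding and discarding frames up to the start point); moving -ss before -i would use faster input seeking, at a possible cost in frame accuracy with some codecs.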

View File

@@ -222,90 +222,159 @@ def run_plan(
}
def _extract_analysis_from_recipe(compiled_recipe) -> List[Dict]:
"""
Extract analysis nodes from a compiled recipe.
Finds all (analyze ...) nodes and returns their configurations.
Analysis nodes are identified by type "ANALYZE" or by having
an "analyze" config key.
"""
analysis_nodes = []
nodes = compiled_recipe.nodes
if isinstance(nodes, dict):
nodes = list(nodes.values())
for node in nodes:
node_type = node.get("type", "").upper()
config = node.get("config", {})
# Check if this is an analysis node
if node_type == "ANALYZE" or config.get("analyze"):
analysis_nodes.append({
"node_id": node.get("id"),
"input_ref": config.get("input") or config.get("source"),
"feature": config.get("feature") or config.get("analyze"),
"config": config,
})
return analysis_nodes
@app.task(bind=True, name='tasks.run_recipe')
def run_recipe(
self,
recipe_yaml: str,
recipe_sexp: str,
input_hashes: Dict[str, str],
features: List[str] = None,
run_id: Optional[str] = None,
) -> dict:
"""
Run a complete recipe through all 3 phases.
Run a complete recipe through all phases.
1. Analyze: Extract features from inputs
2. Plan: Generate execution plan
3. Execute: Run the plan
The recipe S-expression declares what analysis is needed.
Analysis nodes in the recipe are executed first, then their
outputs are used to generate the execution plan.
1. Parse: Compile recipe S-expression
2. Analyze: Run analysis nodes from recipe
3. Plan: Generate execution plan using analysis results
4. Execute: Run the plan
Args:
recipe_yaml: Recipe YAML content
recipe_sexp: Recipe S-expression content
input_hashes: Mapping from input name to content hash
features: Features to extract (default: ["beats", "energy"])
run_id: Optional run ID for tracking
Returns:
Dict with final results
"""
if RecipePlanner is None or Analyzer is None:
raise ImportError("artdag modules not available")
# Import S-expression compiler
try:
from artdag.sexp import compile_string
except ImportError:
raise ImportError("artdag.sexp not available")
if features is None:
features = ["beats", "energy"]
if Analyzer is None:
raise ImportError("artdag.analysis not available")
cache_mgr = get_cache_manager()
logger.info(f"Running recipe with {len(input_hashes)} inputs")
# Phase 1: Analyze
logger.info("Phase 1: Analyzing inputs...")
# Phase 1: Parse recipe
logger.info("Phase 1: Parsing recipe S-expression...")
try:
compiled = compile_string(recipe_sexp)
except Exception as e:
return {"status": "failed", "error": f"Recipe parse error: {e}"}
logger.info(f"Parsed recipe: {compiled.name}")
# Phase 2: Run analysis nodes from recipe
logger.info("Phase 2: Running analysis from recipe...")
analysis_nodes = _extract_analysis_from_recipe(compiled)
logger.info(f"Found {len(analysis_nodes)} analysis nodes in recipe")
ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
analyzer = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)
analysis_results = {}
for name, content_hash in input_hashes.items():
# Get path from cache
for analysis_node in analysis_nodes:
input_ref = analysis_node["input_ref"]
feature = analysis_node["feature"]
node_id = analysis_node["node_id"]
# Resolve input reference to content hash
content_hash = input_hashes.get(input_ref)
if not content_hash:
logger.warning(f"Analysis node {node_id}: input '{input_ref}' not in input_hashes")
continue
path = cache_mgr.get_by_content_hash(content_hash)
if path:
try:
result = analyzer.analyze(
input_hash=content_hash,
features=features,
input_path=Path(path),
)
analysis_results[content_hash] = result
logger.info(f"Analyzed {name}: tempo={result.tempo}, beats={len(result.beat_times or [])}")
except Exception as e:
logger.warning(f"Analysis failed for {name}: {e}")
else:
logger.warning(f"Input {name} ({content_hash[:16]}...) not in cache")
if not path:
logger.warning(f"Analysis node {node_id}: content {content_hash[:16]}... not in cache")
continue
logger.info(f"Analyzed {len(analysis_results)} inputs")
try:
# Run analysis for this node's specific feature (avoid shadowing the task's features arg)
node_features = [feature] if feature else ["beats", "energy"]
result = analyzer.analyze(
input_hash=content_hash,
features=node_features,
input_path=Path(path),
)
# Store result keyed by node_id so plan can reference it
analysis_results[node_id] = result
# Also store by content_hash for compatibility
analysis_results[content_hash] = result
logger.info(f"Analysis {node_id}: feature={feature}, tempo={result.tempo}")
except Exception as e:
logger.warning(f"Analysis failed for {node_id}: {e}")
# Phase 2: Plan
logger.info("Phase 2: Generating execution plan...")
logger.info(f"Completed {len(analysis_results)} analysis results")
recipe = Recipe.from_yaml(recipe_yaml)
planner = RecipePlanner(use_tree_reduction=True)
# Phase 3: Generate plan
logger.info("Phase 3: Generating execution plan...")
plan = planner.plan(
recipe=recipe,
input_hashes=input_hashes,
analysis=analysis_results,
)
# Use the S-expression planner if available
try:
from artdag.sexp.planner import create_plan
plan = create_plan(compiled, inputs=input_hashes)
except ImportError:
# Fall back to legacy planner
if RecipePlanner is None:
raise ImportError("No planner available")
recipe = Recipe.from_dict(compiled.to_dict())
planner = RecipePlanner(use_tree_reduction=True)
plan = planner.plan(
recipe=recipe,
input_hashes=input_hashes,
analysis=analysis_results,
)
logger.info(f"Generated plan with {len(plan.steps)} steps")
# Save plan as S-expression through cache manager (goes to IPFS)
import tempfile
from cache_manager import get_cache_manager
cache_mgr = get_cache_manager()
plan_sexp = plan.to_sexp_string() if hasattr(plan, 'to_sexp_string') else plan.to_json()
plan_content = plan.to_sexp_string() if hasattr(plan, 'to_sexp_string') else plan.to_json()
plan_suffix = ".sexp" if hasattr(plan, 'to_sexp_string') else ".json"
with tempfile.NamedTemporaryFile(delete=False, suffix=plan_suffix, mode="w") as tmp:
tmp.write(plan_sexp)
tmp.write(plan_content)
tmp_path = Path(tmp.name)
# Store in cache (content-addressed, auto-pins to IPFS)
@@ -313,15 +382,15 @@ def run_recipe(
cached, plan_ipfs_cid = cache_mgr.put(tmp_path, node_type="plan", move=True)
logger.info(f"Plan cached: hash={cached.content_hash}, ipfs={plan_ipfs_cid}")
# Phase 3: Execute
logger.info("Phase 3: Executing plan...")
# Phase 4: Execute
logger.info("Phase 4: Executing plan...")
result = run_plan(plan.to_json(), run_id=run_id)
return {
"status": result.get("status"),
"run_id": run_id,
"recipe": recipe.name,
"recipe": compiled.name,
"plan_id": plan.plan_id,
"plan_cache_id": cached.content_hash,
"plan_ipfs_cid": plan_ipfs_cid,
@@ -339,9 +408,8 @@ def run_recipe(
@app.task(bind=True, name='tasks.generate_plan')
def generate_plan(
self,
recipe_yaml: str,
recipe_sexp: str,
input_hashes: Dict[str, str],
features: List[str] = None,
) -> dict:
"""
Generate an execution plan without executing it.
@@ -352,48 +420,72 @@ def generate_plan(
- Debugging recipe issues
Args:
recipe_yaml: Recipe YAML content
recipe_sexp: Recipe S-expression content
input_hashes: Mapping from input name to content hash
features: Features to extract for analysis
Returns:
Dict with plan details
"""
if RecipePlanner is None or Analyzer is None:
raise ImportError("artdag modules not available")
try:
from artdag.sexp import compile_string
except ImportError:
raise ImportError("artdag.sexp not available")
if features is None:
features = ["beats", "energy"]
if Analyzer is None:
raise ImportError("artdag.analysis not available")
cache_mgr = get_cache_manager()
# Analyze inputs
# Parse recipe
try:
compiled = compile_string(recipe_sexp)
except Exception as e:
return {"status": "failed", "error": f"Recipe parse error: {e}"}
# Extract and run analysis nodes from recipe
analysis_nodes = _extract_analysis_from_recipe(compiled)
ANALYSIS_CACHE_DIR.mkdir(parents=True, exist_ok=True)
analyzer = Analyzer(cache_dir=ANALYSIS_CACHE_DIR)
analysis_results = {}
for name, content_hash in input_hashes.items():
for analysis_node in analysis_nodes:
input_ref = analysis_node["input_ref"]
feature = analysis_node["feature"]
node_id = analysis_node["node_id"]
content_hash = input_hashes.get(input_ref)
if not content_hash:
continue
path = cache_mgr.get_by_content_hash(content_hash)
if path:
try:
node_features = [feature] if feature else ["beats", "energy"]
result = analyzer.analyze(
input_hash=content_hash,
features=node_features,
input_path=Path(path),
)
analysis_results[node_id] = result
analysis_results[content_hash] = result
except Exception as e:
logger.warning(f"Analysis failed for {name}: {e}")
logger.warning(f"Analysis failed for {node_id}: {e}")
# Generate plan
recipe = Recipe.from_yaml(recipe_yaml)
planner = RecipePlanner(use_tree_reduction=True)
plan = planner.plan(
recipe=recipe,
input_hashes=input_hashes,
analysis=analysis_results,
)
try:
from artdag.sexp.planner import create_plan
plan = create_plan(compiled, inputs=input_hashes)
except ImportError:
if RecipePlanner is None:
raise ImportError("No planner available")
recipe = Recipe.from_dict(compiled.to_dict())
planner = RecipePlanner(use_tree_reduction=True)
plan = planner.plan(
recipe=recipe,
input_hashes=input_hashes,
analysis=analysis_results,
)
# Check cache status for each step
steps_status = []
@@ -411,7 +503,7 @@ def generate_plan(
return {
"status": "planned",
"recipe": recipe.name,
"recipe": compiled.name,
"plan_id": plan.plan_id,
"total_steps": len(plan.steps),
"cached_steps": cached_count,