2188 lines
78 KiB
Python
2188 lines
78 KiB
Python
"""
|
|
Execution plan generation from S-expression recipes.
|
|
|
|
The planner:
|
|
1. Takes a compiled recipe + input content hashes
|
|
2. Runs analyzers to get concrete data (beat times, etc.)
|
|
3. Expands dynamic nodes (SLICE_ON) into primitive operations
|
|
4. Resolves all registry references to content hashes
|
|
5. Generates an execution plan with pre-computed cache IDs
|
|
|
|
Plans are S-expressions with all references resolved to hashes,
|
|
ready for distribution to Celery workers.
|
|
"""
|
|
|
|
import hashlib
|
|
import importlib.util
|
|
import json
|
|
import sys
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Optional, Callable
|
|
|
|
from .parser import Symbol, Keyword, Binding, serialize
|
|
from .compiler import CompiledRecipe
|
|
|
|
|
|
# Node types that can be collapsed into a single FFmpeg filter chain
# (see _collapse_effect_chains)
COLLAPSIBLE_TYPES = {"EFFECT", "SEGMENT"}

# Node types that are boundaries (sources, merges, or special processing);
# chain collapsing never crosses one of these
BOUNDARY_TYPES = {"SOURCE", "SEQUENCE", "MUX", "ANALYZE", "SCAN", "LIST"}

# Node types that need expansion during planning (e.g. _expand_slice_on)
EXPANDABLE_TYPES = {"SLICE_ON", "CONSTRUCT"}
|
|
|
|
|
|
def _load_module(module_path: Path, module_name: str = "module"):
|
|
"""Load a Python module from file path."""
|
|
spec = importlib.util.spec_from_file_location(module_name, module_path)
|
|
module = importlib.util.module_from_spec(spec)
|
|
spec.loader.exec_module(module)
|
|
return module
|
|
|
|
|
|
def _run_analyzer(
    analyzer_path: Path,
    input_path: Path,
    params: Dict[str, Any],
) -> Dict[str, Any]:
    """Load the analyzer module at *analyzer_path* and invoke its analyze() hook.

    The module is expected to expose ``analyze(input_path, params)``; its
    return value (a results dict) is passed through unchanged.
    """
    return _load_module(analyzer_path, "analyzer").analyze(input_path, params)
|
|
|
|
|
|
def _pre_execute_segment(
    node: Dict,
    input_path: Path,
    work_dir: Path,
) -> Path:
    """
    Pre-execute a SEGMENT node during planning.

    This is needed when ANALYZE depends on a SEGMENT output: the segment is
    cut eagerly with ffmpeg so the analyzer has a real file to inspect.

    Args:
        node: SEGMENT node dict; ``config`` may carry ``start``, ``duration``,
            and/or ``end`` (seconds).
        input_path: Media file to cut from.
        work_dir: Directory the segment file is written into.

    Returns:
        Path to the segmented file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits non-zero.
    """
    import subprocess

    config = node.get("config", {})
    start = config.get("start", 0)
    duration = config.get("duration")
    end = config.get("end")

    # Detect if input is audio-only (by extension — TODO confirm probing
    # isn't needed for containers like .mp4 with audio-only streams)
    suffix = input_path.suffix.lower()
    is_audio = suffix in ('.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a')

    # Use m4a for the aac codec on audio inputs, mp4 otherwise
    output_ext = ".m4a" if is_audio else ".mp4"

    output_path = work_dir / f"segment_{node['id'][:16]}{output_ext}"

    cmd = ["ffmpeg", "-y", "-i", str(input_path)]
    if start:  # -ss 0 would be a no-op, so a zero/absent start is omitted
        cmd.extend(["-ss", str(start)])
    if duration:
        cmd.extend(["-t", str(duration)])
    elif end:
        # Only end given: convert to a duration relative to start
        cmd.extend(["-t", str(end - start)])

    if is_audio:
        cmd.extend(["-c:a", "aac", str(output_path)])
    else:
        cmd.extend(["-c:v", "libx264", "-preset", "fast", "-crf", "18",
                    "-c:a", "aac", str(output_path)])

    # check=True raises on failure; output captured to keep planning quiet
    subprocess.run(cmd, check=True, capture_output=True)
    return output_path
|
|
|
|
|
|
def _serialize_for_hash(obj) -> str:
    """Serialize any value to a canonical S-expression string for hashing.

    Output is deterministic for a given value: dict entries are emitted in
    sorted key order so equal dicts always serialize identically.
    """
    from .parser import Lambda

    if obj is None:
        return "nil"
    if isinstance(obj, bool):
        # Must be tested before int/float: bool is a subclass of int.
        return "true" if obj else "false"
    if isinstance(obj, (int, float)):
        return str(obj)
    if isinstance(obj, str):
        body = obj.replace('\\', '\\\\').replace('"', '\\"')
        return f'"{body}"'
    if isinstance(obj, Symbol):
        return obj.name
    if isinstance(obj, Keyword):
        return f":{obj.name}"
    if isinstance(obj, Lambda):
        arglist = " ".join(obj.params)
        return f"(fn [{arglist}] {_serialize_for_hash(obj.body)})"
    if isinstance(obj, Binding):
        # analysis_ref can be a string, node ID, or dict - serialize it properly
        if isinstance(obj.analysis_ref, str):
            ref = f'"{obj.analysis_ref}"'
        else:
            ref = _serialize_for_hash(obj.analysis_ref)
        return f"(bind {ref} :range [{obj.range_min} {obj.range_max}])"
    if isinstance(obj, dict):
        pairs = " ".join(
            f":{key} {_serialize_for_hash(val)}" for key, val in sorted(obj.items())
        )
        return "{" + pairs + "}"
    if isinstance(obj, list):
        return "(" + " ".join(_serialize_for_hash(item) for item in obj) + ")"
    return str(obj)
|
|
|
|
|
|
def _stable_hash(data: Any, cluster_key: Optional[str] = None) -> str:
    """Create a stable SHA3-256 hash from data using S-expression serialization.

    Args:
        data: Any value accepted by ``_serialize_for_hash``.
        cluster_key: Optional namespace; when given, the data is wrapped so
            identical payloads in different clusters hash differently.
            (Annotation fixed from implicit-Optional ``str = None``.)

    Returns:
        Hex digest string (64 chars).
    """
    if cluster_key:
        data = {"_cluster_key": cluster_key, "_data": data}
    sexp_str = _serialize_for_hash(data)
    return hashlib.sha3_256(sexp_str.encode()).hexdigest()
|
|
|
|
|
|
@dataclass
class PlanStep:
    """A single executable step within an execution plan."""
    step_id: str
    node_type: str
    config: Dict[str, Any]
    inputs: List[str]  # step_ids this step consumes
    cache_id: str
    level: int = 0
    stage: Optional[str] = None  # owning stage name, if any

    def to_sexp(self) -> List:
        """Render this step as an S-expression list."""
        out = [Symbol("step"), self.step_id, Keyword("cache-id"), self.cache_id]

        # Level and stage are optional annotations
        if self.level > 0:
            out += [Keyword("level"), self.level]
        if self.stage:
            out += [Keyword("stage"), self.stage]

        # The node expression: (node-type :key value ... :inputs [...])
        body = [Symbol(self.node_type.lower())]
        for key, value in self.config.items():
            # Bindings are rendered in their (bind ref :range [...]) form
            if isinstance(value, Binding):
                value = [Symbol("bind"), value.analysis_ref,
                         Keyword("range"), [value.range_min, value.range_max]]
            body += [Keyword(key), value]
        if self.inputs:
            body += [Keyword("inputs"), self.inputs]

        out.append(body)
        return out
|
|
|
|
|
|
@dataclass
class StagePlan:
    """A stage in the execution plan.

    Stages group steps; `requires` expresses inter-stage dependencies and
    `level` orders stages for parallel execution.
    """
    stage_name: str
    steps: List[PlanStep]
    requires: List[str]  # Names of required stages
    output_bindings: Dict[str, str]  # binding_name -> cache_id of output
    level: int = 0  # Stage level for parallel execution
|
|
|
|
|
|
@dataclass
class ExecutionPlanSexp:
    """Execution plan as S-expression.

    Fully resolved, content-addressed plan ready for serialization and
    distribution to workers. NOTE: `metadata`, `stage_order`, and
    `stage_levels` are carried on the object but are NOT emitted by
    to_sexp().
    """
    plan_id: str
    steps: List[PlanStep]
    output_step_id: str
    source_hash: str = ""  # CID of recipe source
    params: Dict[str, Any] = field(default_factory=dict)  # Resolved parameter values
    params_hash: str = ""  # Hash of params for quick comparison
    inputs: Dict[str, str] = field(default_factory=dict)  # name -> hash
    analysis: Dict[str, Dict] = field(default_factory=dict)  # name -> {times, values}
    metadata: Dict[str, Any] = field(default_factory=dict)  # free-form; not serialized
    stage_plans: List[StagePlan] = field(default_factory=list)  # Stage-level plans
    stage_order: List[str] = field(default_factory=list)  # Topologically sorted stage names
    stage_levels: Dict[str, int] = field(default_factory=dict)  # stage_name -> level
    effects_registry: Dict[str, Dict] = field(default_factory=dict)  # effect_name -> {path, cid, ...}
    minimal_primitives: bool = False  # If True, interpreter uses only core primitives

    def to_sexp(self) -> List:
        """Convert entire plan to S-expression.

        Emission order is fixed (id, params, inputs, analysis, stages,
        effects-registry, flags, steps, output); do not reorder — consumers
        and content hashing depend on it.
        """
        sexp = [Symbol("plan")]

        # Metadata - purely content-addressed
        sexp.extend([Keyword("id"), self.plan_id])
        sexp.extend([Keyword("source-cid"), self.source_hash])  # CID of recipe source

        # Parameters (hash first for cheap change detection, then values)
        if self.params:
            sexp.extend([Keyword("params-hash"), self.params_hash])
            params_sexp = [Symbol("params")]
            for name, value in self.params.items():
                params_sexp.append([Symbol(name), value])
            sexp.append(params_sexp)

        # Input bindings
        if self.inputs:
            inputs_sexp = [Symbol("inputs")]
            for name, hash_val in self.inputs.items():
                inputs_sexp.append([Symbol(name), hash_val])
            sexp.append(inputs_sexp)

        # Analysis data (for effect parameter bindings). Large tracks may be
        # externalized: a dict carrying "_cache_id" is emitted by reference
        # only; otherwise inline times/values arrays are emitted.
        if self.analysis:
            analysis_sexp = [Symbol("analysis")]
            for name, data in self.analysis.items():
                track_sexp = [Symbol(name)]
                if isinstance(data, dict) and "_cache_id" in data:
                    track_sexp.extend([Keyword("cache-id"), data["_cache_id"]])
                else:
                    if "times" in data:
                        track_sexp.extend([Keyword("times"), data["times"]])
                    if "values" in data:
                        track_sexp.extend([Keyword("values"), data["values"]])
                analysis_sexp.append(track_sexp)
            sexp.append(analysis_sexp)

        # Stage information
        if self.stage_plans:
            stages_sexp = [Symbol("stages")]
            for stage_plan in self.stage_plans:
                stage_sexp = [
                    Keyword("name"), stage_plan.stage_name,
                    Keyword("level"), stage_plan.level,
                ]
                if stage_plan.requires:
                    stage_sexp.extend([Keyword("requires"), stage_plan.requires])
                if stage_plan.output_bindings:
                    outputs_sexp = []
                    for name, cache_id in stage_plan.output_bindings.items():
                        outputs_sexp.append([Symbol(name), Keyword("cache-id"), cache_id])
                    stage_sexp.extend([Keyword("outputs"), outputs_sexp])
                stages_sexp.append(stage_sexp)
            sexp.append(stages_sexp)

        # Effects registry - for loading explicitly declared effects
        if self.effects_registry:
            registry_sexp = [Symbol("effects-registry")]
            for name, info in self.effects_registry.items():
                effect_sexp = [Symbol(name)]
                if info.get("path"):
                    effect_sexp.extend([Keyword("path"), info["path"]])
                if info.get("cid"):
                    effect_sexp.extend([Keyword("cid"), info["cid"]])
                registry_sexp.append(effect_sexp)
            sexp.append(registry_sexp)

        # Minimal primitives flag (omitted when False)
        if self.minimal_primitives:
            sexp.extend([Keyword("minimal-primitives"), True])

        # Steps
        for step in self.steps:
            sexp.append(step.to_sexp())

        # Output reference
        sexp.extend([Keyword("output"), self.output_step_id])

        return sexp

    def to_string(self, pretty: bool = True) -> str:
        """Serialize plan to S-expression string."""
        return serialize(self.to_sexp(), pretty=pretty)
|
|
|
|
|
|
def _expand_list_inputs(nodes: List[Dict]) -> List[Dict]:
|
|
"""
|
|
Expand LIST node inputs in SEQUENCE nodes.
|
|
|
|
When a SEQUENCE has a LIST as input, replace it with all the LIST's inputs.
|
|
LIST nodes that are referenced by non-SEQUENCE nodes (e.g., EFFECT chains)
|
|
are promoted to SEQUENCE nodes so they produce a concatenated output.
|
|
Unreferenced LIST nodes are removed.
|
|
"""
|
|
nodes_by_id = {n["id"]: n for n in nodes}
|
|
list_nodes = {n["id"]: n for n in nodes if n["type"] == "LIST"}
|
|
|
|
if not list_nodes:
|
|
return nodes
|
|
|
|
# Determine which LIST nodes are referenced by SEQUENCE vs other node types
|
|
list_consumed_by_seq = set()
|
|
list_referenced_by_other = set()
|
|
for node in nodes:
|
|
if node["type"] == "LIST":
|
|
continue
|
|
for inp in node.get("inputs", []):
|
|
if inp in list_nodes:
|
|
if node["type"] == "SEQUENCE":
|
|
list_consumed_by_seq.add(inp)
|
|
else:
|
|
list_referenced_by_other.add(inp)
|
|
|
|
result = []
|
|
for node in nodes:
|
|
if node["type"] == "LIST":
|
|
if node["id"] in list_referenced_by_other:
|
|
# Promote to SEQUENCE — non-SEQUENCE nodes reference this LIST
|
|
result.append({
|
|
"id": node["id"],
|
|
"type": "SEQUENCE",
|
|
"config": node.get("config", {}),
|
|
"inputs": node.get("inputs", []),
|
|
})
|
|
# Otherwise skip (consumed by SEQUENCE expansion or unreferenced)
|
|
continue
|
|
|
|
if node["type"] == "SEQUENCE":
|
|
# Expand any LIST inputs
|
|
new_inputs = []
|
|
for inp in node.get("inputs", []):
|
|
if inp in list_nodes:
|
|
# Replace LIST with its contents
|
|
new_inputs.extend(list_nodes[inp].get("inputs", []))
|
|
else:
|
|
new_inputs.append(inp)
|
|
|
|
# Create updated node
|
|
result.append({
|
|
**node,
|
|
"inputs": new_inputs,
|
|
})
|
|
else:
|
|
result.append(node)
|
|
|
|
return result
|
|
|
|
|
|
def _collapse_effect_chains(nodes: List[Dict], registry: Optional[Dict] = None) -> List[Dict]:
    """
    Collapse sequential effect chains into single COMPOUND nodes.

    A chain is a sequence of single-input collapsible nodes where:
    - Each node has exactly one input
    - No node in the chain is referenced by multiple other nodes
    - The chain ends at a boundary or multi-ref node
    - No node in the chain is marked as temporal

    Effects can declare :temporal true to prevent collapsing (e.g., reverse).

    Args:
        nodes: Node dicts ("id", "type", "config", "inputs").
        registry: Recipe registry; its "effects" table provides temporal
            flags and is embedded into COMPOUND configs for the executor.

    Returns:
        A new node list with chains collapsed.

    Fixes vs. previous revision: removed unused locals (`compound_nodes`,
    `end_node`) and guarded against dangling input IDs, which previously
    raised KeyError in the output loop.
    """
    if not nodes:
        return nodes

    registry = registry or {}
    nodes_by_id = {n["id"]: n for n in nodes}

    # Build reference counts: how many nodes reference each node as input
    ref_count = {n["id"]: 0 for n in nodes}
    for node in nodes:
        for inp in node.get("inputs", []):
            if inp in ref_count:
                ref_count[inp] += 1

    # Track which nodes are consumed by chains
    consumed = set()

    def is_temporal(node: Dict) -> bool:
        """Check if a node is temporal (needs complete input)."""
        config = node.get("config", {})
        # Check node-level temporal flag
        if config.get("temporal"):
            return True
        # Check effect registry for temporal flag
        if node["type"] == "EFFECT":
            effect_name = config.get("effect")
            if effect_name:
                effect_meta = registry.get("effects", {}).get(effect_name, {})
                if effect_meta.get("temporal"):
                    return True
        return False

    def is_collapsible(node_id: str) -> bool:
        """Check if a node can be part of a chain."""
        if node_id in consumed:
            return False
        node = nodes_by_id.get(node_id)
        if not node:
            return False
        if node["type"] not in COLLAPSIBLE_TYPES:
            return False
        # Temporal effects can't be collapsed
        if is_temporal(node):
            return False
        # Effects CAN be collapsed if they have an FFmpeg mapping.
        # Only fall back to the Python interpreter if no mapping exists.
        config = node.get("config", {})
        if node["type"] == "EFFECT":
            effect_name = config.get("effect")
            # Import here to avoid circular imports
            from .ffmpeg_compiler import FFmpegCompiler
            compiler = FFmpegCompiler()
            if compiler.get_mapping(effect_name):
                return True  # Has FFmpeg mapping, can collapse
            elif config.get("effect_path"):
                return False  # No FFmpeg mapping, has Python path, can't collapse
        return True

    def is_chain_boundary(node_id: str) -> bool:
        """Check if a node is a chain boundary (can't be collapsed into)."""
        node = nodes_by_id.get(node_id)
        if not node:
            return True  # Unknown node is a boundary
        # Boundary if: it's a boundary type, or referenced by multiple nodes
        return node["type"] in BOUNDARY_TYPES or ref_count.get(node_id, 0) > 1

    def collect_chain(start_id: str) -> List[str]:
        """Collect a chain of collapsible nodes walking input-ward from start_id."""
        chain = [start_id]
        current = start_id

        while True:
            node = nodes_by_id[current]
            inputs = node.get("inputs", [])

            # Must have exactly one input
            if len(inputs) != 1:
                break

            next_id = inputs[0]

            # Stop if next is a boundary or already consumed
            if is_chain_boundary(next_id) or not is_collapsible(next_id):
                break

            # Stop if next is referenced by others besides current
            if ref_count.get(next_id, 0) > 1:
                break

            chain.append(next_id)
            current = next_id

        return chain

    # Process nodes in reverse order (from outputs toward inputs).
    # This ensures we find complete chains starting from their end.
    # First, topologically sort to get dependency order.
    sorted_ids = []
    visited = set()

    def topo_visit(node_id: str):
        """DFS post-order; unknown input IDs are still recorded."""
        if node_id in visited:
            return
        visited.add(node_id)
        node = nodes_by_id.get(node_id)
        if node:
            for inp in node.get("inputs", []):
                topo_visit(inp)
        sorted_ids.append(node_id)

    for node in nodes:
        topo_visit(node["id"])

    # Process in reverse topological order (outputs first)
    result_nodes = []

    for node_id in reversed(sorted_ids):
        node = nodes_by_id.get(node_id)
        if node is None:
            # topo_visit records dangling input IDs (references to nodes
            # outside this list); nothing to emit for them.
            continue

        if node_id in consumed:
            continue

        if not is_collapsible(node_id):
            # Keep boundary nodes as-is
            result_nodes.append(node)
            continue

        # Collect the chain going backwards from this node; reverse topo
        # order guarantees we start at each chain's output end.
        chain = collect_chain(node_id)

        if len(chain) == 1:
            # Single node, no collapse needed
            result_nodes.append(node)
            continue

        # Collapse the chain into a COMPOUND node.
        # Chain is [end, ..., start] order (backwards from output).
        # The compound node:
        # - Has the same ID as the chain end (for reference stability)
        # - Takes input from what the chain start originally took
        # - Has a filter_chain config with all the nodes in order
        chain_start = chain[-1]  # First to execute
        chain_end = chain[0]  # Last to execute

        start_node = nodes_by_id[chain_start]

        # Build filter chain config (in execution order: start to end)
        filter_chain = []
        for chain_node_id in reversed(chain):
            chain_node = nodes_by_id[chain_node_id]
            filter_chain.append({
                "type": chain_node["type"],
                "config": chain_node.get("config", {}),
            })

        compound_node = {
            "id": chain_end,  # Keep the end ID for reference stability
            "type": "COMPOUND",
            "config": {
                "filter_chain": filter_chain,
                # Include effects registry so executor can load only declared effects
                "effects_registry": registry.get("effects", {}),
            },
            "inputs": start_node.get("inputs", []),
            "name": f"compound_{len(filter_chain)}_effects",
        }

        result_nodes.append(compound_node)

        # Mark all chain nodes as consumed
        for chain_node_id in chain:
            consumed.add(chain_node_id)

    return result_nodes
|
|
|
|
|
|
def _expand_slice_on(
    node: Dict,
    analysis_data: Dict[str, Any],
    registry: Dict,
    sources: Optional[Dict[str, str]] = None,
    cluster_key: Optional[str] = None,
    encoding: Optional[Dict] = None,
    named_analysis: Optional[Dict] = None,
) -> List[Dict]:
    """
    Expand a SLICE_ON node into primitive SEGMENT + EFFECT nodes, collected
    under a LIST node (the user must explicitly sequence the slices).

    Supports two modes:
    1. Legacy: :effect and :pattern parameters
    2. Lambda: :fn parameter with reducer function

    Lambda syntax:
        (slice-on analysis
          :times times
          :init 0
          :fn (lambda [acc i start end]
                {:source video
                 :effects (if (odd? i) [invert] [])
                 :acc (inc acc)}))

    When all beats produce composition-mode results (layers + compositor)
    with the same layer structure, consecutive beats are automatically merged
    into fewer compositions with time-varying parameter bindings. This can
    reduce thousands of nodes to a handful.

    Args:
        node: The SLICE_ON node to expand
        analysis_data: Analysis results containing times array
        registry: Recipe registry with effect definitions
        sources: Map of source names to node IDs
        cluster_key: Optional cluster key for hashing (currently unused here)
        encoding: Optional encoding settings; "fps" (default 30) drives
            frame alignment in lambda mode
        named_analysis: Mutable dict to inject synthetic analysis tracks into

    Returns:
        List of expanded nodes (segments, effects, and the trailing LIST)

    Raises:
        ValueError: If no times are found, a reducer returns a non-dict,
            or legacy mode has no video input.
    """
    from .evaluator import evaluate, EvalError
    from .parser import Lambda, Symbol

    config = node.get("config", {})
    node_inputs = node.get("inputs", [])
    sources = sources or {}

    # Extract times (times_path may be dotted, e.g. "beats.times")
    times_path = config.get("times_path", "times")
    times = analysis_data
    for key in times_path.split("."):
        times = times[key]

    if not times:
        raise ValueError(f"No times found at path '{times_path}' in analysis")

    # Default video input (first input after analysis)
    default_video = node_inputs[0] if node_inputs else None

    expanded_nodes = []
    sequence_inputs = []
    base_id = node["id"][:8]

    # Check for lambda-based reducer
    reducer_fn = config.get("fn")

    if isinstance(reducer_fn, Lambda):
        # Lambda mode - evaluate function for each slice
        acc = config.get("init", 0)
        # Pair each time with its predecessor: [(0, t0), (t0, t1), ...]
        slice_times = list(zip([0] + times[:-1], times))

        # Frame-accurate timing calculation
        # Align ALL times to frame boundaries to prevent accumulating drift
        fps = (encoding or {}).get("fps", 30)
        frame_duration = 1.0 / fps

        # Get total duration from analysis data (beats analyzer includes this)
        # Falls back to config target_duration for backwards compatibility
        total_duration = analysis_data.get("duration") or config.get("target_duration")

        # Pre-compute frame-aligned cumulative times
        cumulative_frames = [0]  # Start at frame 0
        for t in times:
            # Round to nearest frame boundary
            frames = round(t * fps)
            cumulative_frames.append(frames)

        # If total duration known, ensure last segment extends to it exactly
        if total_duration is not None:
            target_frames = round(total_duration * fps)
            if target_frames > cumulative_frames[-1]:
                cumulative_frames[-1] = target_frames

        # Pre-compute frame-aligned start times and durations for each slice
        frame_aligned_starts = []
        frame_aligned_durations = []
        for i in range(len(cumulative_frames) - 1):
            start_frames = cumulative_frames[i]
            end_frames = cumulative_frames[i + 1]
            frame_aligned_starts.append(start_frames * frame_duration)
            frame_aligned_durations.append((end_frames - start_frames) * frame_duration)

        # Phase 1: Evaluate all lambdas upfront
        videos = config.get("videos", [])
        all_results = []
        all_timings = []  # (seg_start, seg_duration) per valid beat
        original_indices = []  # original beat index for each result

        for i, (start, end) in enumerate(slice_times):
            # Skip degenerate (zero/negative length) slices
            if start >= end:
                continue

            # Build environment with sources, effects, and builtins
            env = dict(sources)

            # Add effect names so they can be referenced as symbols
            for effect_name in registry.get("effects", {}):
                env[effect_name] = effect_name

            # Make :videos list available to lambda
            if videos:
                env["videos"] = videos

            env["acc"] = acc
            env["i"] = i
            env["start"] = start
            env["end"] = end

            # Evaluate the reducer
            result = evaluate([reducer_fn, Symbol("acc"), Symbol("i"),
                               Symbol("start"), Symbol("end")], env)

            if not isinstance(result, dict):
                raise ValueError(f"Reducer must return a dict, got {type(result)}")

            # Extract accumulator (threaded into the next iteration)
            acc = result.get("acc", acc)

            # Segment timing: use frame-aligned values to prevent drift
            # Lambda can override with explicit start/duration/end
            if result.get("start") is not None or result.get("duration") is not None or result.get("end") is not None:
                # Explicit timing from lambda - use as-is
                seg_start = result.get("start", start)
                seg_duration = result.get("duration")
                if seg_duration is None:
                    if result.get("end") is not None:
                        seg_duration = result["end"] - seg_start
                    else:
                        seg_duration = end - start
            else:
                # Default: use frame-aligned start and duration to prevent accumulated drift
                seg_start = frame_aligned_starts[i] if i < len(frame_aligned_starts) else start
                seg_duration = frame_aligned_durations[i] if i < len(frame_aligned_durations) else (end - start)

            all_results.append(result)
            all_timings.append((seg_start, seg_duration))
            original_indices.append(i)

        # Phase 2: Merge or expand.
        # Merging only applies when every beat is composition-mode and a
        # named_analysis dict was supplied for synthetic tracks.
        all_composition = (
            len(all_results) > 1
            and all("layers" in r for r in all_results)
            and named_analysis is not None
        )

        if all_composition:
            # All beats are composition mode — try to merge consecutive
            # beats with the same layer structure
            _merge_composition_beats(
                all_results, all_timings, base_id, videos, registry,
                expanded_nodes, sequence_inputs, named_analysis,
            )
        else:
            # Fallback: expand each beat individually
            for idx, result in enumerate(all_results):
                orig_i = original_indices[idx]
                seg_start, seg_duration = all_timings[idx]

                if "layers" in result:
                    # COMPOSITION MODE — multi-source with per-layer effects + compositor
                    _expand_composition_beat(
                        result, orig_i, base_id, videos, registry,
                        seg_start, seg_duration, expanded_nodes, sequence_inputs,
                    )
                else:
                    # SINGLE-SOURCE MODE (existing behavior)
                    source_name = result.get("source")
                    effects = result.get("effects", [])

                    # Resolve source to node ID: by source name, by literal
                    # node ID, else the default video input
                    if isinstance(source_name, Symbol):
                        source_name = source_name.name
                    valid_node_ids = set(sources.values())
                    if source_name in sources:
                        video_input = sources[source_name]
                    elif source_name in valid_node_ids:
                        video_input = source_name
                    else:
                        video_input = default_video

                    # Create SEGMENT node
                    segment_id = f"{base_id}_seg_{orig_i:04d}"
                    segment_node = {
                        "id": segment_id,
                        "type": "SEGMENT",
                        "config": {
                            "start": seg_start,
                            "duration": seg_duration,
                        },
                        "inputs": [video_input],
                    }
                    expanded_nodes.append(segment_node)

                    # Apply effects chain (each effect feeds the next)
                    current_input = segment_id
                    for j, effect in enumerate(effects):
                        effect_name, effect_params = _parse_effect_spec(effect)
                        if not effect_name:
                            continue

                        effect_id = f"{base_id}_fx_{orig_i:04d}_{j}"
                        effect_entry = registry.get("effects", {}).get(effect_name, {})

                        effect_config = {
                            "effect": effect_name,
                            "effect_path": effect_entry.get("path"),
                        }
                        effect_config.update(effect_params)

                        effect_node = {
                            "id": effect_id,
                            "type": "EFFECT",
                            "config": effect_config,
                            "inputs": [current_input],
                        }
                        expanded_nodes.append(effect_node)
                        current_input = effect_id

                    sequence_inputs.append(current_input)

    else:
        # Legacy mode - :effect and :pattern
        effect_name = config.get("effect")
        effect_path = config.get("effect_path")
        pattern = config.get("pattern", "all")
        video_input = default_video

        if not video_input:
            raise ValueError("SLICE_ON requires video input")

        slice_times = list(zip([0] + times[:-1], times))

        for i, (start, end) in enumerate(slice_times):
            if start >= end:
                continue

            # Determine if effect should be applied for this slice index
            apply_effect = False
            if effect_name:
                if pattern == "all":
                    apply_effect = True
                elif pattern == "odd":
                    apply_effect = (i % 2 == 1)
                elif pattern == "even":
                    apply_effect = (i % 2 == 0)
                elif pattern == "alternate":
                    # NOTE: "alternate" behaves identically to "odd"
                    apply_effect = (i % 2 == 1)

            # Create SEGMENT node
            segment_id = f"{base_id}_seg_{i:04d}"
            segment_node = {
                "id": segment_id,
                "type": "SEGMENT",
                "config": {
                    "start": start,
                    "duration": end - start,
                },
                "inputs": [video_input],
            }
            expanded_nodes.append(segment_node)

            if apply_effect:
                effect_id = f"{base_id}_fx_{i:04d}"
                effect_config = {"effect": effect_name}
                if effect_path:
                    effect_config["effect_path"] = effect_path

                effect_node = {
                    "id": effect_id,
                    "type": "EFFECT",
                    "config": effect_config,
                    "inputs": [segment_id],
                }
                expanded_nodes.append(effect_node)
                sequence_inputs.append(effect_id)
            else:
                sequence_inputs.append(segment_id)

    # Create LIST node to hold all slices (user must explicitly sequence them)
    list_node = {
        "id": node["id"],  # Keep original ID for reference stability
        "type": "LIST",
        "config": {},
        "inputs": sequence_inputs,
    }
    expanded_nodes.append(list_node)

    return expanded_nodes
|
|
|
|
|
|
def _parse_effect_spec(effect):
    """Normalize an effect spec into (name, params).

    Accepts a Symbol, a plain string, or a dict whose "effect" key names the
    effect (remaining keys become parameters). Anything else yields
    (None, {}).
    """
    from .parser import Symbol

    name = None
    params = {}

    if isinstance(effect, Symbol):
        name = effect.name
    elif isinstance(effect, str):
        name = effect
    elif isinstance(effect, dict):
        name = effect.get("effect")
        if isinstance(name, Symbol):
            name = name.name
        params = {k: v for k, v in effect.items() if k != "effect"}

    return name, params
|
|
|
|
|
|
def _expand_composition_beat(result, beat_idx, base_id, videos, registry,
                             seg_start, seg_duration, expanded_nodes, sequence_inputs):
    """
    Expand one composition-mode beat into per-layer SEGMENT + EFFECT nodes,
    capped by a single multi-input composition EFFECT node.

    Args:
        result: Lambda result dict with 'layers' and optional 'compose'
        beat_idx: Beat index for ID generation
        base_id: Base ID prefix
        videos: List of video node IDs from :videos config
        registry: Recipe registry with effect definitions
        seg_start: Segment start time
        seg_duration: Segment duration
        expanded_nodes: List to append generated nodes to
        sequence_inputs: List to append final composition node ID to
    """
    compose_spec = result.get("compose", {})

    layer_outputs = []
    for layer_idx, layer in enumerate(result["layers"]):
        # A layer's video may be an index into :videos or a node ID string
        video_ref = layer.get("video")
        if isinstance(video_ref, (int, float)):
            layer_source = videos[int(video_ref)]
        else:
            layer_source = str(video_ref)

        # Cut this layer's segment for the beat
        segment_id = f"{base_id}_seg_{beat_idx:04d}_L{layer_idx}"
        expanded_nodes.append({
            "id": segment_id,
            "type": "SEGMENT",
            "config": {"start": seg_start, "duration": seg_duration},
            "inputs": [layer_source],
        })

        # Chain the layer's effects, each feeding the next
        tail = segment_id
        for fx_idx, effect in enumerate(layer.get("effects", [])):
            effect_name, effect_params = _parse_effect_spec(effect)
            if not effect_name:
                continue
            effect_id = f"{base_id}_fx_{beat_idx:04d}_L{layer_idx}_{fx_idx}"
            effect_entry = registry.get("effects", {}).get(effect_name, {})
            expanded_nodes.append({
                "id": effect_id,
                "type": "EFFECT",
                "config": {
                    "effect": effect_name,
                    "effect_path": effect_entry.get("path"),
                    **effect_params,
                },
                "inputs": [tail],
            })
            tail = effect_id
        layer_outputs.append(tail)

    # Composite all layer outputs with the requested (or default) compositor
    compose_name = compose_spec.get("effect", "blend_multi")
    compose_id = f"{base_id}_comp_{beat_idx:04d}"
    compose_entry = registry.get("effects", {}).get(compose_name, {})
    compose_config = {
        "effect": compose_name,
        "effect_path": compose_entry.get("path"),
        "multi_input": True,
    }
    compose_config.update(
        {k: v for k, v in compose_spec.items() if k != "effect"}
    )

    expanded_nodes.append({
        "id": compose_id,
        "type": "EFFECT",
        "config": compose_config,
        "inputs": layer_outputs,
    })
    sequence_inputs.append(compose_id)
|
|
|
|
|
|
def _fingerprint_composition(result):
    """Create a hashable fingerprint of a composition beat's layer structure.

    Beats with the same fingerprint have the same video refs, effect names,
    and compositor type — only parameter values differ. Such beats can be
    merged into a single composition with time-varying bindings.
    """
    compose = result.get("compose", {})

    # Per-layer structure: (video ref, ordered effect names)
    structure = tuple(
        (
            layer.get("video"),
            tuple(_parse_effect_spec(fx)[0] for fx in layer.get("effects", [])),
        )
        for layer in result.get("layers", [])
    )

    # Include static compose params (excluding list-valued params like weights)
    static_params = tuple(sorted(
        (k, v) for k, v in compose.items()
        if k not in ("effect", "weights") and isinstance(v, (str, int, float, bool))
    ))

    return (len(structure), structure, compose.get("effect", "blend_multi"), static_params)
|
|
|
|
|
|
def _merge_composition_beats(
    all_results, all_timings, base_id, videos, registry,
    expanded_nodes, sequence_inputs, named_analysis,
):
    """Merge consecutive composition beats that share a layer structure.

    Consecutive beats are bucketed by structural fingerprint. Runs of two
    or more beats collapse into one composition whose time-varying params
    become synthetic analysis tracks; isolated beats fall back to the
    standard per-beat expansion.
    """
    import sys

    fingerprints = [_fingerprint_composition(res) for res in all_results]

    # Partition the beat sequence into maximal runs of identical fingerprints.
    runs = []  # (start, end_exclusive) pairs
    run_begin = 0
    for idx in range(1, len(fingerprints)):
        if fingerprints[idx] != fingerprints[run_begin]:
            runs.append((run_begin, idx))
            run_begin = idx
    runs.append((run_begin, len(fingerprints)))

    print(f" Composition merging: {len(all_results)} beats -> {len(runs)} groups", file=sys.stderr)

    for run_idx, (lo, hi) in enumerate(runs):
        if hi - lo == 1:
            # Lone beat: nothing to merge, expand it individually.
            beat_result = all_results[lo]
            beat_start, beat_duration = all_timings[lo]
            _expand_composition_beat(
                beat_result, lo, base_id, videos, registry,
                beat_start, beat_duration, expanded_nodes, sequence_inputs,
            )
        else:
            # Collapse the whole run into one time-varying composition.
            _merge_composition_group(
                all_results, all_timings,
                list(range(lo, hi)),
                base_id, run_idx, videos, registry,
                expanded_nodes, sequence_inputs, named_analysis,
            )
|
|
|
|
|
|
def _merge_composition_group(
    all_results, all_timings, group_indices,
    base_id, group_idx, videos, registry,
    expanded_nodes, sequence_inputs, named_analysis,
):
    """Merge a group of same-structure composition beats into one composition.

    Creates:
    - One SEGMENT per layer (spanning full group duration)
    - One EFFECT per layer with time-varying params via synthetic analysis tracks
    - One compositor EFFECT with time-varying weights via synthetic tracks

    Args:
        all_results: Per-beat composition descriptors ({"layers": [...], "compose": {...}})
        all_timings: Per-beat (start, duration) pairs, parallel to all_results
        group_indices: Indices into all_results/all_timings for this group
        base_id: ID prefix used when generating node IDs
        group_idx: Ordinal of this group (used in generated IDs)
        videos: Video inputs, indexable by numeric layer "video" refs
        registry: Recipe registry (effect name -> {"path": ...} entries)
        expanded_nodes: Output list; generated SEGMENT/EFFECT nodes appended here
        sequence_inputs: Output list; the compositor node ID is appended here
        named_analysis: Output dict; synthetic tracks (name -> {times, values})
            for parameters that vary across beats are stored here
    """
    import sys

    # The first beat defines the group's structure (layers, effects, compositor);
    # the fingerprint grouping upstream guarantees all beats in the group match it.
    first = all_results[group_indices[0]]
    layers = first["layers"]
    compose_spec = first.get("compose", {})
    num_layers = len(layers)

    # Group timing: spans from the first beat's start to the last beat's end.
    first_start = all_timings[group_indices[0]][0]
    last_start, last_dur = all_timings[group_indices[-1]]
    group_duration = (last_start + last_dur) - first_start

    # Beat start times for synthetic tracks (absolute times)
    beat_times = [float(all_timings[i][0]) for i in group_indices]

    print(f" Group {group_idx}: {len(group_indices)} beats, "
          f"{first_start:.1f}s -> {first_start + group_duration:.1f}s "
          f"({num_layers} layers)", file=sys.stderr)

    # --- Per-layer segments and effects ---
    layer_outputs = []
    for layer_idx in range(num_layers):
        layer = layers[layer_idx]

        # Resolve video input: numeric refs index into `videos`, anything
        # else is treated as a direct node-ID/path string.
        video_ref = layer.get("video")
        if isinstance(video_ref, (int, float)):
            video_input = videos[int(video_ref)]
        else:
            video_input = str(video_ref)

        # SEGMENT for this layer (full group duration)
        segment_id = f"{base_id}_seg_G{group_idx:03d}_L{layer_idx}"
        expanded_nodes.append({
            "id": segment_id,
            "type": "SEGMENT",
            "config": {"start": first_start, "duration": group_duration},
            "inputs": [video_input],
        })

        # Per-layer EFFECT chain (each effect consumes the previous node).
        current = segment_id
        effects = layer.get("effects", [])
        for fx_idx, effect in enumerate(effects):
            effect_name, first_params = _parse_effect_spec(effect)
            if not effect_name:
                continue

            effect_id = f"{base_id}_fx_G{group_idx:03d}_L{layer_idx}_{fx_idx}"
            effect_entry = registry.get("effects", {}).get(effect_name, {})
            fx_config = {
                "effect": effect_name,
                "effect_path": effect_entry.get("path"),
            }

            # For each param, check if it varies across beats
            for param_name, first_val in first_params.items():
                values = []
                for bi in group_indices:
                    beat_layer = all_results[bi]["layers"][layer_idx]
                    beat_effects = beat_layer.get("effects", [])
                    if fx_idx < len(beat_effects):
                        _, beat_params = _parse_effect_spec(beat_effects[fx_idx])
                        # Fall back to the first beat's value when a beat
                        # omits the parameter.
                        values.append(float(beat_params.get(param_name, first_val)))
                    else:
                        values.append(float(first_val))

                # Check if all values are identical
                if all(v == values[0] for v in values):
                    fx_config[param_name] = values[0]
                else:
                    # Create synthetic analysis track
                    # Prefix with 'syn_' to ensure valid S-expression symbol
                    # (base_id may start with digits, which the parser splits)
                    track_name = f"syn_{base_id}_L{layer_idx}_fx{fx_idx}_{param_name}"
                    named_analysis[track_name] = {
                        "times": beat_times,
                        "values": values,
                    }
                    fx_config[param_name] = {
                        "_binding": True,
                        "source": track_name,
                        "feature": "values",
                        "range": [0.0, 1.0],  # pass-through
                    }

            expanded_nodes.append({
                "id": effect_id,
                "type": "EFFECT",
                "config": fx_config,
                "inputs": [current],
            })
            current = effect_id

        layer_outputs.append(current)

    # --- Compositor ---
    compose_name = compose_spec.get("effect", "blend_multi")
    compose_id = f"{base_id}_comp_G{group_idx:03d}"
    compose_entry = registry.get("effects", {}).get(compose_name, {})
    compose_config = {
        "effect": compose_name,
        "effect_path": compose_entry.get("path"),
        "multi_input": True,
    }

    # Merge compositor params across beats, same varies-vs-constant logic
    # as the per-layer effects above.
    for k, v in compose_spec.items():
        if k == "effect":
            continue

        if isinstance(v, list):
            # List param (e.g., weights) — check each element
            merged_list = []
            for elem_idx in range(len(v)):
                elem_values = []
                for bi in group_indices:
                    beat_compose = all_results[bi].get("compose", {})
                    beat_v = beat_compose.get(k, v)
                    if isinstance(beat_v, list) and elem_idx < len(beat_v):
                        elem_values.append(float(beat_v[elem_idx]))
                    else:
                        # Beat is missing this element — use the first
                        # beat's value for that position.
                        elem_values.append(float(v[elem_idx]))

                if all(ev == elem_values[0] for ev in elem_values):
                    merged_list.append(elem_values[0])
                else:
                    track_name = f"syn_{base_id}_comp_{k}_{elem_idx}"
                    named_analysis[track_name] = {
                        "times": beat_times,
                        "values": elem_values,
                    }
                    merged_list.append({
                        "_binding": True,
                        "source": track_name,
                        "feature": "values",
                        "range": [0.0, 1.0],
                    })
            compose_config[k] = merged_list
        elif isinstance(v, (int, float)):
            # Scalar param — check if it varies
            values = []
            for bi in group_indices:
                beat_compose = all_results[bi].get("compose", {})
                values.append(float(beat_compose.get(k, v)))

            if all(val == values[0] for val in values):
                compose_config[k] = values[0]
            else:
                track_name = f"syn_{base_id}_comp_{k}"
                named_analysis[track_name] = {
                    "times": beat_times,
                    "values": values,
                }
                compose_config[k] = {
                    "_binding": True,
                    "source": track_name,
                    "feature": "values",
                    "range": [0.0, 1.0],
                }
        else:
            # String or other — keep as-is
            compose_config[k] = v

    expanded_nodes.append({
        "id": compose_id,
        "type": "EFFECT",
        "config": compose_config,
        "inputs": layer_outputs,
    })
    sequence_inputs.append(compose_id)
|
|
|
|
|
|
def _parse_construct_params(params_list: list) -> tuple:
    """Parse the :params block of a construct definition.

    Each entry has the shape::

        (param_name :type string :default "value" :desc "description")

    Returns:
        (param_names, param_defaults): the declared names in order, and a
        mapping of each name to its :default value (None when absent).
    """
    names = []
    defaults = {}

    for entry in params_list:
        if not isinstance(entry, list) or not entry:
            continue

        # First element is the parameter name (Symbol or plain string).
        head = entry[0]
        if isinstance(head, Symbol):
            name = head.name
        elif isinstance(head, str):
            name = head
        else:
            continue  # malformed entry — skip silently

        names.append(name)

        # Scan keyword/value pairs after the name; only :default matters
        # here (:type, :range, :choices, :desc are recognized but unused).
        default_value = None
        pos = 1
        while pos < len(entry):
            token = entry[pos]
            if not isinstance(token, Keyword):
                pos += 1
                continue
            if pos + 1 >= len(entry):
                break  # trailing keyword with no value
            if token.name == "default":
                default_value = entry[pos + 1]
            pos += 2

        defaults[name] = default_value

    return names, defaults
|
|
|
|
|
|
def _expand_construct(
    node: Dict,
    registry: Dict,
    sources: Dict[str, str],
    analysis_data: Dict[str, Dict],
    recipe_dir: Path,
    cluster_key: str = None,
    encoding: Dict = None,
) -> List[Dict]:
    """
    Expand a user-defined CONSTRUCT node.

    Loads the construct definition from .sexp file, evaluates it with
    the provided arguments, and converts the result into segment nodes.

    Args:
        node: The CONSTRUCT node to expand
        registry: Recipe registry
        sources: Map of source names to node IDs
        analysis_data: Analysis results (analysis_id -> {times, values})
        recipe_dir: Recipe directory for resolving paths
        cluster_key: Optional cluster key for hashing
        encoding: Encoding config

    Returns:
        List of expanded nodes (segments, effects, list)

    Raises:
        ValueError: If the construct file is missing, contains no
            define-construct, has no body, uses legacy parameter syntax,
            receives an unknown parameter, or does not return a list.
    """
    from .parser import parse_all, Symbol
    from .evaluator import evaluate

    config = node.get("config", {})
    construct_name = config.get("construct_name")
    construct_path = config.get("construct_path")
    args = config.get("args", [])

    # Load construct definition
    full_path = recipe_dir / construct_path
    if not full_path.exists():
        raise ValueError(f"Construct file not found: {full_path}")

    print(f" Loading construct: {construct_name} from {construct_path}", file=sys.stderr)

    construct_text = full_path.read_text()
    construct_sexp = parse_all(construct_text)

    # Parse define-construct: (define-construct name "desc" (params...) body)
    if not isinstance(construct_sexp, list):
        construct_sexp = [construct_sexp]

    # Process imports (effect, construct declarations) in the construct file.
    # These extend the registry for this construct's scope only.
    local_registry = dict(registry)  # Copy parent registry
    construct_def = None

    for expr in construct_sexp:
        if isinstance(expr, list) and expr and isinstance(expr[0], Symbol):
            form_name = expr[0].name

            if form_name == "effect":
                # (effect name :path "...")
                effect_name = expr[1].name if isinstance(expr[1], Symbol) else expr[1]
                # Parse kwargs following the effect name
                i = 2
                kwargs = {}
                while i < len(expr):
                    if isinstance(expr[i], Keyword):
                        kwargs[expr[i].name] = expr[i + 1] if i + 1 < len(expr) else None
                        i += 2
                    else:
                        i += 1
                local_registry.setdefault("effects", {})[effect_name] = {
                    "path": kwargs.get("path"),
                    "cid": kwargs.get("cid"),
                }
                print(f" Construct imports effect: {effect_name}", file=sys.stderr)

            elif form_name == "define-construct":
                construct_def = expr

    if not construct_def:
        raise ValueError(f"No define-construct found in {construct_path}")

    # Use local_registry instead of registry from here
    registry = local_registry

    # Parse define-construct - requires :params syntax:
    # (define-construct name
    #   :params (
    #     (param1 :type string :default "value" :desc "description")
    #   )
    #   body)
    #
    # Legacy syntax (define-construct name "desc" (param1 param2) body) is not supported.
    def_name = construct_def[1].name if isinstance(construct_def[1], Symbol) else construct_def[1]

    params = []  # List of param names
    param_defaults = {}  # param_name -> default value
    body = None

    idx = 2
    while idx < len(construct_def):
        item = construct_def[idx]
        if isinstance(item, Keyword) and item.name == "params":
            # :params syntax
            if idx + 1 >= len(construct_def):
                raise ValueError(f"Construct '{def_name}': Missing params list after :params keyword")
            params_list = construct_def[idx + 1]
            params, param_defaults = _parse_construct_params(params_list)
            idx += 2
        elif isinstance(item, Keyword):
            # Skip other keywords (like :desc)
            idx += 2
        elif isinstance(item, str):
            # Skip description strings (but warn about legacy format)
            print(f" Warning: Description strings in define-construct are deprecated", file=sys.stderr)
            idx += 1
        elif body is None:
            # First non-keyword, non-string item is the body
            if isinstance(item, list) and item:
                first_elem = item[0]
                # Check for legacy params syntax and reject it
                if isinstance(first_elem, Symbol) and first_elem.name not in ("let", "let*", "if", "when", "do", "begin", "->", "map", "filter", "fn", "reduce", "nth"):
                    # Could be legacy params if all items are just symbols
                    if all(isinstance(p, Symbol) for p in item):
                        raise ValueError(
                            f"Construct '{def_name}': Legacy parameter syntax (param1 param2) is not supported. "
                            f"Use :params block instead."
                        )
            body = item
            idx += 1
        else:
            idx += 1

    if body is None:
        raise ValueError(f"No body found in define-construct {def_name}")

    # Build environment with sources and analysis data
    env = dict(sources)

    # Add bindings from compiler (video-a, video-b, etc.)
    if "bindings" in config:
        env.update(config["bindings"])

    # Add effect names so they can be referenced as symbols
    for effect_name in registry.get("effects", {}):
        env[effect_name] = effect_name

    # Map analysis node IDs to their data with :times and :values
    for analysis_id, data in analysis_data.items():
        # Find the name this analysis was bound to
        for name, node_id in sources.items():
            if node_id == analysis_id or name.endswith("-data"):
                env[name] = data
        env[analysis_id] = data

    # Apply param defaults first (for :params syntax)
    for param_name, default_value in param_defaults.items():
        if default_value is not None:
            env[param_name] = default_value

    # Bind positional args to params (overrides defaults)
    param_names = [p.name if isinstance(p, Symbol) else p for p in params]
    for i, param in enumerate(param_names):
        if i < len(args):
            arg = args[i]
            # Resolve node IDs to their data if it's analysis
            if isinstance(arg, str) and arg in analysis_data:
                env[param] = analysis_data[arg]
            else:
                env[param] = arg

    # Helper to resolve node IDs to analysis data recursively
    def resolve_value(val):
        """Resolve node IDs and symbols in a value, including inside dicts/lists."""
        if isinstance(val, str) and val in analysis_data:
            return analysis_data[val]
        elif isinstance(val, str) and val in env:
            return env[val]
        elif isinstance(val, Symbol):
            if val.name in env:
                return env[val.name]
            return val
        elif isinstance(val, dict):
            return {k: resolve_value(v) for k, v in val.items()}
        elif isinstance(val, list):
            return [resolve_value(v) for v in val]
        return val

    # Validate and bind keyword arguments from the config (excluding internal keys)
    # These may be S-expressions that need evaluation (e.g., lambdas)
    # or Symbols that need resolution from bindings
    internal_keys = {"construct_name", "construct_path", "args", "bindings"}
    known_params = set(param_names) | set(param_defaults.keys())
    for key, value in config.items():
        if key not in internal_keys:
            # Convert key to valid identifier (replace - with _) for checking
            param_key = key.replace("-", "_")
            if param_key not in known_params:
                raise ValueError(
                    f"Construct '{def_name}': Unknown parameter '{key}'. "
                    f"Valid parameters are: {', '.join(sorted(known_params)) if known_params else '(none)'}"
                )
            # Evaluate if it's an expression (list starting with Symbol)
            if isinstance(value, list) and value and isinstance(value[0], Symbol):
                env[param_key] = evaluate(value, env)
            elif isinstance(value, Symbol):
                # Resolve Symbol from env/bindings, then resolve any node IDs in the value
                if value.name in env:
                    env[param_key] = resolve_value(env[value.name])
                else:
                    raise ValueError(f"Undefined symbol in construct arg: {value.name}")
            else:
                # Resolve node IDs inside dicts/lists
                env[param_key] = resolve_value(value)

    # Evaluate construct body
    print(f" Evaluating construct with params: {param_names}", file=sys.stderr)
    segments = evaluate(body, env)

    if not isinstance(segments, list):
        raise ValueError(f"Construct must return a list of segments, got {type(segments)}")

    print(f" Construct produced {len(segments)} segments", file=sys.stderr)

    # Convert segment descriptors to plan nodes
    expanded_nodes = []
    sequence_inputs = []
    base_id = node["id"][:8]

    for i, seg in enumerate(segments):
        if not isinstance(seg, dict):
            continue

        source_ref = seg.get("source")
        start = seg.get("start", 0)
        end = seg.get("end")
        # Explicit None checks: a construct may legitimately produce
        # 0-valued end/duration, which truthiness tests would misinterpret.
        duration = seg.get("duration")
        if duration is None:
            duration = (end - start) if end is not None else 1.0
        effects = seg.get("effects", [])

        # Resolve source reference to node ID
        source_id = sources.get(source_ref, source_ref) if isinstance(source_ref, str) else source_ref

        # Create segment node
        segment_id = f"{base_id}_seg_{i:04d}"
        segment_node = {
            "id": segment_id,
            "type": "SEGMENT",
            "config": {
                "start": start,
                "duration": duration,
            },
            "inputs": [source_id] if source_id else [],
        }
        expanded_nodes.append(segment_node)

        # Add effects if specified
        if effects:
            prev_id = segment_id
            for j, eff in enumerate(effects):
                effect_name = eff.get("effect") if isinstance(eff, dict) else eff
                effect_id = f"{base_id}_fx_{i:04d}_{j:02d}"
                # Look up effect_path from registry (prevents collapsing Python effects)
                effect_entry = registry.get("effects", {}).get(effect_name, {})
                effect_config = {
                    "effect": effect_name,
                    **{k: v for k, v in (eff.items() if isinstance(eff, dict) else []) if k != "effect"},
                }
                if effect_entry.get("path"):
                    effect_config["effect_path"] = effect_entry["path"]
                effect_node = {
                    "id": effect_id,
                    "type": "EFFECT",
                    "config": effect_config,
                    "inputs": [prev_id],
                }
                expanded_nodes.append(effect_node)
                prev_id = effect_id
            sequence_inputs.append(prev_id)
        else:
            sequence_inputs.append(segment_id)

    # Create LIST node (reuses the CONSTRUCT node's ID so downstream
    # references to this node keep resolving)
    list_node = {
        "id": node["id"],
        "type": "LIST",
        "config": {},
        "inputs": sequence_inputs,
    }
    expanded_nodes.append(list_node)

    return expanded_nodes
|
|
|
|
|
|
def _expand_nodes(
    nodes: List[Dict],
    registry: Dict,
    recipe_dir: Path,
    source_paths: Dict[str, Path],
    work_dir: Path = None,
    cluster_key: str = None,
    on_analysis: Callable[[str, Dict], None] = None,
    encoding: Dict = None,
    pre_analysis: Dict[str, Dict] = None,
) -> "tuple[List[Dict], Dict[str, Dict]]":
    """
    Expand dynamic nodes (SLICE_ON) by running analyzers.

    Processes nodes in dependency order:
    1. SOURCE nodes: resolve file paths
    2. SEGMENT nodes: pre-execute if needed for analysis
    3. ANALYZE nodes: run analyzers (or use pre_analysis), store results
    4. SLICE_ON nodes: expand using analysis results

    Args:
        nodes: List of compiled nodes
        registry: Recipe registry
        recipe_dir: Directory for resolving relative paths
        source_paths: Resolved source paths (id -> path); mutated in place
        work_dir: Working directory for temporary files (created if None)
        cluster_key: Optional cluster key
        on_analysis: Callback when analysis completes (node_id, results)
        pre_analysis: Pre-computed analysis data (name -> results)

    Returns:
        Tuple of (expanded_nodes, named_analysis) where:
        - expanded_nodes: List with SLICE_ON replaced by primitives
        - named_analysis: Dict of analyzer_name -> {times, values}
    """
    import tempfile

    nodes_by_id = {n["id"]: n for n in nodes}
    sorted_ids = _topological_sort(nodes)

    # Create work directory if needed
    if work_dir is None:
        work_dir = Path(tempfile.mkdtemp(prefix="artdag_plan_"))

    # Track outputs and analysis results
    outputs = {}  # node_id -> output path or analysis data
    analysis_results = {}  # node_id -> analysis dict
    named_analysis = {}  # analyzer_name -> analysis dict (for effect bindings)
    pre_executed = set()  # nodes pre-executed during planning
    expanded = []
    expanded_ids = set()

    # Dependency order guarantees inputs are resolved before dependents run.
    for node_id in sorted_ids:
        node = nodes_by_id[node_id]
        node_type = node["type"]

        if node_type == "SOURCE":
            # Resolve source path relative to the recipe directory
            config = node.get("config", {})
            if "path" in config:
                path = recipe_dir / config["path"]
                outputs[node_id] = path.resolve()
                source_paths[node_id] = outputs[node_id]
            expanded.append(node)
            expanded_ids.add(node_id)

        elif node_type == "SEGMENT":
            # Check if this segment's input is resolved
            inputs = node.get("inputs", [])
            if inputs and inputs[0] in outputs:
                input_path = outputs[inputs[0]]
                if isinstance(input_path, Path):
                    # Skip pre-execution if config contains unresolved bindings
                    # (binding values only become concrete at execution time)
                    seg_config = node.get("config", {})
                    has_binding = any(
                        isinstance(v, Binding) or (isinstance(v, dict) and v.get("_binding"))
                        for v in [seg_config.get("start"), seg_config.get("duration"), seg_config.get("end")]
                        if v is not None
                    )
                    if not has_binding:
                        # Pre-execute segment to get output path
                        # This is needed if ANALYZE depends on this segment
                        import sys
                        print(f" Pre-executing segment: {node_id[:16]}...", file=sys.stderr)
                        output_path = _pre_execute_segment(node, input_path, work_dir)
                        outputs[node_id] = output_path
                        pre_executed.add(node_id)
            expanded.append(node)
            expanded_ids.add(node_id)

        elif node_type == "ANALYZE":
            # Get or run analysis
            config = node.get("config", {})
            analysis_name = node.get("name") or config.get("analyzer")

            # Check for pre-computed analysis first
            if pre_analysis and analysis_name and analysis_name in pre_analysis:
                import sys
                print(f" Using pre-computed analysis: {analysis_name}", file=sys.stderr)
                results = pre_analysis[analysis_name]
            else:
                # Run analyzer to get concrete data
                analyzer_path = config.get("analyzer_path")
                node_inputs = node.get("inputs", [])

                if not node_inputs:
                    raise ValueError(f"ANALYZE node {node_id} has no inputs")

                # Get input path - could be SOURCE or pre-executed SEGMENT
                input_id = node_inputs[0]
                input_path = outputs.get(input_id)

                if input_path is None:
                    raise ValueError(
                        f"ANALYZE input {input_id} not resolved. "
                        "Check that input SOURCE or SEGMENT exists."
                    )

                if not isinstance(input_path, Path):
                    raise ValueError(
                        f"ANALYZE input {input_id} is not a file path: {type(input_path)}"
                    )

                if analyzer_path:
                    full_path = recipe_dir / analyzer_path
                    # Everything in config except bookkeeping keys is passed
                    # through to the analyzer as parameters.
                    params = {k: v for k, v in config.items()
                              if k not in ("analyzer", "analyzer_path", "cid")}
                    import sys
                    print(f" Running analyzer: {config.get('analyzer', 'unknown')}", file=sys.stderr)
                    results = _run_analyzer(full_path, input_path, params)
                else:
                    raise ValueError(f"ANALYZE node {node_id} missing analyzer_path")

            analysis_results[node_id] = results
            outputs[node_id] = results

            # Store by name for effect binding resolution
            if analysis_name:
                named_analysis[analysis_name] = results

            if on_analysis:
                on_analysis(node_id, results)

            # Keep ANALYZE node in plan (it produces a JSON artifact)
            expanded.append(node)
            expanded_ids.add(node_id)

        elif node_type == "SLICE_ON":
            # Expand into primitives using analysis results
            inputs = node.get("inputs", [])
            config = node.get("config", {})

            # Lambda mode can have just 1 input (analysis), legacy needs 2 (video + analysis)
            has_lambda = "fn" in config
            if has_lambda:
                if len(inputs) < 1:
                    raise ValueError(f"SLICE_ON {node_id} requires analysis input")
                analysis_id = inputs[0]  # First input is analysis
            else:
                if len(inputs) < 2:
                    raise ValueError(f"SLICE_ON {node_id} requires video and analysis inputs")
                analysis_id = inputs[1]

            if analysis_id not in analysis_results:
                raise ValueError(
                    f"SLICE_ON {node_id} analysis input {analysis_id} not found"
                )

            # Build sources map: name -> node_id
            # This lets the lambda reference videos by name
            sources = {}
            for n in nodes:
                if n.get("name"):
                    sources[n["name"]] = n["id"]

            analysis_data = analysis_results[analysis_id]
            slice_nodes = _expand_slice_on(node, analysis_data, registry, sources, cluster_key, encoding, named_analysis)

            # Deduplicate: expansion may regenerate nodes already emitted
            for sn in slice_nodes:
                if sn["id"] not in expanded_ids:
                    expanded.append(sn)
                    expanded_ids.add(sn["id"])

        elif node_type == "CONSTRUCT":
            # Expand user-defined construct
            config = node.get("config", {})
            construct_name = config.get("construct_name")
            construct_path = config.get("construct_path")

            if not construct_path:
                raise ValueError(f"CONSTRUCT {node_id} missing path")

            # Build sources map (name -> node_id), as for SLICE_ON
            sources = {}
            for n in nodes:
                if n.get("name"):
                    sources[n["name"]] = n["id"]

            # Get analysis data if referenced
            inputs = node.get("inputs", [])
            analysis_data = {}
            for inp in inputs:
                if inp in analysis_results:
                    analysis_data[inp] = analysis_results[inp]

            construct_nodes = _expand_construct(
                node, registry, sources, analysis_data, recipe_dir, cluster_key, encoding
            )

            for cn in construct_nodes:
                if cn["id"] not in expanded_ids:
                    expanded.append(cn)
                    expanded_ids.add(cn["id"])

        else:
            # Keep other nodes as-is
            expanded.append(node)
            expanded_ids.add(node_id)

    return expanded, named_analysis
|
|
|
|
|
|
def create_plan(
    recipe: CompiledRecipe,
    inputs: Dict[str, str] = None,
    recipe_dir: Path = None,
    cluster_key: str = None,
    on_analysis: Callable[[str, Dict], None] = None,
    pre_analysis: Dict[str, Dict] = None,
) -> ExecutionPlanSexp:
    """
    Create an execution plan from a compiled recipe.

    Args:
        recipe: Compiled S-expression recipe
        inputs: Mapping of input names to content hashes
        recipe_dir: Directory for resolving relative paths (required for analyzers)
        cluster_key: Optional cluster key for cache isolation
        on_analysis: Callback when analysis completes (node_id, results)
        pre_analysis: Pre-computed analysis data (name -> results), skips running analyzers

    Returns:
        ExecutionPlanSexp with all cache IDs computed

    Raises:
        ValueError: If the recipe contains expandable nodes but no
            recipe_dir was provided.

    Example:
        >>> recipe = compile_string('(recipe "test" (-> (source cat) (effect identity)))')
        >>> plan = create_plan(recipe, inputs={}, recipe_dir=Path("."))
        >>> print(plan.to_string())
    """
    inputs = inputs or {}

    # Compute source hash as CID (SHA256 of raw bytes) - this IS the content address
    source_hash = hashlib.sha256(recipe.source_text.encode('utf-8')).hexdigest() if recipe.source_text else ""

    # Compute params hash (use JSON + SHA256 for consistency with cache.py)
    if recipe.resolved_params:
        import json
        params_str = json.dumps(recipe.resolved_params, sort_keys=True, default=str)
        params_hash = hashlib.sha256(params_str.encode()).hexdigest()
    else:
        params_hash = ""

    # Check if recipe has expandable nodes (SLICE_ON, etc.)
    has_expandable = any(n["type"] in EXPANDABLE_TYPES for n in recipe.nodes)
    named_analysis = {}

    if has_expandable:
        if recipe_dir is None:
            raise ValueError("recipe_dir required for recipes with SLICE_ON nodes")

        # Expand dynamic nodes (runs analyzers, expands SLICE_ON)
        source_paths = {}
        expanded_nodes, named_analysis = _expand_nodes(
            recipe.nodes,
            recipe.registry,
            recipe_dir,
            source_paths,
            cluster_key=cluster_key,
            on_analysis=on_analysis,
            encoding=recipe.encoding,
            pre_analysis=pre_analysis,
        )
        # Expand LIST inputs in SEQUENCE nodes
        expanded_nodes = _expand_list_inputs(expanded_nodes)
        # Collapse effect chains after expansion
        collapsed_nodes = _collapse_effect_chains(expanded_nodes, recipe.registry)
    else:
        # No expansion needed
        collapsed_nodes = _collapse_effect_chains(recipe.nodes, recipe.registry)

    # Build node lookup from collapsed nodes
    nodes_by_id = {node["id"]: node for node in collapsed_nodes}

    # Topological sort
    sorted_ids = _topological_sort(collapsed_nodes)

    # Create steps with resolved hashes.
    # NOTE(review): _topological_sort can emit IDs that appear only as
    # inputs (no node definition); assumes the graph is fully defined by
    # this point — confirm, otherwise nodes_by_id[node_id] would raise.
    steps = []
    cache_ids = {}  # step_id -> cache_id

    for node_id in sorted_ids:
        node = nodes_by_id[node_id]
        step = _create_step(
            node,
            recipe.registry,
            inputs,
            cache_ids,
            cluster_key,
        )
        steps.append(step)
        # Record the cache ID so downstream steps can hash against it
        cache_ids[node_id] = step.cache_id

    # Compute levels
    _compute_levels(steps, nodes_by_id)

    # Handle stage-aware planning if recipe has stages
    stage_plans = []
    stage_order = []
    stage_levels = {}

    if recipe.stages:
        # Build mapping from node_id to stage
        node_to_stage = {}
        for stage in recipe.stages:
            for node_id in stage.node_ids:
                node_to_stage[node_id] = stage.name

        # Compute stage levels (for parallel execution)
        stage_levels = _compute_stage_levels(recipe.stages)

        # Tag each step with stage info
        for step in steps:
            if step.step_id in node_to_stage:
                step.stage = node_to_stage[step.step_id]

        # Build stage plans
        for stage_name in recipe.stage_order:
            stage = next(s for s in recipe.stages if s.name == stage_name)
            stage_steps = [s for s in steps if s.stage == stage_name]

            # Build output bindings with cache IDs
            output_cache_ids = {}
            for out_name, node_id in stage.output_bindings.items():
                if node_id in cache_ids:
                    output_cache_ids[out_name] = cache_ids[node_id]

            stage_plans.append(StagePlan(
                stage_name=stage_name,
                steps=stage_steps,
                requires=stage.requires,
                output_bindings=output_cache_ids,
                level=stage_levels.get(stage_name, 0),
            ))

        stage_order = recipe.stage_order

    # Compute plan ID from source CID + steps
    plan_content = {
        "source_cid": source_hash,
        "steps": [{"id": s.step_id, "cache_id": s.cache_id} for s in steps],
        "inputs": inputs,
    }
    plan_id = _stable_hash(plan_content, cluster_key)

    return ExecutionPlanSexp(
        plan_id=plan_id,
        source_hash=source_hash,
        params=recipe.resolved_params,
        params_hash=params_hash,
        steps=steps,
        output_step_id=recipe.output_node_id,
        inputs=inputs,
        analysis=named_analysis,
        stage_plans=stage_plans,
        stage_order=stage_order,
        stage_levels=stage_levels,
        effects_registry=recipe.registry.get("effects", {}),
        minimal_primitives=recipe.minimal_primitives,
    )
|
|
|
|
|
|
def _topological_sort(nodes: List[Dict]) -> List[str]:
|
|
"""Sort nodes in dependency order."""
|
|
nodes_by_id = {n["id"]: n for n in nodes}
|
|
visited = set()
|
|
order = []
|
|
|
|
def visit(node_id: str):
|
|
if node_id in visited:
|
|
return
|
|
visited.add(node_id)
|
|
node = nodes_by_id.get(node_id)
|
|
if node:
|
|
for input_id in node.get("inputs", []):
|
|
visit(input_id)
|
|
order.append(node_id)
|
|
|
|
for node in nodes:
|
|
visit(node["id"])
|
|
|
|
return order
|
|
|
|
|
|
def _create_step(
    node: Dict,
    registry: Dict,
    inputs: Dict[str, str],
    cache_ids: Dict[str, str],
    cluster_key: str = None,
) -> PlanStep:
    """Build a PlanStep for *node*, resolving its config and cache ID.

    The cache ID covers the node type, the fully resolved config, and the
    cache IDs of every dependency — both direct graph inputs and implicit
    ``analysis_refs`` bindings — so any change in those invalidates it.
    """
    step_id = node["id"]
    step_type = node["type"]
    raw_inputs = node.get("inputs", [])

    # Registry references (assets, effects, variable inputs) collapse to
    # content hashes so the step is self-describing.
    cfg = _resolve_config(dict(node.get("config", {})), registry, inputs)

    # Direct data-flow dependencies that already have cache IDs.
    direct_ids = [cache_ids[dep] for dep in raw_inputs if dep in cache_ids]

    # analysis_refs are implicit inputs: bindings resolved from analysis
    # output affect the computed result, so they join the cache key.
    implicit_ids = [
        cache_ids[ref]
        for ref in cfg.get("analysis_refs", [])
        if ref in cache_ids
    ]

    # Sort dependency hashes so ordering differences don't change the hash.
    digest_input = {
        "node_type": step_type,
        "config": cfg,
        "inputs": sorted(direct_ids + implicit_ids),
    }

    return PlanStep(
        step_id=step_id,
        node_type=step_type,
        config=cfg,
        inputs=raw_inputs,
        cache_id=_stable_hash(digest_input, cluster_key),
    )
|
|
|
|
|
|
def _resolve_config(
|
|
config: Dict,
|
|
registry: Dict,
|
|
inputs: Dict[str, str],
|
|
) -> Dict:
|
|
"""Resolve registry references in config to content hashes."""
|
|
resolved = {}
|
|
|
|
for key, value in config.items():
|
|
if key == "filter_chain" and isinstance(value, list):
|
|
# Resolve each filter in the chain (for COMPOUND nodes)
|
|
resolved_chain = []
|
|
for filter_item in value:
|
|
filter_config = filter_item.get("config", {})
|
|
resolved_filter_config = _resolve_config(filter_config, registry, inputs)
|
|
resolved_chain.append({
|
|
"type": filter_item["type"],
|
|
"config": resolved_filter_config,
|
|
})
|
|
resolved["filter_chain"] = resolved_chain
|
|
|
|
elif key == "asset" and isinstance(value, str):
|
|
# Resolve asset reference - use CID from registry
|
|
if value in registry.get("assets", {}):
|
|
resolved["cid"] = registry["assets"][value]["cid"]
|
|
else:
|
|
resolved["asset"] = value # Keep as-is if not in registry
|
|
|
|
elif key == "effect" and isinstance(value, str):
|
|
# Resolve effect reference - keep name AND add CID/path
|
|
resolved["effect"] = value
|
|
if value in registry.get("effects", {}):
|
|
effect_entry = registry["effects"][value]
|
|
if effect_entry.get("cid"):
|
|
resolved["cid"] = effect_entry["cid"]
|
|
if effect_entry.get("path"):
|
|
resolved["effect_path"] = effect_entry["path"]
|
|
|
|
elif key == "input" and value is True:
|
|
# Variable input - resolve from inputs dict
|
|
input_name = config.get("name", "input")
|
|
if input_name in inputs:
|
|
resolved["hash"] = inputs[input_name]
|
|
else:
|
|
resolved["input"] = True
|
|
resolved["name"] = input_name
|
|
|
|
elif key == "path":
|
|
# Local file path - keep as-is for local execution
|
|
resolved["path"] = value
|
|
|
|
else:
|
|
resolved[key] = value
|
|
|
|
return resolved
|
|
|
|
|
|
def _compute_levels(steps: List[PlanStep], nodes_by_id: Dict) -> None:
|
|
"""Compute dependency levels for steps.
|
|
|
|
Considers both inputs (data dependencies) and analysis_refs (binding dependencies).
|
|
"""
|
|
levels = {}
|
|
|
|
def compute_level(step_id: str) -> int:
|
|
if step_id in levels:
|
|
return levels[step_id]
|
|
|
|
node = nodes_by_id.get(step_id)
|
|
if not node:
|
|
levels[step_id] = 0
|
|
return 0
|
|
|
|
# Collect all dependencies: inputs + analysis_refs
|
|
deps = list(node.get("inputs", []))
|
|
|
|
# Add analysis_refs as dependencies (for bindings to analysis data)
|
|
config = node.get("config", {})
|
|
analysis_refs = config.get("analysis_refs", [])
|
|
deps.extend(analysis_refs)
|
|
|
|
if not deps:
|
|
levels[step_id] = 0
|
|
return 0
|
|
|
|
max_dep = max(compute_level(dep) for dep in deps)
|
|
levels[step_id] = max_dep + 1
|
|
return levels[step_id]
|
|
|
|
for step in steps:
|
|
step.level = compute_level(step.step_id)
|
|
|
|
|
|
def _compute_stage_levels(stages: List) -> Dict[str, int]:
|
|
"""
|
|
Compute stage levels for parallel execution.
|
|
|
|
Stages at the same level have no dependencies between them
|
|
and can run in parallel.
|
|
"""
|
|
from .compiler import CompiledStage
|
|
|
|
levels = {}
|
|
|
|
def compute_level(stage_name: str) -> int:
|
|
if stage_name in levels:
|
|
return levels[stage_name]
|
|
|
|
stage = next((s for s in stages if s.name == stage_name), None)
|
|
if not stage or not stage.requires:
|
|
levels[stage_name] = 0
|
|
return 0
|
|
|
|
max_req = max(compute_level(req) for req in stage.requires)
|
|
levels[stage_name] = max_req + 1
|
|
return levels[stage_name]
|
|
|
|
for stage in stages:
|
|
compute_level(stage.name)
|
|
|
|
return levels
|
|
|
|
|
|
def step_to_task_sexp(step: PlanStep) -> List:
    """
    Convert a step to a minimal S-expression for a Celery task.

    This is the form that gets sent to a worker; the worker hashes it
    to verify the step's cache_id.
    """
    task = [Symbol(step.node_type.lower())]

    # Resolved config entries become alternating keyword/value pairs.
    for name, val in step.config.items():
        task.append(Keyword(name))
        task.append(val)

    # NOTE(review): ``step.inputs`` is populated by ``_create_step`` with
    # the node/step IDs; the original comment called these cache IDs —
    # confirm whether a later pass remaps them before dispatch.
    if step.inputs:
        task.append(Keyword("inputs"))
        task.append(step.inputs)

    return task
|
|
|
|
|
|
def task_cache_id(task_sexp: List, cluster_key: str = None) -> str:
    """
    Compute the cache ID for a task S-expression.

    Workers recompute this from the S-expression they receive to verify
    they are executing the right task.
    """
    # Serialize to the canonical textual form first so that structurally
    # equivalent expressions hash identically.
    canonical_form = serialize(task_sexp)
    return _stable_hash({"sexp": canonical_form}, cluster_key)
|