# primitive/nodes/compose.py
"""
Compose executors: Combine multiple media inputs.
Primitives: SEQUENCE, LAYER, MUX, BLEND, AUDIO_MIX
"""
import logging
import shutil
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional
from ..dag import NodeType
from ..executor import Executor, register_executor
from .encoding import get_web_encoding_args
logger = logging.getLogger(__name__)
def _get_duration(path: Path) -> float:
    """Get media duration in seconds."""
    cmd = [
        "ffprobe", "-v", "error",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",
        str(path)
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0 or not result.stdout.strip():
        raise RuntimeError(f"ffprobe could not read duration of {path}: {result.stderr}")
    return float(result.stdout.strip())
def _get_video_info(path: Path) -> dict:
"""Get video width, height, frame rate, and sample rate."""
cmd = [
"ffprobe", "-v", "error",
"-select_streams", "v:0",
"-show_entries", "stream=width,height,r_frame_rate",
"-of", "csv=p=0",
str(path)
]
result = subprocess.run(cmd, capture_output=True, text=True)
parts = result.stdout.strip().split(",")
width = int(parts[0]) if len(parts) > 0 and parts[0] else 1920
height = int(parts[1]) if len(parts) > 1 and parts[1] else 1080
fps_str = parts[2] if len(parts) > 2 else "30/1"
# Parse frame rate (e.g., "30/1" or "30000/1001")
if "/" in fps_str:
num, den = fps_str.split("/")
fps = float(num) / float(den) if float(den) != 0 else 30
else:
fps = float(fps_str) if fps_str else 30
# Get audio sample rate
cmd_audio = [
"ffprobe", "-v", "error",
"-select_streams", "a:0",
"-show_entries", "stream=sample_rate",
"-of", "csv=p=0",
str(path)
]
result_audio = subprocess.run(cmd_audio, capture_output=True, text=True)
sample_rate = int(result_audio.stdout.strip()) if result_audio.stdout.strip() else 44100
return {"width": width, "height": height, "fps": fps, "sample_rate": sample_rate}
@register_executor(NodeType.SEQUENCE)
class SequenceExecutor(Executor):
"""
Concatenate inputs in time order.
Config:
transition: Transition config
type: "cut" | "crossfade" | "fade"
duration: Transition duration in seconds
target_size: How to determine output dimensions when inputs differ
"first": Use first input's dimensions (default)
"last": Use last input's dimensions
"largest": Use largest width and height from all inputs
"explicit": Use width/height config values
width: Target width (when target_size="explicit")
height: Target height (when target_size="explicit")
        background: Padding color for letterbox/pillarbox (default: "black")

    Note: the sizing options only take effect for the "cut" transition;
    "crossfade" assumes inputs share dimensions (xfade requires it).
    """
def execute(
self,
config: Dict[str, Any],
inputs: List[Path],
output_path: Path,
) -> Path:
if len(inputs) < 1:
raise ValueError("SEQUENCE requires at least one input")
if len(inputs) == 1:
output_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(inputs[0], output_path)
return output_path
transition = config.get("transition", {"type": "cut"})
transition_type = transition.get("type", "cut")
transition_duration = transition.get("duration", 0.5)
# Size handling config
target_size = config.get("target_size", "first")
width = config.get("width")
height = config.get("height")
background = config.get("background", "black")
if transition_type == "cut":
return self._concat_cut(inputs, output_path, target_size, width, height, background)
elif transition_type == "crossfade":
return self._concat_crossfade(inputs, output_path, transition_duration)
elif transition_type == "fade":
return self._concat_fade(inputs, output_path, transition_duration)
else:
raise ValueError(f"Unknown transition type: {transition_type}")
def _concat_cut(
self,
inputs: List[Path],
output_path: Path,
target_size: str = "first",
        width: Optional[int] = None,
        height: Optional[int] = None,
background: str = "black",
) -> Path:
"""
Concatenate with scaling/padding to handle different resolutions.
Args:
inputs: Input video paths
output_path: Output path
target_size: How to determine output size:
- "first": Use first input's dimensions (default)
- "last": Use last input's dimensions
- "largest": Use largest dimensions from all inputs
- "explicit": Use width/height params
width: Explicit width (when target_size="explicit")
height: Explicit height (when target_size="explicit")
background: Padding color (default: black)
"""
output_path.parent.mkdir(parents=True, exist_ok=True)
n = len(inputs)
input_args = []
for p in inputs:
input_args.extend(["-i", str(p)])
# Get video info for all inputs
infos = [_get_video_info(p) for p in inputs]
# Determine target dimensions
if target_size == "explicit" and width and height:
target_w, target_h = width, height
elif target_size == "last":
target_w, target_h = infos[-1]["width"], infos[-1]["height"]
elif target_size == "largest":
target_w = max(i["width"] for i in infos)
target_h = max(i["height"] for i in infos)
else: # "first" or default
target_w, target_h = infos[0]["width"], infos[0]["height"]
# Use common frame rate (from first input) and sample rate
target_fps = infos[0]["fps"]
target_sr = max(i["sample_rate"] for i in infos)
# Build filter for each input: scale to fit + pad to target size
filter_parts = []
for i in range(n):
# Scale to fit within target, maintaining aspect ratio, then pad
vf = (
f"[{i}:v]scale={target_w}:{target_h}:force_original_aspect_ratio=decrease,"
f"pad={target_w}:{target_h}:(ow-iw)/2:(oh-ih)/2:color={background},"
f"setsar=1,fps={target_fps:.6f}[v{i}]"
)
# Resample audio to common rate
af = f"[{i}:a]aresample={target_sr}[a{i}]"
filter_parts.append(vf)
filter_parts.append(af)
# Build concat filter
stream_labels = "".join(f"[v{i}][a{i}]" for i in range(n))
filter_parts.append(f"{stream_labels}concat=n={n}:v=1:a=1[outv][outa]")
filter_complex = ";".join(filter_parts)
cmd = [
"ffmpeg", "-y",
*input_args,
"-filter_complex", filter_complex,
"-map", "[outv]",
"-map", "[outa]",
*get_web_encoding_args(),
str(output_path)
]
logger.debug(f"SEQUENCE cut: {n} clips -> {target_w}x{target_h} (web-optimized)")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"Concat failed: {result.stderr}")
return output_path
def _concat_crossfade(
self,
inputs: List[Path],
output_path: Path,
duration: float,
) -> Path:
"""Concatenate with crossfade transitions."""
output_path.parent.mkdir(parents=True, exist_ok=True)
durations = [_get_duration(p) for p in inputs]
n = len(inputs)
input_args = " ".join(f"-i {p}" for p in inputs)
# Build xfade filter chain
filter_parts = []
current = "[0:v]"
for i in range(1, n):
offset = sum(durations[:i]) - duration * i
next_input = f"[{i}:v]"
output_label = f"[v{i}]" if i < n - 1 else "[outv]"
filter_parts.append(
f"{current}{next_input}xfade=transition=fade:duration={duration}:offset={offset}{output_label}"
)
current = output_label
# Audio crossfade chain
audio_current = "[0:a]"
for i in range(1, n):
next_input = f"[{i}:a]"
output_label = f"[a{i}]" if i < n - 1 else "[outa]"
filter_parts.append(
f"{audio_current}{next_input}acrossfade=d={duration}{output_label}"
)
audio_current = output_label
        filter_complex = ";".join(filter_parts)
        cmd = [
            "ffmpeg", "-y", *input_args,
            "-filter_complex", filter_complex,
            "-map", "[outv]", "-map", "[outa]",
            *get_web_encoding_args(),
            str(output_path)
        ]
        logger.debug(f"SEQUENCE crossfade: {n} clips (web-optimized)")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            logger.warning(f"Crossfade failed, falling back to cut: {result.stderr[:200]}")
            return self._concat_cut(inputs, output_path)
        return output_path
def _concat_fade(
self,
inputs: List[Path],
output_path: Path,
duration: float,
) -> Path:
"""Concatenate with fade out/in transitions."""
output_path.parent.mkdir(parents=True, exist_ok=True)
faded_paths = []
for i, path in enumerate(inputs):
            clip_dur = _get_duration(path)
            # Clamp so clips shorter than the fade never get a negative start
            fade_out_start = max(clip_dur - duration, 0)
            faded_path = output_path.parent / f"_faded_{i}.mkv"
            cmd = [
                "ffmpeg", "-y",
                "-i", str(path),
                "-vf", f"fade=in:st=0:d={duration},fade=out:st={fade_out_start}:d={duration}",
                "-af", f"afade=in:st=0:d={duration},afade=out:st={fade_out_start}:d={duration}",
"-c:v", "libx264", "-preset", "ultrafast", "-crf", "18",
"-c:a", "aac",
str(faded_path)
]
subprocess.run(cmd, capture_output=True, check=True)
faded_paths.append(faded_path)
result = self._concat_cut(faded_paths, output_path)
for p in faded_paths:
            p.unlink(missing_ok=True)
return result
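# A minimal SEQUENCE sketch (hypothetical paths; direct instantiation is
# assumed here for illustration, the real construction is defined by the
# Executor base class):
#
#   SequenceExecutor().execute(
#       {"transition": {"type": "cut"}, "target_size": "largest"},
#       [Path("intro.mp4"), Path("main.mp4")],
#       Path("out/full.mp4"),
#   )
#
# For two 1920x1080 inputs with 48 kHz audio, _concat_cut builds a graph like:
#   [0:v]scale=1920:1080:force_original_aspect_ratio=decrease,
#        pad=1920:1080:(ow-iw)/2:(oh-ih)/2:color=black,setsar=1,fps=30.000000[v0];
#   [0:a]aresample=48000[a0];  (same for input 1)
#   [v0][a0][v1][a1]concat=n=2:v=1:a=1[outv][outa]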
@register_executor(NodeType.LAYER)
class LayerExecutor(Executor):
"""
Layer inputs spatially (overlay/composite).
Config:
inputs: List of per-input configs
position: [x, y] offset
opacity: 0.0-1.0
scale: Scale factor
"""
def execute(
self,
config: Dict[str, Any],
inputs: List[Path],
output_path: Path,
) -> Path:
if len(inputs) < 1:
raise ValueError("LAYER requires at least one input")
if len(inputs) == 1:
output_path.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(inputs[0], output_path)
return output_path
input_configs = config.get("inputs", [{}] * len(inputs))
output_path.parent.mkdir(parents=True, exist_ok=True)
input_args = " ".join(f"-i {p}" for p in inputs)
n = len(inputs)
filter_parts = []
current = "[0:v]"
for i in range(1, n):
cfg = input_configs[i] if i < len(input_configs) else {}
x, y = cfg.get("position", [0, 0])
opacity = cfg.get("opacity", 1.0)
scale = cfg.get("scale", 1.0)
scale_label = f"[s{i}]"
if scale != 1.0:
filter_parts.append(f"[{i}:v]scale=iw*{scale}:ih*{scale}{scale_label}")
overlay_input = scale_label
else:
overlay_input = f"[{i}:v]"
output_label = f"[v{i}]" if i < n - 1 else "[outv]"
if opacity < 1.0:
filter_parts.append(
f"{overlay_input}format=rgba,colorchannelmixer=aa={opacity}[a{i}]"
)
overlay_input = f"[a{i}]"
filter_parts.append(
f"{current}{overlay_input}overlay=x={x}:y={y}:format=auto{output_label}"
)
current = output_label
        filter_complex = ";".join(filter_parts)
        cmd = [
            "ffmpeg", "-y", *input_args,
            "-filter_complex", filter_complex,
            "-map", "[outv]", "-map", "0:a?",
            *get_web_encoding_args(),
            str(output_path)
        ]
        logger.debug(f"LAYER: {n} inputs (web-optimized)")
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            raise RuntimeError(f"Layer failed: {result.stderr}")
        return output_path
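# A LAYER config sketch (hypothetical values). The first input is the base
# canvas, so a per-input entry at index 0 is never read by the overlay chain
# above; each later entry positions, scales, and fades its own layer:
#
#   {
#       "inputs": [
#           {},  # base layer
#           {"position": [50, 50], "scale": 0.25, "opacity": 0.8},  # picture-in-picture
#       ]
#   }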
@register_executor(NodeType.MUX)
class MuxExecutor(Executor):
"""
Combine video and audio streams.
Config:
video_stream: Index of video input (default: 0)
audio_stream: Index of audio input (default: 1)
shortest: End when shortest stream ends (default: True)
"""
def execute(
self,
config: Dict[str, Any],
inputs: List[Path],
output_path: Path,
) -> Path:
if len(inputs) < 2:
raise ValueError("MUX requires at least 2 inputs (video + audio)")
video_idx = config.get("video_stream", 0)
audio_idx = config.get("audio_stream", 1)
shortest = config.get("shortest", True)
video_path = inputs[video_idx]
audio_path = inputs[audio_idx]
output_path.parent.mkdir(parents=True, exist_ok=True)
cmd = [
"ffmpeg", "-y",
"-i", str(video_path),
"-i", str(audio_path),
"-c:v", "copy",
"-c:a", "aac",
"-map", "0:v:0",
"-map", "1:a:0",
]
if shortest:
cmd.append("-shortest")
cmd.append(str(output_path))
logger.debug(f"MUX: video={video_path.name} + audio={audio_path.name}")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"Mux failed: {result.stderr}")
return output_path
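# MUX sketch (hypothetical paths): pair a silent render with a separately
# produced soundtrack. Video is stream-copied, audio is re-encoded to AAC, and
# shortest=True trims the output to the shorter of the two.
#
#   MuxExecutor().execute(
#       {"video_stream": 0, "audio_stream": 1, "shortest": True},
#       [Path("visuals.mp4"), Path("soundtrack.wav")],
#       Path("out/muxed.mp4"),
#   )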
@register_executor(NodeType.BLEND)
class BlendExecutor(Executor):
"""
Blend two inputs using a blend mode.
Config:
mode: Blend mode (multiply, screen, overlay, add, etc.)
opacity: 0.0-1.0 for second input
"""
BLEND_MODES = {
"multiply": "multiply",
"screen": "screen",
"overlay": "overlay",
"add": "addition",
"subtract": "subtract",
"average": "average",
"difference": "difference",
"lighten": "lighten",
"darken": "darken",
}
def execute(
self,
config: Dict[str, Any],
inputs: List[Path],
output_path: Path,
) -> Path:
if len(inputs) != 2:
raise ValueError("BLEND requires exactly 2 inputs")
mode = config.get("mode", "overlay")
opacity = config.get("opacity", 0.5)
if mode not in self.BLEND_MODES:
raise ValueError(f"Unknown blend mode: {mode}")
output_path.parent.mkdir(parents=True, exist_ok=True)
blend_mode = self.BLEND_MODES[mode]
        if opacity < 1.0:
            filter_complex = (
                f"[1:v]format=rgba,colorchannelmixer=aa={opacity}[b];"
                f"[0:v][b]blend=all_mode={blend_mode}[outv]"
            )
        else:
            filter_complex = f"[0:v][1:v]blend=all_mode={blend_mode}[outv]"
        cmd = [
            "ffmpeg", "-y",
            "-i", str(inputs[0]),
            "-i", str(inputs[1]),
            "-filter_complex", filter_complex,
            "-map", "[outv]",
            "-map", "0:a?",
*get_web_encoding_args(),
str(output_path)
]
logger.debug(f"BLEND: {mode} (opacity={opacity}) (web-optimized)")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"Blend failed: {result.stderr}")
return output_path
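# BLEND sketch (hypothetical paths). ffmpeg's blend filter does not scale for
# you, so both inputs should share the same dimensions; scale/pad upstream if
# they differ.
#
#   BlendExecutor().execute(
#       {"mode": "screen", "opacity": 0.4},
#       [Path("base.mp4"), Path("texture.mp4")],
#       Path("out/blended.mp4"),
#   )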
@register_executor(NodeType.AUDIO_MIX)
class AudioMixExecutor(Executor):
"""
Mix multiple audio streams.
Config:
gains: List of gain values per input (0.0-2.0, default 1.0)
normalize: Normalize output to prevent clipping (default True)
"""
def execute(
self,
config: Dict[str, Any],
inputs: List[Path],
output_path: Path,
) -> Path:
if len(inputs) < 2:
raise ValueError("AUDIO_MIX requires at least 2 inputs")
        # Copy so the caller's config list is never mutated, then pad with
        # unity gain so every input has an entry
        gains = list(config.get("gains", []))
        normalize = config.get("normalize", True)
        while len(gains) < len(inputs):
            gains.append(1.0)
output_path.parent.mkdir(parents=True, exist_ok=True)
# Build filter: apply volume to each input, then mix
filter_parts = []
mix_inputs = []
for i, gain in enumerate(gains[:len(inputs)]):
if gain != 1.0:
filter_parts.append(f"[{i}:a]volume={gain}[a{i}]")
mix_inputs.append(f"[a{i}]")
else:
mix_inputs.append(f"[{i}:a]")
# amix filter
normalize_flag = 1 if normalize else 0
mix_filter = f"{''.join(mix_inputs)}amix=inputs={len(inputs)}:normalize={normalize_flag}[aout]"
filter_parts.append(mix_filter)
filter_complex = ";".join(filter_parts)
cmd = [
"ffmpeg", "-y",
]
for p in inputs:
cmd.extend(["-i", str(p)])
cmd.extend([
"-filter_complex", filter_complex,
"-map", "[aout]",
"-c:a", "aac",
str(output_path)
])
logger.debug(f"AUDIO_MIX: {len(inputs)} inputs, gains={gains[:len(inputs)]}")
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
raise RuntimeError(f"Audio mix failed: {result.stderr}")
return output_path
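# AUDIO_MIX sketch (hypothetical paths): duck a music bed under a voice track.
# With normalize=False amix sums the streams as-is, so keep the gains' sum
# near 1.0 to avoid clipping.
#
#   AudioMixExecutor().execute(
#       {"gains": [1.0, 0.3], "normalize": False},
#       [Path("voice.wav"), Path("music.wav")],
#       Path("out/mix.m4a"),
#   )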