Files
rose-ash/artdag/effects/frame_processor.py
giles cc2dcbddd4 Squashed 'core/' content from commit 4957443
git-subtree-dir: core
git-subtree-split: 4957443184ae0eb6323635a90a19acffb3e01d07
2026-02-24 23:09:39 +00:00

348 lines
9.2 KiB
Python

"""
FFmpeg pipe-based frame processing.
Processes video through Python frame-by-frame effects using FFmpeg pipes:
FFmpeg decode -> Python process_frame -> FFmpeg encode
This avoids writing intermediate frames to disk.
"""
import logging
import subprocess
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
import numpy as np
logger = logging.getLogger(__name__)
@dataclass
class VideoInfo:
    """Summary of a video stream's basic properties.

    Instances are built by ``probe_video`` from ffprobe's report on the
    first video stream of a file.
    """

    # Frame dimensions in pixels.
    width: int
    height: int
    # Frames per second, as a float (ffprobe's rational is already divided).
    frame_rate: float
    # Number of frames; 0 when it is neither reported nor derivable.
    total_frames: int
    # Stream length in seconds; 0.0 when unknown.
    duration: float
    # Pixel layout name used for raw frame data.
    pixel_format: str = "rgb24"
def probe_video(path: Path) -> VideoInfo:
    """
    Get video information using ffprobe.

    The first video stream is queried with JSON output so that every value
    is looked up by field name. ffprobe prints stream fields in its own
    fixed order (``duration`` comes before ``nb_frames``) no matter how they
    are listed in ``-show_entries``, so positional parsing of CSV output
    swaps those two values.

    Args:
        path: Path to video file

    Returns:
        VideoInfo with dimensions, frame rate, frame count and duration.
        ``total_frames`` is estimated as ``duration * frame_rate`` when the
        container does not report a frame count; both values are 0 when
        ffprobe reports neither.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status, or if its
            output does not describe a video stream with a usable width,
            height and frame rate.
    """
    import json  # function-scope import keeps this block self-contained

    cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height,r_frame_rate,nb_frames,duration",
        "-of", "json",
        str(path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr}")

    try:
        # An input without any video stream yields an empty "streams" list.
        stream = json.loads(result.stdout)["streams"][0]
        width = int(stream["width"])
        height = int(stream["height"])
        # Frame rate is a rational such as "30000/1001", or a bare number.
        numerator, _, denominator = str(stream["r_frame_rate"]).partition("/")
        frame_rate = float(numerator)
        if denominator:
            frame_rate /= float(denominator)
    except (ValueError, KeyError, IndexError, TypeError, ZeroDivisionError) as err:
        raise RuntimeError(f"Unexpected ffprobe output: {result.stdout}") from err

    # nb_frames / duration may be absent or "N/A" depending on the container;
    # treat anything unparsable as unknown rather than failing the probe.
    try:
        total_frames = int(stream.get("nb_frames", 0))
    except (TypeError, ValueError):
        total_frames = 0
    try:
        duration = float(stream.get("duration", 0.0))
    except (TypeError, ValueError):
        duration = 0.0

    if total_frames == 0 and duration > 0:
        total_frames = int(duration * frame_rate)

    return VideoInfo(
        width=width,
        height=height,
        frame_rate=frame_rate,
        total_frames=total_frames,
        duration=duration,
    )
# Signature of a per-frame effect callback:
#   (frame, params, state) -> (processed_frame, new_state)
FrameProcessor = Callable[[np.ndarray, Dict[str, Any], Any], Tuple[np.ndarray, Any]]
def process_video(
    input_path: Path,
    output_path: Path,
    process_frame: FrameProcessor,
    params: Dict[str, Any],
    bindings: Optional[Dict[str, List[float]]] = None,
    initial_state: Any = None,
    pixel_format: str = "rgb24",
    output_codec: str = "libx264",
    output_options: Optional[List[str]] = None,
) -> Tuple[Path, Any]:
    """
    Process video through frame-by-frame effect.

    One ffmpeg process decodes the input to raw frames on a pipe, each frame
    is passed through ``process_frame``, and a second ffmpeg process encodes
    the results, taking audio (if any) from the original input.

    Args:
        input_path: Input video path
        output_path: Output video path
        process_frame: Function (frame, params, state) -> (frame, state).
            The frame it receives is a read-only uint8 array of shape
            (height, width, bytes_per_pixel); the returned frame must have
            the same shape.
        params: Static parameter dict
        bindings: Per-frame parameter lookup tables. For frame ``i`` each
            table that has an entry at index ``i`` overrides the parameter
            of the same name; shorter tables leave the static value.
        initial_state: Initial state for process_frame
        pixel_format: Pixel format for frame data. "rgba" is treated as
            4 bytes per pixel, everything else as 3.
        output_codec: Video codec for output
        output_options: Additional ffmpeg output options

    Returns:
        Tuple of (output_path, final_state)

    Raises:
        ValueError: If process_frame returns a frame of a different shape.
        RuntimeError: If the probed frame size is not positive, or the
            encoder exits with a non-zero status (message carries its
            stderr output).
    """
    import tempfile  # function-scope import: only used to spool encoder stderr

    bindings = bindings or {}
    output_options = output_options or []

    # Probe input
    info = probe_video(input_path)
    logger.info(f"Processing {info.width}x{info.height} @ {info.frame_rate}fps")

    # Calculate bytes per frame (anything that is not "rgba" defaults to RGB)
    bytes_per_pixel = 4 if pixel_format == "rgba" else 3
    frame_shape = (info.height, info.width, bytes_per_pixel)
    frame_size = info.width * info.height * bytes_per_pixel
    if frame_size <= 0:
        # read(0) never reports a short read, so the loop below would spin.
        raise RuntimeError(
            f"Invalid frame size for {info.width}x{info.height} video"
        )

    decode_cmd = [
        "ffmpeg",
        "-i", str(input_path),
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-",
    ]
    encode_cmd = [
        "ffmpeg",
        "-y",
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-s", f"{info.width}x{info.height}",
        "-r", str(info.frame_rate),
        "-i", "-",
        "-i", str(input_path),  # For audio
        "-map", "0:v",
        "-map", "1:a?",
        "-c:v", output_codec,
        "-c:a", "aac",
        *output_options,
        str(output_path),
    ]
    logger.debug(f"Decoder: {' '.join(decode_cmd)}")
    logger.debug(f"Encoder: {' '.join(encode_cmd)}")

    # The encoder's stderr goes to a temp file instead of a pipe: nothing
    # reads it while frames are being written, so a pipe could fill up and
    # block the encoder (and therefore this loop) forever.
    with tempfile.TemporaryFile() as encoder_log:
        decoder = subprocess.Popen(
            decode_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        try:
            encoder = subprocess.Popen(
                encode_cmd,
                stdin=subprocess.PIPE,
                stderr=encoder_log,
            )
        except Exception:
            # Don't leave the decoder running if the encoder cannot start.
            decoder.kill()
            decoder.stdout.close()
            decoder.wait()
            raise

        state = initial_state
        frame_idx = 0
        try:
            while True:
                # Read frame from decoder; a short read means end of stream
                raw_frame = decoder.stdout.read(frame_size)
                if len(raw_frame) < frame_size:
                    break

                # Convert to numpy
                frame = np.frombuffer(raw_frame, dtype=np.uint8).reshape(frame_shape)

                # Build per-frame params
                frame_params = dict(params)
                for param_name, values in bindings.items():
                    if frame_idx < len(values):
                        frame_params[param_name] = values[frame_idx]

                # Process frame
                processed, state = process_frame(frame, frame_params, state)

                # Ensure correct shape and dtype
                if processed.shape != frame.shape:
                    raise ValueError(
                        f"Frame shape mismatch: {processed.shape} vs {frame.shape}"
                    )
                processed = processed.astype(np.uint8, copy=False)

                # Write to encoder
                try:
                    encoder.stdin.write(processed.tobytes())
                except BrokenPipeError:
                    # Encoder closed its input early; its exit status
                    # (checked below) decides whether that is an error.
                    break

                frame_idx += 1
                if frame_idx % 100 == 0:
                    logger.debug(f"Processed frame {frame_idx}")
        except Exception as e:
            logger.error(f"Frame processing failed at frame {frame_idx}: {e}")
            raise
        finally:
            decoder.stdout.close()
            decoder.wait()
            try:
                encoder.stdin.close()
            except BrokenPipeError:
                pass  # encoder already exited; reported via returncode
            encoder.wait()

        if encoder.returncode != 0:
            encoder_log.seek(0)
            stderr = encoder_log.read().decode(errors="replace")
            raise RuntimeError(f"Encoder failed: {stderr}")

    logger.info(f"Processed {frame_idx} frames")
    return output_path, state
def process_video_batch(
    input_path: Path,
    output_path: Path,
    process_frames: Callable[[List[np.ndarray], Dict[str, Any]], List[np.ndarray]],
    params: Dict[str, Any],
    batch_size: int = 30,
    pixel_format: str = "rgb24",
    output_codec: str = "libx264",
) -> Path:
    """
    Process video in batches for effects that need temporal context.

    Frames are decoded by one ffmpeg process, collected into lists of up to
    ``batch_size`` frames, passed to ``process_frames``, and the returned
    frames are written to a second ffmpeg process that encodes the output,
    taking audio (if any) from the original input. A final partial batch is
    processed when the stream ends.

    Args:
        input_path: Input video path
        output_path: Output video path
        process_frames: Function (frames_batch, params) -> processed_batch.
            Each input frame is a read-only uint8 array of shape
            (height, width, bytes_per_pixel).
        params: Parameter dict
        batch_size: Number of frames per batch
        pixel_format: Pixel format. "rgba" is treated as 4 bytes per pixel,
            everything else as 3.
        output_codec: Output codec

    Returns:
        Output path

    Raises:
        RuntimeError: If the probed frame size is not positive, or the
            encoder exits with a non-zero status (message carries its
            stderr output).
    """
    import tempfile  # function-scope import: only used to spool encoder stderr

    info = probe_video(input_path)

    bytes_per_pixel = 4 if pixel_format == "rgba" else 3
    frame_shape = (info.height, info.width, bytes_per_pixel)
    frame_size = info.width * info.height * bytes_per_pixel
    if frame_size <= 0:
        # read(0) never reports a short read, so the loop below would spin.
        raise RuntimeError(
            f"Invalid frame size for {info.width}x{info.height} video"
        )

    decode_cmd = [
        "ffmpeg",
        "-i", str(input_path),
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-",
    ]
    encode_cmd = [
        "ffmpeg",
        "-y",
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-s", f"{info.width}x{info.height}",
        "-r", str(info.frame_rate),
        "-i", "-",
        "-i", str(input_path),  # second input supplies the audio
        "-map", "0:v",
        "-map", "1:a?",
        "-c:v", output_codec,
        "-c:a", "aac",
        str(output_path),
    ]

    # The encoder's stderr goes to a temp file instead of a pipe: nothing
    # reads it while frames are being written, so a pipe could fill up and
    # block the encoder (and therefore this loop) forever.
    with tempfile.TemporaryFile() as encoder_log:
        decoder = subprocess.Popen(
            decode_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        try:
            encoder = subprocess.Popen(
                encode_cmd,
                stdin=subprocess.PIPE,
                stderr=encoder_log,
            )
        except Exception:
            # Don't leave the decoder running if the encoder cannot start.
            decoder.kill()
            decoder.stdout.close()
            decoder.wait()
            raise

        def _write_batch(frames: List[np.ndarray]) -> int:
            """Run the effect on one batch, feed the encoder, return frames written."""
            written = 0
            for out_frame in process_frames(frames, params):
                encoder.stdin.write(out_frame.astype(np.uint8, copy=False).tobytes())
                written += 1
            return written

        batch: List[np.ndarray] = []
        total_processed = 0
        try:
            while True:
                raw_frame = decoder.stdout.read(frame_size)
                # A short read means the decoder has reached end of stream.
                at_end = len(raw_frame) < frame_size
                if not at_end:
                    batch.append(
                        np.frombuffer(raw_frame, dtype=np.uint8).reshape(frame_shape)
                    )
                # Flush on a full batch, or whatever is left at end of stream.
                if batch and (at_end or len(batch) >= batch_size):
                    total_processed += _write_batch(batch)
                    batch = []
                if at_end:
                    break
        except BrokenPipeError:
            # Encoder closed its input early; its exit status (checked
            # below) decides whether that is an error.
            pass
        finally:
            decoder.stdout.close()
            decoder.wait()
            try:
                encoder.stdin.close()
            except BrokenPipeError:
                pass  # encoder already exited; reported via returncode
            encoder.wait()

        if encoder.returncode != 0:
            encoder_log.seek(0)
            stderr = encoder_log.read().decode(errors="replace")
            raise RuntimeError(f"Encoder failed: {stderr}")

    logger.info(f"Processed {total_processed} frames in batches of {batch_size}")
    return output_path