348 lines
9.2 KiB
Python
348 lines
9.2 KiB
Python
"""
|
|
FFmpeg pipe-based frame processing.
|
|
|
|
Processes video through Python frame-by-frame effects using FFmpeg pipes:
|
|
FFmpeg decode -> Python process_frame -> FFmpeg encode
|
|
|
|
This avoids writing intermediate frames to disk.
|
|
"""
|
|
|
|
import logging
|
|
import subprocess
|
|
from dataclasses import dataclass
|
|
from pathlib import Path
|
|
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
|
|
import numpy as np
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclass
class VideoInfo:
    """
    Basic properties of a video stream.

    Attributes:
        width: Frame width in pixels.
        height: Frame height in pixels.
        frame_rate: Frames per second.
        total_frames: Number of frames in the stream.
        duration: Length of the stream in seconds.
        pixel_format: Pixel format name; defaults to "rgb24".
    """

    width: int
    height: int
    frame_rate: float
    total_frames: int
    duration: float
    pixel_format: str = "rgb24"
|
|
|
|
|
|
def probe_video(path: Path) -> VideoInfo:
    """
    Get video information using ffprobe.

    The first video stream is queried and the answer is parsed by field
    name. ffprobe prints stream entries in its own fixed order (``duration``
    before ``nb_frames``), not in the order they are listed in
    ``-show_entries``, so positional parsing would swap the two values.

    Args:
        path: Path to video file

    Returns:
        VideoInfo with dimensions, frame rate, etc. ``total_frames`` and
        ``duration`` are 0 when ffprobe does not report a usable value;
        ``total_frames`` is estimated as ``duration * frame_rate`` when only
        the duration is known.

    Raises:
        RuntimeError: If ffprobe exits with an error, or its output lacks a
            usable width, height or frame rate.
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height,r_frame_rate,nb_frames,duration",
        # One "key=value" line per field, without [STREAM] wrappers.
        "-of", "default=noprint_wrappers=1",
        str(path),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr}")

    fields: Dict[str, str] = {}
    for line in result.stdout.splitlines():
        key, sep, value = line.strip().partition("=")
        # Keep the first occurrence: some containers list the same stream
        # more than once (e.g. once per program).
        if sep and key not in fields:
            fields[key] = value

    try:
        width = int(fields["width"])
        height = int(fields["height"])

        # Frame rate is a rational such as "30000/1001", or a plain number.
        numerator, _, denominator = fields["r_frame_rate"].partition("/")
        if denominator:
            frame_rate = float(numerator) / float(denominator)
        else:
            frame_rate = float(numerator)
    except (KeyError, ValueError, ZeroDivisionError) as exc:
        # Missing field, non-numeric value, or a "0/0" frame rate.
        raise RuntimeError(f"Unexpected ffprobe output: {result.stdout}") from exc

    # nb_frames and duration may be absent or "N/A" depending on the container.
    try:
        total_frames = int(fields.get("nb_frames", ""))
    except ValueError:
        total_frames = 0

    try:
        duration = float(fields.get("duration", ""))
    except ValueError:
        duration = 0.0

    if total_frames == 0 and duration > 0:
        total_frames = int(duration * frame_rate)

    return VideoInfo(
        width=width,
        height=height,
        frame_rate=frame_rate,
        total_frames=total_frames,
        duration=duration,
    )
|
|
|
|
|
|
# Signature of a per-frame effect: (frame, params, state) -> (frame, state).
FrameProcessor = Callable[[np.ndarray, Dict[str, Any], Any], Tuple[np.ndarray, Any]]
|
|
|
|
|
|
# Bytes per pixel for packed formats whose raw frames can be viewed as a
# (height, width, channels) uint8 array.
_PACKED_PIXEL_BYTES = {
    "gray": 1,
    "rgb24": 3,
    "bgr24": 3,
    "rgba": 4,
    "bgra": 4,
    "argb": 4,
    "abgr": 4,
}


def _bytes_per_pixel(pixel_format: str) -> int:
    """Return the size of one pixel in bytes, falling back to 3 (RGB)."""
    size = _PACKED_PIXEL_BYTES.get(pixel_format)
    if size is None:
        logger.warning(
            f"Unknown pixel format {pixel_format!r}; assuming 3 bytes per pixel"
        )
        return 3
    return size


def _start_pipeline(
    input_path: Path,
    output_path: Path,
    info: VideoInfo,
    pixel_format: str,
    output_codec: str,
    output_options: List[str],
) -> Tuple[subprocess.Popen, subprocess.Popen, Any]:
    """Launch the FFmpeg decoder and encoder; return (decoder, encoder, encoder_log)."""
    import tempfile

    decode_cmd = [
        "ffmpeg",
        "-i", str(input_path),
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-",
    ]

    encode_cmd = [
        "ffmpeg",
        "-y",
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-s", f"{info.width}x{info.height}",
        "-r", str(info.frame_rate),
        "-i", "-",
        "-i", str(input_path),  # For audio
        "-map", "0:v",
        "-map", "1:a?",
        "-c:v", output_codec,
        "-c:a", "aac",
        *output_options,
        str(output_path),
    ]

    logger.debug(f"Decoder: {' '.join(decode_cmd)}")
    logger.debug(f"Encoder: {' '.join(encode_cmd)}")

    # The encoder's stderr goes to a temp file rather than a pipe: nobody
    # reads it while frames are being written, so a pipe could fill up and
    # block ffmpeg (and, in turn, our writes) forever.
    encoder_log = tempfile.TemporaryFile()
    decoder = None
    try:
        decoder = subprocess.Popen(
            decode_cmd,
            stdin=subprocess.DEVNULL,  # keep ffmpeg from reading our stdin
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        encoder = subprocess.Popen(
            encode_cmd,
            stdin=subprocess.PIPE,
            stderr=encoder_log,
        )
    except Exception:
        # Do not leak the decoder or the log file if a launch fails.
        if decoder is not None:
            decoder.kill()
            decoder.stdout.close()
            decoder.wait()
        encoder_log.close()
        raise

    return decoder, encoder, encoder_log


def _shutdown_pipeline(
    decoder: subprocess.Popen,
    encoder: subprocess.Popen,
    encoder_log: Any,
) -> str:
    """Close the pipes, reap both processes and return the encoder's stderr text."""
    try:
        decoder.stdout.close()
        decoder.wait()
        try:
            encoder.stdin.close()
        except OSError:
            # Encoder already exited; its return code reports the failure.
            pass
        encoder.wait()
        encoder_log.seek(0)
        return encoder_log.read().decode(errors="replace")
    finally:
        encoder_log.close()


def _write_frame(encoder: subprocess.Popen, frame: np.ndarray) -> bool:
    """Send one frame to the encoder; return False if the encoder has exited."""
    try:
        encoder.stdin.write(frame.astype(np.uint8, copy=False).tobytes())
    except BrokenPipeError:
        return False
    return True


def process_video(
    input_path: Path,
    output_path: Path,
    process_frame: FrameProcessor,
    params: Dict[str, Any],
    bindings: Optional[Dict[str, List[float]]] = None,
    initial_state: Any = None,
    pixel_format: str = "rgb24",
    output_codec: str = "libx264",
    output_options: Optional[List[str]] = None,
) -> Tuple[Path, Any]:
    """
    Process video through frame-by-frame effect.

    Frames are decoded by one ffmpeg process, passed through ``process_frame``
    and written to a second ffmpeg process that encodes them and copies the
    audio track (if any) from the input.

    Args:
        input_path: Input video path
        output_path: Output video path
        process_frame: Function (frame, params, state) -> (frame, state)
        params: Static parameter dict
        bindings: Per-frame parameter lookup tables; for frame ``i`` the value
            ``bindings[name][i]`` overrides ``params[name]`` while the table
            is long enough
        initial_state: Initial state for process_frame
        pixel_format: Pixel format for frame data
        output_codec: Video codec for output
        output_options: Additional ffmpeg output options

    Returns:
        Tuple of (output_path, final_state)

    Raises:
        ValueError: If ``process_frame`` returns a frame of a different shape.
        RuntimeError: If probing fails, the frame size is invalid, or the
            encoder exits early or with a non-zero status.
    """
    bindings = bindings or {}
    output_options = output_options or []

    # Probe input
    info = probe_video(input_path)
    logger.info(f"Processing {info.width}x{info.height} @ {info.frame_rate}fps")

    bytes_per_pixel = _bytes_per_pixel(pixel_format)
    frame_shape = (info.height, info.width, bytes_per_pixel)
    frame_size = info.width * info.height * bytes_per_pixel
    if frame_size <= 0:
        # read(0) never looks like EOF, so the loop below would not terminate.
        raise RuntimeError(f"Invalid frame size: {info.width}x{info.height}")

    decoder, encoder, encoder_log = _start_pipeline(
        input_path, output_path, info, pixel_format, output_codec, output_options
    )

    state = initial_state
    frame_idx = 0
    encoder_gone = False
    encoder_stderr = ""

    try:
        while True:
            # A short read means the decoder reached end of stream.
            raw_frame = decoder.stdout.read(frame_size)
            if len(raw_frame) < frame_size:
                break

            frame = np.frombuffer(raw_frame, dtype=np.uint8).reshape(frame_shape)

            # Build per-frame params
            frame_params = dict(params)
            for param_name, values in bindings.items():
                if frame_idx < len(values):
                    frame_params[param_name] = values[frame_idx]

            processed, state = process_frame(frame, frame_params, state)

            if processed.shape != frame.shape:
                raise ValueError(
                    f"Frame shape mismatch: {processed.shape} vs {frame.shape}"
                )

            if not _write_frame(encoder, processed):
                encoder_gone = True
                break
            frame_idx += 1

            if frame_idx % 100 == 0:
                logger.debug(f"Processed frame {frame_idx}")

    except Exception as e:
        logger.error(f"Frame processing failed at frame {frame_idx}: {e}")
        raise
    finally:
        encoder_stderr = _shutdown_pipeline(decoder, encoder, encoder_log)

    if encoder_gone or encoder.returncode != 0:
        raise RuntimeError(f"Encoder failed: {encoder_stderr}")

    if decoder.returncode != 0:
        logger.warning(
            f"Decoder exited with code {decoder.returncode}; output may be incomplete"
        )

    logger.info(f"Processed {frame_idx} frames")
    return output_path, state


def process_video_batch(
    input_path: Path,
    output_path: Path,
    process_frames: Callable[[List[np.ndarray], Dict[str, Any]], List[np.ndarray]],
    params: Dict[str, Any],
    batch_size: int = 30,
    pixel_format: str = "rgb24",
    output_codec: str = "libx264",
) -> Path:
    """
    Process video in batches for effects that need temporal context.

    Frames are collected until ``batch_size`` are available (or the stream
    ends), handed to ``process_frames`` as a list, and every returned frame
    is written to the encoder in order.

    Args:
        input_path: Input video path
        output_path: Output video path
        process_frames: Function (frames_batch, params) -> processed_batch
        params: Parameter dict
        batch_size: Number of frames per batch
        pixel_format: Pixel format
        output_codec: Output codec

    Returns:
        Output path

    Raises:
        RuntimeError: If probing fails, the frame size is invalid, or the
            encoder exits early or with a non-zero status.
    """
    info = probe_video(input_path)

    bytes_per_pixel = _bytes_per_pixel(pixel_format)
    frame_shape = (info.height, info.width, bytes_per_pixel)
    frame_size = info.width * info.height * bytes_per_pixel
    if frame_size <= 0:
        # read(0) never looks like EOF, so the loop below would not terminate.
        raise RuntimeError(f"Invalid frame size: {info.width}x{info.height}")

    decoder, encoder, encoder_log = _start_pipeline(
        input_path, output_path, info, pixel_format, output_codec, []
    )

    pending: List[np.ndarray] = []
    total_processed = 0
    encoder_gone = False
    encoder_stderr = ""

    try:
        while not encoder_gone:
            raw_frame = decoder.stdout.read(frame_size)
            at_eof = len(raw_frame) < frame_size

            if not at_eof:
                pending.append(
                    np.frombuffer(raw_frame, dtype=np.uint8).reshape(frame_shape)
                )

            # Flush a full batch, or whatever is left once the stream ends.
            if pending and (at_eof or len(pending) >= batch_size):
                for out_frame in process_frames(pending, params):
                    if not _write_frame(encoder, out_frame):
                        encoder_gone = True
                        break
                    total_processed += 1
                pending = []

            if at_eof:
                break
    finally:
        encoder_stderr = _shutdown_pipeline(decoder, encoder, encoder_log)

    if encoder_gone or encoder.returncode != 0:
        raise RuntimeError(f"Encoder failed: {encoder_stderr}")

    if decoder.returncode != 0:
        logger.warning(
            f"Decoder exited with code {decoder.returncode}; output may be incomplete"
        )

    logger.info(f"Processed {total_processed} frames in batches of {batch_size}")
    return output_path
|