# rose-ash/artdag/effects/frame_processor.py

"""
FFmpeg pipe-based frame processing.

Processes video through Python frame-by-frame effects using FFmpeg pipes:
  FFmpeg decode -> Python process_frame -> FFmpeg encode

This avoids writing intermediate frames to disk.
"""

import logging
import subprocess
import tempfile
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple

import numpy as np

# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)


@dataclass
class VideoInfo:
    """
    Basic properties of a video stream.

    Instances are built by ``probe_video`` from ffprobe output and are used
    to size raw frame buffers and to configure the encoder.
    """

    # Frame dimensions in pixels.
    width: int
    height: int

    # Frames per second, as a float.
    frame_rate: float

    # Number of frames in the stream; 0 when it could not be determined.
    total_frames: int

    # Stream duration as reported by ffprobe (presumably seconds);
    # 0.0 when it could not be determined.
    duration: float

    # Name of the raw pixel layout; probe_video leaves this at the default.
    pixel_format: str = "rgb24"


def probe_video(path: Path) -> VideoInfo:
    """
    Get video information using ffprobe.

    Queries the first video stream of ``path`` for its dimensions, frame
    rate, frame count and duration.

    Args:
        path: Path to video file

    Returns:
        VideoInfo with dimensions, frame rate, etc. ``total_frames`` and
        ``duration`` are 0 when ffprobe cannot report them; if only the
        frame count is missing it is estimated as ``duration * frame_rate``.

    Raises:
        RuntimeError: If ffprobe exits with an error, or if its output lacks
            a usable width, height or frame rate (e.g. no video stream).
    """
    cmd = [
        "ffprobe",
        "-v", "error",
        "-select_streams", "v:0",
        "-show_entries", "stream=width,height,r_frame_rate,nb_frames,duration",
        # key=value output: ffprobe prints fields in its own fixed order, not
        # in the order given to -show_entries, so positional (CSV) parsing
        # would mix up nb_frames and duration.
        "-of", "default=noprint_wrappers=1",
        str(path),
    ]

    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr}")

    # Collect "key=value" lines; the first occurrence of a key wins.
    fields: Dict[str, str] = {}
    for line in result.stdout.splitlines():
        key, sep, value = line.partition("=")
        if sep:
            fields.setdefault(key.strip(), value.strip())

    try:
        width = int(fields["width"])
        height = int(fields["height"])

        # Frame rate is a rational such as "30000/1001", or a plain "30".
        numerator, slash, denominator = fields["r_frame_rate"].partition("/")
        if slash:
            frame_rate = float(numerator) / float(denominator)
        else:
            frame_rate = float(numerator)
    except (KeyError, ValueError, ZeroDivisionError) as exc:
        raise RuntimeError(f"Unexpected ffprobe output: {result.stdout}") from exc

    # nb_frames and duration may be missing or "N/A" for some containers.
    total_frames = 0
    try:
        total_frames = int(fields.get("nb_frames", ""))
    except ValueError:
        pass

    duration = 0.0
    try:
        duration = float(fields.get("duration", ""))
    except ValueError:
        pass

    # Fall back to an estimate when the container does not store a count.
    if total_frames <= 0 and duration > 0:
        total_frames = int(duration * frame_rate)

    return VideoInfo(
        width=width,
        height=height,
        frame_rate=frame_rate,
        total_frames=total_frames,
        duration=duration,
    )


# Type of a per-frame effect callback as used by process_video:
#   (frame, params, state) -> (processed_frame, new_state)
# where params is the parameter dict for the current frame and state is an
# arbitrary value threaded from one call to the next.
FrameProcessor = Callable[[np.ndarray, Dict[str, Any], Any], Tuple[np.ndarray, Any]]


def process_video(
    input_path: Path,
    output_path: Path,
    process_frame: FrameProcessor,
    params: Dict[str, Any],
    bindings: Optional[Dict[str, List[float]]] = None,
    initial_state: Any = None,
    pixel_format: str = "rgb24",
    output_codec: str = "libx264",
    output_options: Optional[List[str]] = None,
) -> Tuple[Path, Any]:
    """
    Process video through frame-by-frame effect.

    One ffmpeg process decodes ``input_path`` to raw frames on a pipe, each
    frame is passed through ``process_frame``, and the result is piped into a
    second ffmpeg process that encodes ``output_path``. The encoder also
    opens ``input_path`` to take its audio (if any), re-encoded as AAC.

    Args:
        input_path: Input video path
        output_path: Output video path (overwritten if it exists)
        process_frame: Function (frame, params, state) -> (frame, state).
            The input frame is a read-only ``uint8`` array of shape
            (height, width, bytes_per_pixel); the returned frame must have
            the same shape and is cast to ``uint8``.
        params: Static parameter dict
        bindings: Per-frame parameter lookup tables. For frame ``i``,
            ``bindings[name][i]`` overrides ``params[name]``; frames past the
            end of a table keep the static value.
        initial_state: Initial state for process_frame
        pixel_format: Pixel format for frame data. Packed 8-bit formats
            ``rgb24``, ``bgr24``, ``rgba``, ``bgra``, ``argb``, ``abgr`` and
            ``gray`` are recognised; anything else is assumed to use 3 bytes
            per pixel.
        output_codec: Video codec for output
        output_options: Additional ffmpeg output options

    Returns:
        Tuple of (output_path, final_state)

    Raises:
        ValueError: If the probed frame size is not positive, or if
            process_frame returns a frame of a different shape.
        RuntimeError: If probing fails or the encoder exits with an error.
    """
    bindings = bindings or {}
    output_options = output_options or []

    # Probe input
    info = probe_video(input_path)
    logger.info("Processing %sx%s @ %sfps", info.width, info.height, info.frame_rate)

    # Bytes per pixel for the packed formats we know how to slice into frames.
    packed_pixel_sizes = {
        "rgb24": 3,
        "bgr24": 3,
        "rgba": 4,
        "bgra": 4,
        "argb": 4,
        "abgr": 4,
        "gray": 1,
    }
    bytes_per_pixel = packed_pixel_sizes.get(pixel_format)
    if bytes_per_pixel is None:
        logger.warning(
            "Unknown pixel format %r; assuming 3 bytes per pixel", pixel_format
        )
        bytes_per_pixel = 3  # Default to RGB

    frame_shape = (info.height, info.width, bytes_per_pixel)
    frame_size = info.width * info.height * bytes_per_pixel
    if frame_size <= 0:
        # read(0) never signals EOF, so the frame loop would spin forever.
        raise ValueError(f"Invalid frame dimensions: {info.width}x{info.height}")

    decode_cmd = [
        "ffmpeg",
        "-nostdin",  # never consume the parent's stdin
        "-i", str(input_path),
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-",
    ]

    encode_cmd = [
        "ffmpeg",
        "-y",
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-s", f"{info.width}x{info.height}",
        "-r", str(info.frame_rate),
        "-i", "-",
        "-i", str(input_path),  # For audio
        "-map", "0:v",
        "-map", "1:a?",
        "-c:v", output_codec,
        "-c:a", "aac",
        *output_options,
        str(output_path),
    ]

    logger.debug("Decoder: %s", " ".join(decode_cmd))
    logger.debug("Encoder: %s", " ".join(encode_cmd))

    state = initial_state
    frame_idx = 0

    # Encoder stderr goes to a temp file rather than a pipe: nothing drains a
    # pipe while frames are being written, so a chatty encoder would fill it
    # and stall both processes.
    with tempfile.TemporaryFile() as encoder_log:
        decoder = subprocess.Popen(
            decode_cmd,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        try:
            encoder = subprocess.Popen(
                encode_cmd,
                stdin=subprocess.PIPE,
                stderr=encoder_log,
            )
        except Exception:
            # Do not leave the decoder running if the encoder cannot start.
            decoder.kill()
            decoder.stdout.close()
            decoder.wait()
            raise

        try:
            while True:
                # Read frame from decoder
                raw_frame = decoder.stdout.read(frame_size)
                if len(raw_frame) < frame_size:
                    if raw_frame:
                        logger.warning(
                            "Discarding %d trailing bytes (incomplete frame)",
                            len(raw_frame),
                        )
                    break

                # Convert to numpy (read-only view over the bytes just read)
                frame = np.frombuffer(raw_frame, dtype=np.uint8).reshape(frame_shape)

                # Build per-frame params
                frame_params = dict(params)
                for param_name, values in bindings.items():
                    if frame_idx < len(values):
                        frame_params[param_name] = values[frame_idx]

                # Process frame
                processed, state = process_frame(frame, frame_params, state)

                # The encoder was told a fixed frame size; anything else
                # would corrupt the raw stream.
                if processed.shape != frame.shape:
                    raise ValueError(
                        f"Frame shape mismatch: {processed.shape} vs {frame.shape}"
                    )
                processed = processed.astype(np.uint8, copy=False)

                # Write to encoder
                encoder.stdin.write(processed.tobytes())
                frame_idx += 1

                if frame_idx % 100 == 0:
                    logger.debug("Processed frame %d", frame_idx)

        except Exception as e:
            logger.error(f"Frame processing failed at frame {frame_idx}: {e}")
            raise
        finally:
            decoder.stdout.close()
            decoder.wait()
            try:
                encoder.stdin.close()
            except OSError:
                # The encoder already exited; its return code is checked
                # below, and an in-flight exception must not be masked.
                pass
            encoder.wait()

        if decoder.returncode != 0:
            logger.warning(
                "Decoder exited with code %s; output may be truncated",
                decoder.returncode,
            )

        if encoder.returncode != 0:
            encoder_log.seek(0)
            stderr = encoder_log.read().decode(errors="replace")
            raise RuntimeError(f"Encoder failed: {stderr}")

    logger.info("Processed %d frames", frame_idx)
    return output_path, state


def process_video_batch(
    input_path: Path,
    output_path: Path,
    process_frames: Callable[[List[np.ndarray], Dict[str, Any]], List[np.ndarray]],
    params: Dict[str, Any],
    batch_size: int = 30,
    pixel_format: str = "rgb24",
    output_codec: str = "libx264",
    output_options: Optional[List[str]] = None,
) -> Path:
    """
    Process video in batches for effects that need temporal context.

    Frames are decoded by one ffmpeg process, collected into lists of up to
    ``batch_size`` frames, passed to ``process_frames``, and the returned
    frames are piped into a second ffmpeg process that encodes
    ``output_path``. The encoder also opens ``input_path`` to take its audio
    (if any), re-encoded as AAC.

    Args:
        input_path: Input video path
        output_path: Output video path (overwritten if it exists)
        process_frames: Function (frames_batch, params) -> processed_batch.
            Input frames are read-only ``uint8`` arrays of shape
            (height, width, bytes_per_pixel). Each returned frame is cast to
            ``uint8`` and must contain exactly one frame's worth of bytes.
        params: Parameter dict
        batch_size: Number of frames per batch; the final batch may be
            shorter. Values below 1 behave like 1.
        pixel_format: Pixel format. Packed 8-bit formats ``rgb24``,
            ``bgr24``, ``rgba``, ``bgra``, ``argb``, ``abgr`` and ``gray``
            are recognised; anything else is assumed to use 3 bytes per pixel.
        output_codec: Output codec
        output_options: Additional ffmpeg output options

    Returns:
        Output path

    Raises:
        ValueError: If the probed frame size is not positive, or if a
            returned frame does not match the expected byte size.
        RuntimeError: If probing fails or the encoder exits with an error.
    """
    output_options = output_options or []

    info = probe_video(input_path)

    # Bytes per pixel for the packed formats we know how to slice into frames.
    packed_pixel_sizes = {
        "rgb24": 3,
        "bgr24": 3,
        "rgba": 4,
        "bgra": 4,
        "argb": 4,
        "abgr": 4,
        "gray": 1,
    }
    bytes_per_pixel = packed_pixel_sizes.get(pixel_format)
    if bytes_per_pixel is None:
        logger.warning(
            "Unknown pixel format %r; assuming 3 bytes per pixel", pixel_format
        )
        bytes_per_pixel = 3

    frame_shape = (info.height, info.width, bytes_per_pixel)
    frame_size = info.width * info.height * bytes_per_pixel
    if frame_size <= 0:
        # read(0) never signals EOF, so the frame loop would spin forever.
        raise ValueError(f"Invalid frame dimensions: {info.width}x{info.height}")

    decode_cmd = [
        "ffmpeg",
        "-nostdin",  # never consume the parent's stdin
        "-i", str(input_path),
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-",
    ]

    encode_cmd = [
        "ffmpeg",
        "-y",
        "-f", "rawvideo",
        "-pix_fmt", pixel_format,
        "-s", f"{info.width}x{info.height}",
        "-r", str(info.frame_rate),
        "-i", "-",
        "-i", str(input_path),  # For audio
        "-map", "0:v",
        "-map", "1:a?",
        "-c:v", output_codec,
        "-c:a", "aac",
        *output_options,
        str(output_path),
    ]

    total_processed = 0

    # Encoder stderr goes to a temp file rather than a pipe: nothing drains a
    # pipe while frames are being written, so a chatty encoder would fill it
    # and stall both processes.
    with tempfile.TemporaryFile() as encoder_log:
        decoder = subprocess.Popen(
            decode_cmd,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        try:
            encoder = subprocess.Popen(
                encode_cmd,
                stdin=subprocess.PIPE,
                stderr=encoder_log,
            )
        except Exception:
            # Do not leave the decoder running if the encoder cannot start.
            decoder.kill()
            decoder.stdout.close()
            decoder.wait()
            raise

        def _encode_batch(frames: List[np.ndarray]) -> int:
            """Run one batch through process_frames and write the results."""
            written = 0
            for out_frame in process_frames(frames, params):
                data = out_frame.astype(np.uint8, copy=False).tobytes()
                # The encoder expects fixed-size frames; a wrong-sized one
                # would shift every following frame in the raw stream.
                if len(data) != frame_size:
                    raise ValueError(
                        f"Frame size mismatch: got {len(data)} bytes, "
                        f"expected {frame_size}"
                    )
                encoder.stdin.write(data)
                written += 1
            return written

        batch: List[np.ndarray] = []
        try:
            while True:
                raw_frame = decoder.stdout.read(frame_size)
                if len(raw_frame) < frame_size:
                    if raw_frame:
                        logger.warning(
                            "Discarding %d trailing bytes (incomplete frame)",
                            len(raw_frame),
                        )
                    break

                batch.append(
                    np.frombuffer(raw_frame, dtype=np.uint8).reshape(frame_shape)
                )

                if len(batch) >= batch_size:
                    total_processed += _encode_batch(batch)
                    batch = []

            # Process remaining (short) batch
            if batch:
                total_processed += _encode_batch(batch)

        finally:
            decoder.stdout.close()
            decoder.wait()
            try:
                encoder.stdin.close()
            except OSError:
                # The encoder already exited; its return code is checked
                # below, and an in-flight exception must not be masked.
                pass
            encoder.wait()

        if decoder.returncode != 0:
            logger.warning(
                "Decoder exited with code %s; output may be truncated",
                decoder.returncode,
            )

        if encoder.returncode != 0:
            encoder_log.seek(0)
            stderr = encoder_log.read().decode(errors="replace")
            raise RuntimeError(f"Encoder failed: {stderr}")

    logger.info("Processed %d frames in batches of %s", total_processed, batch_size)
    return output_path