Files
rose-ash/artdag/analysis/video.py
giles cc2dcbddd4 Squashed 'core/' content from commit 4957443
git-subtree-dir: core
git-subtree-split: 4957443184ae0eb6323635a90a19acffb3e01d07
2026-02-24 23:09:39 +00:00

267 lines
7.2 KiB
Python

# artdag/analysis/video.py
"""
Video feature extraction.
Uses ffprobe for basic metadata and optional OpenCV for motion analysis.
"""
import json
import logging
import subprocess
from fractions import Fraction
from pathlib import Path
from typing import List, Optional
from .schema import VideoFeatures
logger = logging.getLogger(__name__)
# Feature names
# Values accepted in analyze_video()'s `features` list.
FEATURE_METADATA = "metadata"  # basic ffprobe metadata (always extracted)
FEATURE_MOTION_TEMPO = "motion_tempo"  # BPM estimate from frame-difference periodicity (needs OpenCV)
FEATURE_SCENE_CHANGES = "scene_changes"  # scene-cut timestamps via ffmpeg scene filter
FEATURE_ALL = "all"  # shorthand that expands to all of the above
def _parse_frame_rate(rate_str: str) -> float:
"""Parse frame rate string like '30000/1001' or '30'."""
try:
if "/" in rate_str:
frac = Fraction(rate_str)
return float(frac)
return float(rate_str)
except (ValueError, ZeroDivisionError):
return 30.0 # Default
def analyze_metadata(path: Path) -> VideoFeatures:
"""
Extract video metadata using ffprobe.
Args:
path: Path to video file
Returns:
VideoFeatures with basic metadata
"""
cmd = [
"ffprobe", "-v", "quiet",
"-print_format", "json",
"-show_streams",
"-show_format",
"-select_streams", "v:0",
str(path)
]
try:
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
data = json.loads(result.stdout)
except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
raise ValueError(f"Could not read video info: {e}")
if not data.get("streams"):
raise ValueError("No video stream found")
stream = data["streams"][0]
fmt = data.get("format", {})
# Get duration from format or stream
duration = float(fmt.get("duration", stream.get("duration", 0)))
# Parse frame rate
frame_rate = _parse_frame_rate(stream.get("avg_frame_rate", "30"))
return VideoFeatures(
duration=duration,
frame_rate=frame_rate,
width=int(stream.get("width", 0)),
height=int(stream.get("height", 0)),
codec=stream.get("codec_name", ""),
)
def analyze_scene_changes(path: Path, threshold: float = 0.3) -> List[float]:
    """
    Detect scene changes using ffmpeg scene detection.

    Args:
        path: Path to video file
        threshold: Scene change threshold (0-1, lower = more sensitive)

    Returns:
        List of scene change times in seconds (empty if ffmpeg is
        unavailable or the file cannot be analyzed)
    """
    cmd = [
        "ffmpeg", "-i", str(path),
        "-vf", f"select='gt(scene,{threshold})',showinfo",
        "-f", "null", "-"
    ]
    try:
        # Deliberately no check=True: ffmpeg can exit non-zero even when
        # usable showinfo output was written to stderr. (The original code
        # caught CalledProcessError, which run() never raises without
        # check=True; a missing ffmpeg binary raises OSError instead.)
        result = subprocess.run(cmd, capture_output=True, text=True)
        stderr = result.stderr
    except OSError as e:
        logger.warning(f"Scene detection failed: {e}")
        return []
    # showinfo prints one "... pts_time:<seconds> ..." line per kept frame.
    scene_times = []
    for line in stderr.split("\n"):
        if "pts_time:" not in line:
            continue
        for part in line.split():
            if part.startswith("pts_time:"):
                try:
                    scene_times.append(float(part.split(":")[1]))
                except (ValueError, IndexError):
                    pass
                break
    return scene_times
def analyze_motion_tempo(path: Path, sample_duration: float = 30.0) -> Optional[float]:
    """
    Estimate tempo from video motion periodicity.

    Analyzes per-frame grayscale differences and their autocorrelation to
    detect rhythmic motion. Useful for matching video speed to audio tempo.

    Args:
        path: Path to video file
        sample_duration: Duration to analyze (seconds)

    Returns:
        Estimated motion tempo in BPM (30-200), or None if OpenCV is
        unavailable, the file cannot be opened, or no reliable periodic
        motion is detected.
    """
    try:
        import cv2
        import numpy as np
    except ImportError:
        logger.warning("OpenCV not available, skipping motion tempo analysis")
        return None
    cap = cv2.VideoCapture(str(path))
    if not cap.isOpened():
        logger.warning(f"Could not open video: {path}")
        return None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            fps = 30.0  # OpenCV reports 0 for some containers
        max_frames = int(sample_duration * fps)
        frame_diffs = []
        prev_gray = None
        frame_count = 0
        while frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            # Grayscale + aggressive downscale: only a coarse per-frame
            # motion magnitude is needed, not spatial detail.
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray = cv2.resize(gray, (160, 90))
            if prev_gray is not None:
                diff = cv2.absdiff(gray, prev_gray)
                frame_diffs.append(np.mean(diff))
            prev_gray = gray
            frame_count += 1
        if len(frame_diffs) < 60:  # Need at least 2 seconds at 30fps
            return None
        motion = np.array(frame_diffs)
        motion = motion - motion.mean()
        std = motion.std()
        if std == 0:
            # Static video (identical frames): no periodic motion exists.
            # Previously this fell through with acf[0] == 0, so the
            # normalization below produced NaNs; NaN comparisons then
            # bypassed the significance check and a spurious BPM could be
            # reported.
            return None
        motion = motion / std
        # Autocorrelation to find periodicity
        n = len(motion)
        acf = np.correlate(motion, motion, mode="full")[n-1:]
        acf = acf / acf[0]  # acf[0] == n > 0 after normalization above
        # Look for periods between 0.3s (200 BPM) and 2s (30 BPM)
        min_lag = int(0.3 * fps)
        max_lag = min(int(2.0 * fps), len(acf) - 1)
        if max_lag <= min_lag:
            return None
        # Find the highest peak in the valid range
        search_range = acf[min_lag:max_lag]
        if len(search_range) == 0:
            return None
        peak_idx = np.argmax(search_range) + min_lag
        peak_value = acf[peak_idx]
        # Weak correlation means no reliable periodic motion
        if peak_value < 0.1:
            return None
        # Convert lag to BPM
        period_seconds = peak_idx / fps
        bpm = 60.0 / period_seconds
        # Sanity check: only report plausible musical tempos
        if 30 <= bpm <= 200:
            return round(bpm, 1)
        return None
    finally:
        cap.release()
def analyze_video(
    path: Path,
    features: Optional[List[str]] = None,
) -> VideoFeatures:
    """
    Extract video features from file.

    Args:
        path: Path to video file
        features: List of features to extract. Options:
            - "metadata": Basic video info (always included)
            - "motion_tempo": Estimated tempo from motion
            - "scene_changes": Scene change detection
            - "all": All features

    Returns:
        VideoFeatures with requested analysis
    """
    requested = [FEATURE_METADATA] if features is None else list(features)
    if FEATURE_ALL in requested:
        requested = [FEATURE_METADATA, FEATURE_MOTION_TEMPO, FEATURE_SCENE_CHANGES]

    # Metadata is unconditional: it produces the base VideoFeatures object
    # the optional analyses attach to.
    result = analyze_metadata(path)

    # Optional analyses are best-effort: failures are logged, never raised.
    optional_analyses = (
        (FEATURE_MOTION_TEMPO, "motion_tempo", analyze_motion_tempo,
         "Motion tempo analysis failed"),
        (FEATURE_SCENE_CHANGES, "scene_changes", analyze_scene_changes,
         "Scene change detection failed"),
    )
    for feature_name, attr_name, analyzer, failure_msg in optional_analyses:
        if feature_name not in requested:
            continue
        try:
            setattr(result, attr_name, analyzer(path))
        except Exception as e:
            logger.warning(f"{failure_msg}: {e}")
    return result