Squashed 'core/' content from commit 4957443
git-subtree-dir: core git-subtree-split: 4957443184ae0eb6323635a90a19acffb3e01d07
This commit is contained in:
266
artdag/analysis/video.py
Normal file
266
artdag/analysis/video.py
Normal file
@@ -0,0 +1,266 @@
|
||||
# artdag/analysis/video.py
|
||||
"""
|
||||
Video feature extraction.
|
||||
|
||||
Uses ffprobe for basic metadata and optional OpenCV for motion analysis.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from fractions import Fraction
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from .schema import VideoFeatures
|
||||
|
||||
logger = logging.getLogger(__name__)

# Feature names recognized by analyze_video()'s `features` argument.
FEATURE_METADATA = "metadata"  # basic ffprobe metadata (always extracted)
FEATURE_MOTION_TEMPO = "motion_tempo"  # tempo estimate from frame-difference periodicity
FEATURE_SCENE_CHANGES = "scene_changes"  # scene-change timestamps via ffmpeg
FEATURE_ALL = "all"  # shorthand that expands to every feature above
||||
def _parse_frame_rate(rate_str: str) -> float:
|
||||
"""Parse frame rate string like '30000/1001' or '30'."""
|
||||
try:
|
||||
if "/" in rate_str:
|
||||
frac = Fraction(rate_str)
|
||||
return float(frac)
|
||||
return float(rate_str)
|
||||
except (ValueError, ZeroDivisionError):
|
||||
return 30.0 # Default
|
||||
|
||||
|
||||
def analyze_metadata(path: Path) -> VideoFeatures:
    """
    Extract video metadata using ffprobe.

    Args:
        path: Path to video file

    Returns:
        VideoFeatures with basic metadata

    Raises:
        ValueError: If ffprobe is missing, fails, emits unparseable
            output, or reports no video stream.
    """
    cmd = [
        "ffprobe", "-v", "quiet",
        "-print_format", "json",
        "-show_streams",
        "-show_format",
        "-select_streams", "v:0",
        str(path),
    ]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)
    except FileNotFoundError as e:
        # ffprobe binary is not installed or not on PATH; surface this as
        # the same documented error type instead of a raw OS error.
        raise ValueError(f"ffprobe not found (is ffmpeg installed?): {e}") from e
    except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
        # Chain the cause so the original failure remains in the traceback.
        raise ValueError(f"Could not read video info: {e}") from e

    if not data.get("streams"):
        raise ValueError("No video stream found")

    # -select_streams v:0 limits output to the first video stream.
    stream = data["streams"][0]
    fmt = data.get("format", {})

    # Prefer the container-level duration; fall back to the stream's, then 0.
    duration = float(fmt.get("duration", stream.get("duration", 0)))

    # avg_frame_rate is typically a rational string like "30000/1001".
    frame_rate = _parse_frame_rate(stream.get("avg_frame_rate", "30"))

    return VideoFeatures(
        duration=duration,
        frame_rate=frame_rate,
        width=int(stream.get("width", 0)),
        height=int(stream.get("height", 0)),
        codec=stream.get("codec_name", ""),
    )
||||
def analyze_scene_changes(path: Path, threshold: float = 0.3) -> List[float]:
    """
    Detect scene changes using ffmpeg scene detection.

    Args:
        path: Path to video file
        threshold: Scene change threshold (0-1, lower = more sensitive)

    Returns:
        List of scene change times in seconds. Empty if ffmpeg is
        unavailable or fails (best-effort; never raises for that).
    """
    cmd = [
        "ffmpeg", "-i", str(path),
        "-vf", f"select='gt(scene,{threshold})',showinfo",
        "-f", "null", "-",
    ]

    try:
        # No check=True: ffmpeg's exit status is irrelevant here — we only
        # scrape its stderr log. (The previous `except CalledProcessError`
        # was dead code for exactly that reason.) OSError covers a missing
        # or non-executable ffmpeg binary.
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as e:
        logger.warning(f"Scene detection failed: {e}")
        return []

    # showinfo logs each selected frame to stderr with a "pts_time:<t>" token.
    scene_times = []
    for line in result.stderr.split("\n"):
        if "pts_time:" not in line:
            continue
        for part in line.split():
            if part.startswith("pts_time:"):
                try:
                    scene_times.append(float(part.split(":")[1]))
                except (ValueError, IndexError):
                    pass  # malformed token; skip this line
                break  # at most one pts_time per showinfo line

    return scene_times
||||
def analyze_motion_tempo(path: Path, sample_duration: float = 30.0) -> Optional[float]:
    """
    Estimate tempo from video motion periodicity.

    Analyzes optical flow or frame differences to detect rhythmic motion.
    This is useful for matching video speed to audio tempo.

    Args:
        path: Path to video file
        sample_duration: Duration to analyze (seconds)

    Returns:
        Estimated motion tempo in BPM, or None if not detectable
    """
    # OpenCV and numpy are optional dependencies; degrade to None rather
    # than fail at import time for installs without them.
    try:
        import cv2
        import numpy as np
    except ImportError:
        logger.warning("OpenCV not available, skipping motion tempo analysis")
        return None

    cap = cv2.VideoCapture(str(path))
    if not cap.isOpened():
        logger.warning(f"Could not open video: {path}")
        return None

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            # Some containers report 0/unknown fps; assume 30.
            fps = 30.0

        # Only sample the first `sample_duration` seconds of frames.
        max_frames = int(sample_duration * fps)
        frame_diffs = []
        prev_gray = None

        frame_count = 0
        while frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # Convert to grayscale and resize for speed
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray = cv2.resize(gray, (160, 90))

            if prev_gray is not None:
                # Calculate frame difference; mean abs-diff is a cheap
                # scalar proxy for overall motion between frames.
                diff = cv2.absdiff(gray, prev_gray)
                frame_diffs.append(np.mean(diff))

            prev_gray = gray
            frame_count += 1

        if len(frame_diffs) < 60:  # Need at least 2 seconds at 30fps
            return None

        # Convert to numpy array
        motion = np.array(frame_diffs)

        # Normalize to zero mean (and unit variance when non-constant)
        # so the autocorrelation reflects shape, not amplitude.
        motion = motion - motion.mean()
        if motion.std() > 0:
            motion = motion / motion.std()

        # Autocorrelation to find periodicity; keep only non-negative lags.
        n = len(motion)
        acf = np.correlate(motion, motion, mode="full")[n-1:]
        acf = acf / acf[0]  # Normalize so lag-0 correlation is 1.0

        # Find peaks in autocorrelation (potential beat periods)
        # Look for periods between 0.3s (200 BPM) and 2s (30 BPM)
        min_lag = int(0.3 * fps)
        max_lag = min(int(2.0 * fps), len(acf) - 1)

        if max_lag <= min_lag:
            # Too few samples to cover the plausible lag range.
            return None

        # Find the highest peak in the valid range
        search_range = acf[min_lag:max_lag]
        if len(search_range) == 0:
            return None

        peak_idx = np.argmax(search_range) + min_lag
        peak_value = acf[peak_idx]

        # Only report if peak is significant
        # (0.1 threshold appears empirically chosen — TODO confirm)
        if peak_value < 0.1:
            return None

        # Convert lag to BPM
        period_seconds = peak_idx / fps
        bpm = 60.0 / period_seconds

        # Sanity check: only report tempi in the musically plausible range.
        if 30 <= bpm <= 200:
            return round(bpm, 1)

        return None

    finally:
        # Always release the capture handle, even on early return.
        cap.release()
||||
def analyze_video(
    path: Path,
    features: Optional[List[str]] = None,
) -> VideoFeatures:
    """
    Extract video features from file.

    Args:
        path: Path to video file
        features: List of features to extract. Options:
            - "metadata": Basic video info (always included)
            - "motion_tempo": Estimated tempo from motion
            - "scene_changes": Scene change detection
            - "all": All features

    Returns:
        VideoFeatures with requested analysis
    """
    requested = [FEATURE_METADATA] if features is None else features
    if FEATURE_ALL in requested:
        requested = [FEATURE_METADATA, FEATURE_MOTION_TEMPO, FEATURE_SCENE_CHANGES]

    # Metadata extraction is unconditional; optional analyses attach to it.
    result = analyze_metadata(path)

    # Each optional feature: (flag, result attribute, analyzer, failure label).
    optional_analyses = (
        (FEATURE_MOTION_TEMPO, "motion_tempo", analyze_motion_tempo,
         "Motion tempo analysis failed"),
        (FEATURE_SCENE_CHANGES, "scene_changes", analyze_scene_changes,
         "Scene change detection failed"),
    )
    for flag, attr, analyzer, fail_label in optional_analyses:
        if flag not in requested:
            continue
        try:
            setattr(result, attr, analyzer(path))
        except Exception as e:
            # Optional analyses are best-effort: log and keep going.
            logger.warning(f"{fail_label}: {e}")

    return result
Reference in New Issue
Block a user