Squashed 'core/' content from commit 4957443

git-subtree-dir: core
git-subtree-split: 4957443184ae0eb6323635a90a19acffb3e01d07
This commit is contained in:
giles
2026-02-24 23:09:39 +00:00
commit cc2dcbddd4
80 changed files with 25711 additions and 0 deletions

View File

@@ -0,0 +1,26 @@
# artdag/analysis - Audio and video feature extraction
#
# Provides the Analysis phase of the 3-phase execution model:
# 1. ANALYZE - Extract features from inputs
# 2. PLAN - Generate execution plan with cache IDs
# 3. EXECUTE - Run steps with caching
from .schema import (
AnalysisResult,
AudioFeatures,
VideoFeatures,
BeatInfo,
EnergyEnvelope,
SpectrumBands,
)
from .analyzer import Analyzer
__all__ = [
"Analyzer",
"AnalysisResult",
"AudioFeatures",
"VideoFeatures",
"BeatInfo",
"EnergyEnvelope",
"SpectrumBands",
]

282
artdag/analysis/analyzer.py Normal file
View File

@@ -0,0 +1,282 @@
# artdag/analysis/analyzer.py
"""
Main Analyzer class for the Analysis phase.
Coordinates audio and video feature extraction with caching.
"""
import json
import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional
from .schema import AnalysisResult, AudioFeatures, VideoFeatures
from .audio import analyze_audio, FEATURE_ALL as AUDIO_ALL
from .video import analyze_video, FEATURE_ALL as VIDEO_ALL
logger = logging.getLogger(__name__)
class AnalysisCache:
    """
    Simple file-based cache for analysis results.

    Stores results as JSON files keyed by analysis cache_id.
    """

    def __init__(self, cache_dir: Path):
        """Create the cache directory if it does not already exist."""
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _path_for(self, cache_id: str) -> Path:
        """Get cache file path for a cache_id."""
        return self.cache_dir / f"{cache_id}.json"

    def get(self, cache_id: str) -> Optional["AnalysisResult"]:
        """Retrieve cached analysis result, or None on a miss.

        A corrupt or unreadable cache file is logged and treated as a
        miss (returns None) so the analysis can simply be redone.
        """
        path = self._path_for(cache_id)
        if not path.exists():
            return None
        try:
            with open(path, "r") as f:
                data = json.load(f)
            return AnalysisResult.from_dict(data)
        except (OSError, json.JSONDecodeError, KeyError) as e:
            # OSError added: a file deleted/unreadable between the exists()
            # check and open() should degrade to a miss, not crash.
            logger.warning(f"Failed to load analysis cache {cache_id}: {e}")
            return None

    def put(self, result: "AnalysisResult") -> None:
        """Store analysis result in cache.

        Writes to a temporary sibling file and atomically renames it into
        place, so a crash mid-write never leaves a truncated JSON file
        that would later fail to parse.
        """
        path = self._path_for(result.cache_id)
        tmp_path = path.with_suffix(".json.tmp")
        with open(tmp_path, "w") as f:
            json.dump(result.to_dict(), f, indent=2)
        tmp_path.replace(path)

    def has(self, cache_id: str) -> bool:
        """Check if analysis result is cached."""
        return self._path_for(cache_id).exists()

    def remove(self, cache_id: str) -> bool:
        """Remove cached analysis result. Returns True if a file was deleted."""
        path = self._path_for(cache_id)
        if path.exists():
            path.unlink()
            return True
        return False
class Analyzer:
    """
    Analyzes media inputs to extract features.

    The Analyzer is the first phase of the 3-phase execution model.
    It extracts features from inputs that inform downstream processing.

    Example:
        analyzer = Analyzer(cache_dir=Path("./analysis_cache"))

        # Analyze a music file for beats
        result = analyzer.analyze(
            input_path=Path("/path/to/music.mp3"),
            input_hash="abc123...",
            features=["beats", "energy"]
        )
        print(f"Tempo: {result.tempo} BPM")
        print(f"Beats: {result.beat_times}")
    """

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        content_cache: Optional["Cache"] = None,  # artdag.Cache for input lookup
    ):
        """
        Initialize the Analyzer.

        Args:
            cache_dir: Directory for analysis cache. If None, no caching.
            content_cache: artdag Cache for looking up inputs by hash
        """
        self.cache = AnalysisCache(cache_dir) if cache_dir else None
        self.content_cache = content_cache

    def get_input_path(self, input_hash: str, input_path: Optional[Path] = None) -> Path:
        """
        Resolve input to a file path.

        Prefers an explicit, existing path; otherwise falls back to a
        content-cache lookup by hash.

        Args:
            input_hash: Content hash of the input
            input_path: Optional direct path to file

        Returns:
            Path to the input file

        Raises:
            ValueError: If input cannot be resolved
        """
        if input_path and input_path.exists():
            return input_path
        if self.content_cache:
            entry = self.content_cache.get(input_hash)
            if entry:
                return Path(entry.output_path)
        raise ValueError(f"Cannot resolve input {input_hash}: no path provided and not in cache")

    def analyze(
        self,
        input_hash: str,
        features: List[str],
        input_path: Optional[Path] = None,
        media_type: Optional[str] = None,
    ) -> "AnalysisResult":
        """
        Analyze an input file and extract features.

        Args:
            input_hash: Content hash of the input (for cache key)
            features: List of features to extract:
                Audio: "beats", "tempo", "energy", "spectrum", "onsets"
                Video: "metadata", "motion_tempo", "scene_changes"
                Meta: "all" (extracts all relevant features)
            input_path: Optional direct path to file
            media_type: Optional hint ("audio", "video", or None for auto-detect)

        Returns:
            AnalysisResult with extracted features
        """
        # Compute the cache ID first so a cache hit skips path resolution
        # and media probing entirely. The throwaway AnalysisResult exists
        # only for its cache_id computation.
        temp_result = AnalysisResult(
            input_hash=input_hash,
            features_requested=sorted(features),
        )
        cache_id = temp_result.cache_id

        # Check cache
        if self.cache and self.cache.has(cache_id):
            cached = self.cache.get(cache_id)
            if cached:
                logger.info(f"Analysis cache hit: {cache_id[:16]}...")
                return cached

        # Resolve input path
        path = self.get_input_path(input_hash, input_path)
        logger.info(f"Analyzing {path} for features: {features}")

        # Detect media type if not specified
        if media_type is None:
            media_type = self._detect_media_type(path)

        # Extract features
        audio_features = None
        video_features = None

        # Normalize features: "all" expands to everything the audio/video
        # analyzers support.
        if "all" in features:
            audio_features_list = [AUDIO_ALL]
            video_features_list = [VIDEO_ALL]
        else:
            audio_features_list = [f for f in features if f in ("beats", "tempo", "energy", "spectrum", "onsets")]
            video_features_list = [f for f in features if f in ("metadata", "motion_tempo", "scene_changes")]

        # Videos may carry an audio track, so audio analysis runs for both
        # media types. Failures are logged, not fatal — partial results
        # are still useful downstream.
        if media_type in ("audio", "video") and audio_features_list:
            try:
                audio_features = analyze_audio(path, features=audio_features_list)
            except Exception as e:
                logger.warning(f"Audio analysis failed: {e}")

        if media_type == "video" and video_features_list:
            try:
                video_features = analyze_video(path, features=video_features_list)
            except Exception as e:
                logger.warning(f"Video analysis failed: {e}")

        result = AnalysisResult(
            input_hash=input_hash,
            features_requested=sorted(features),
            audio=audio_features,
            video=video_features,
            analyzed_at=datetime.now(timezone.utc).isoformat(),
        )

        # Cache result
        if self.cache:
            self.cache.put(result)

        return result

    def analyze_multiple(
        self,
        inputs: Dict[str, Path],
        features: List[str],
    ) -> Dict[str, "AnalysisResult"]:
        """
        Analyze multiple inputs.

        Args:
            inputs: Dict mapping input_hash to file path
            features: Features to extract from all inputs

        Returns:
            Dict mapping input_hash to AnalysisResult

        Raises:
            Exception: Re-raises the first per-input failure after logging.
        """
        results = {}
        for input_hash, input_path in inputs.items():
            try:
                results[input_hash] = self.analyze(
                    input_hash=input_hash,
                    features=features,
                    input_path=input_path,
                )
            except Exception as e:
                logger.error(f"Analysis failed for {input_hash}: {e}")
                raise
        return results

    def _detect_media_type(self, path: Path) -> str:
        """
        Detect if file is audio or video.

        Probes streams with ffprobe; if ffprobe is missing or fails, falls
        back to extension-based detection.

        Args:
            path: Path to media file

        Returns:
            "video", "audio", or "unknown"
        """
        import subprocess

        cmd = [
            "ffprobe", "-v", "quiet",
            "-print_format", "json",
            "-show_streams",
            str(path)
        ]
        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            data = json.loads(result.stdout)
            streams = data.get("streams", [])
            has_video = any(s.get("codec_type") == "video" for s in streams)
            has_audio = any(s.get("codec_type") == "audio" for s in streams)
            if has_video:
                return "video"
            elif has_audio:
                return "audio"
            return "unknown"
        except (subprocess.CalledProcessError, json.JSONDecodeError, FileNotFoundError):
            # FileNotFoundError: ffprobe binary not installed. Previously it
            # propagated, so the extension fallback below never ran.
            ext = path.suffix.lower()
            if ext in (".mp4", ".mov", ".avi", ".mkv", ".webm"):
                return "video"
            elif ext in (".mp3", ".wav", ".flac", ".ogg", ".m4a", ".aac"):
                return "audio"
            return "unknown"

336
artdag/analysis/audio.py Normal file
View File

@@ -0,0 +1,336 @@
# artdag/analysis/audio.py
"""
Audio feature extraction.
Uses librosa for beat detection, energy analysis, and spectral features.
Falls back to basic ffprobe if librosa is not available.
"""
import json
import logging
import subprocess
from pathlib import Path
from typing import List, Optional, Tuple
from .schema import AudioFeatures, BeatInfo, EnergyEnvelope, SpectrumBands
logger = logging.getLogger(__name__)
# Feature names for requesting specific analysis (consumed by analyze_audio).
FEATURE_BEATS = "beats"  # beat times + tempo via analyze_beats
FEATURE_TEMPO = "tempo"  # treated by analyze_audio as an alias of beat detection
FEATURE_ENERGY = "energy"  # RMS loudness envelope via analyze_energy
FEATURE_SPECTRUM = "spectrum"  # bass/mid/high band envelopes via analyze_spectrum
FEATURE_ONSETS = "onsets"  # note/sound onset times via analyze_onsets
FEATURE_ALL = "all"  # expands to beats + energy + spectrum + onsets
def _get_audio_info_ffprobe(path: Path) -> Tuple[float, int, int]:
    """Get basic audio info using ffprobe.

    Args:
        path: Media file to probe.

    Returns:
        Tuple of (duration seconds, sample rate Hz, channel count).
        Missing fields default to 0 / 44100 / 2.

    Raises:
        ValueError: If ffprobe is unavailable, fails, or the file has no
            audio stream.
    """
    cmd = [
        "ffprobe", "-v", "quiet",
        "-print_format", "json",
        "-show_streams",
        "-select_streams", "a:0",
        str(path)
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)
    except (subprocess.CalledProcessError, json.JSONDecodeError, FileNotFoundError) as e:
        # FileNotFoundError added: a missing ffprobe binary should surface
        # as the documented ValueError, not leak an OSError to callers.
        logger.warning(f"ffprobe failed: {e}")
        raise ValueError(f"Could not read audio info: {e}")
    # Stream extraction is outside the try so the "No audio stream" error
    # below is not swallowed and re-wrapped by the handler above.
    if not data.get("streams"):
        raise ValueError("No audio stream found")
    stream = data["streams"][0]
    duration = float(stream.get("duration", 0))
    sample_rate = int(stream.get("sample_rate", 44100))
    channels = int(stream.get("channels", 2))
    return duration, sample_rate, channels
def _extract_audio_to_wav(path: Path, duration: Optional[float] = None) -> Path:
    """Extract audio to a temporary WAV file for librosa processing.

    Args:
        path: Source media file.
        duration: Optional maximum duration (seconds) to extract.

    Returns:
        Path to a mono 22050 Hz 16-bit PCM WAV file. The caller is
        responsible for deleting it.

    Raises:
        ValueError: If ffmpeg is unavailable or extraction fails.
    """
    import os
    import tempfile

    # mkstemp (unlike the insecure, deprecated mktemp) actually creates the
    # file, so no other process can race us for the name; ffmpeg -y below
    # overwrites the empty placeholder.
    fd, wav_name = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    wav_path = Path(wav_name)
    cmd = ["ffmpeg", "-y", "-i", str(path)]
    if duration:
        cmd.extend(["-t", str(duration)])
    cmd.extend([
        "-vn",  # No video
        "-acodec", "pcm_s16le",
        "-ar", "22050",  # Resample to 22050 Hz for librosa
        "-ac", "1",  # Mono
        str(wav_path)
    ])
    try:
        subprocess.run(cmd, capture_output=True, check=True)
        return wav_path
    except (subprocess.CalledProcessError, FileNotFoundError) as e:
        # FileNotFoundError covers a missing ffmpeg binary. Either way,
        # don't leave the empty temp file behind.
        wav_path.unlink(missing_ok=True)
        logger.error(f"Audio extraction failed: {getattr(e, 'stderr', None)}")
        raise ValueError(f"Could not extract audio: {e}")
def analyze_beats(path: Path, sample_rate: int = 22050) -> BeatInfo:
    """
    Detect beats and tempo using librosa.

    Args:
        path: Path to audio file (or pre-extracted WAV)
        sample_rate: Sample rate for analysis

    Returns:
        BeatInfo with beat times, tempo, and confidence

    Raises:
        ImportError: If librosa is not installed.
    """
    try:
        import librosa
    except ImportError:
        raise ImportError("librosa required for beat detection. Install with: pip install librosa")
    # Load audio
    y, sr = librosa.load(str(path), sr=sample_rate, mono=True)
    # Detect tempo and beats
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    # Convert frames to times
    beat_times = librosa.frames_to_time(beat_frames, sr=sr).tolist()
    # Estimate confidence from onset strength consistency: mean onset
    # strength at the chosen beat frames, relative to the envelope's peak.
    # Defaults to 0.5 when no beats were found or the envelope is flat.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    beat_strength = onset_env[beat_frames] if len(beat_frames) > 0 else []
    confidence = float(beat_strength.mean() / onset_env.max()) if len(beat_strength) > 0 and onset_env.max() > 0 else 0.5
    # Detect downbeats (first beat of each bar)
    # Use beat phase to estimate bar positions
    downbeat_times = None
    if len(beat_times) >= 4:
        # Assume 4/4 time signature, downbeats every 4 beats
        downbeat_times = [beat_times[i] for i in range(0, len(beat_times), 4)]
    # `tempo` may arrive as a float-like scalar or a sequence (librosa's
    # return type varies by version); both forms are handled here, with a
    # 120 BPM default when the sequence is empty.
    return BeatInfo(
        beat_times=beat_times,
        tempo=float(tempo) if hasattr(tempo, '__float__') else float(tempo[0]) if len(tempo) > 0 else 120.0,
        confidence=min(1.0, max(0.0, confidence)),
        downbeat_times=downbeat_times,
        time_signature=4,
    )
def analyze_energy(path: Path, window_ms: float = 50.0, sample_rate: int = 22050) -> EnergyEnvelope:
    """
    Extract energy (loudness) envelope.

    Computes the RMS envelope of the signal and peak-normalizes it into
    the 0-1 range.

    Args:
        path: Path to audio file
        window_ms: Analysis window size in milliseconds
        sample_rate: Sample rate for analysis

    Returns:
        EnergyEnvelope with times and normalized values

    Raises:
        ImportError: If librosa/numpy are not installed.
    """
    try:
        import librosa
        import numpy as np
    except ImportError:
        raise ImportError("librosa and numpy required. Install with: pip install librosa numpy")

    samples, sr = librosa.load(str(path), sr=sample_rate, mono=True)
    # The requested window size (ms) determines the RMS hop length.
    hop = int(sr * window_ms / 1000)
    rms = librosa.feature.rms(y=samples, hop_length=hop)[0]
    # Peak-normalize to [0, 1]; a silent signal (peak == 0) is left as-is
    # to avoid dividing by zero.
    peak = rms.max()
    envelope = rms / peak if peak > 0 else rms
    frame_times = librosa.frames_to_time(np.arange(len(rms)), sr=sr, hop_length=hop)
    return EnergyEnvelope(
        times=frame_times.tolist(),
        values=envelope.tolist(),
        window_ms=window_ms,
    )
def analyze_spectrum(
    path: Path,
    band_ranges: Optional[dict] = None,
    window_ms: float = 50.0,
    sample_rate: int = 22050
) -> SpectrumBands:
    """
    Extract frequency band envelopes.

    Args:
        path: Path to audio file
        band_ranges: Dict mapping band name to (low_hz, high_hz)
        window_ms: Analysis window size
        sample_rate: Sample rate

    Returns:
        SpectrumBands with bass, mid, high envelopes

    Raises:
        ImportError: If librosa/numpy are not installed.
    """
    try:
        import librosa
        import numpy as np
    except ImportError:
        raise ImportError("librosa and numpy required")

    if band_ranges is None:
        band_ranges = {
            "bass": (20, 200),
            "mid": (200, 2000),
            "high": (2000, 20000),
        }

    samples, sr = librosa.load(str(path), sr=sample_rate, mono=True)
    hop = int(sr * window_ms / 1000)
    fft_size = 2048
    # STFT magnitudes: rows are frequency bins, columns are time frames.
    magnitudes = np.abs(librosa.stft(samples, n_fft=fft_size, hop_length=hop))
    bin_freqs = librosa.fft_frequencies(sr=sr, n_fft=fft_size)

    def envelope_for(low_hz: float, high_hz: float) -> List[float]:
        """Sum magnitude over bins inside [low_hz, high_hz], peak-normalized."""
        selected = (bin_freqs >= low_hz) & (bin_freqs <= high_hz)
        if not selected.any():
            # Band falls entirely outside the FFT range: flat zero envelope.
            return [0.0] * magnitudes.shape[1]
        summed = magnitudes[selected, :].sum(axis=0)
        peak = summed.max()
        if peak > 0:
            summed = summed / peak
        return summed.tolist()

    frame_times = librosa.frames_to_time(
        np.arange(magnitudes.shape[1]), sr=sr, hop_length=hop
    )
    return SpectrumBands(
        bass=envelope_for(*band_ranges["bass"]),
        mid=envelope_for(*band_ranges["mid"]),
        high=envelope_for(*band_ranges["high"]),
        times=frame_times.tolist(),
        band_ranges=band_ranges,
    )
def analyze_onsets(path: Path, sample_rate: int = 22050) -> List[float]:
    """
    Detect onset times (note/sound starts).

    Args:
        path: Path to audio file
        sample_rate: Sample rate

    Returns:
        List of onset times in seconds

    Raises:
        ImportError: If librosa is not installed.
    """
    try:
        import librosa
    except ImportError:
        raise ImportError("librosa required")
    samples, sr = librosa.load(str(path), sr=sample_rate, mono=True)
    frames = librosa.onset.onset_detect(y=samples, sr=sr)
    return librosa.frames_to_time(frames, sr=sr).tolist()
def analyze_audio(
    path: Path,
    features: Optional[List[str]] = None,
) -> AudioFeatures:
    """
    Extract audio features from file.

    Args:
        path: Path to audio/video file
        features: List of features to extract. Options:
            - "beats": Beat detection (tempo, beat times)
            - "energy": Loudness envelope
            - "spectrum": Frequency band envelopes
            - "onsets": Note onset times
            - "all": All features

    Returns:
        AudioFeatures with requested analysis. Advanced features are
        skipped (with a warning) when librosa is not installed; individual
        extractor failures are logged and leave that field None.
    """
    if features is None:
        features = [FEATURE_ALL]
    # "all" expands to every concrete feature.
    if FEATURE_ALL in features:
        features = [FEATURE_BEATS, FEATURE_ENERGY, FEATURE_SPECTRUM, FEATURE_ONSETS]

    # Basic info (duration/rate/channels) always comes from ffprobe.
    duration, sample_rate, channels = _get_audio_info_ffprobe(path)
    result = AudioFeatures(
        duration=duration,
        sample_rate=sample_rate,
        channels=channels,
    )

    # Advanced features need librosa; without it, return basic info only.
    try:
        import librosa  # noqa: F401
    except ImportError:
        if any(f in features for f in [FEATURE_BEATS, FEATURE_ENERGY, FEATURE_SPECTRUM, FEATURE_ONSETS]):
            logger.warning("librosa not available, skipping advanced audio features")
        return result

    wav_path = None
    try:
        # librosa works on a pre-extracted mono WAV.
        wav_path = _extract_audio_to_wav(path)
        # (wanted?, result attribute, extractor, log label)
        extractors = (
            (FEATURE_BEATS in features or FEATURE_TEMPO in features,
             "beats", analyze_beats, "Beat detection"),
            (FEATURE_ENERGY in features, "energy", analyze_energy, "Energy analysis"),
            (FEATURE_SPECTRUM in features, "spectrum", analyze_spectrum, "Spectrum analysis"),
            (FEATURE_ONSETS in features, "onsets", analyze_onsets, "Onset detection"),
        )
        for wanted, attr, extractor, label in extractors:
            if not wanted:
                continue
            try:
                setattr(result, attr, extractor(wav_path))
            except Exception as e:
                logger.warning(f"{label} failed: {e}")
    finally:
        # Clean up temporary WAV file
        if wav_path and wav_path.exists():
            wav_path.unlink()
    return result

352
artdag/analysis/schema.py Normal file
View File

@@ -0,0 +1,352 @@
# artdag/analysis/schema.py
"""
Data structures for analysis results.
Analysis extracts features from input media that inform downstream processing.
Results are cached by: analysis_cache_id = SHA3-256(input_hash + sorted(features))
"""
import bisect
import hashlib
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Tuple
def _stable_hash(data: Any, algorithm: str = "sha3_256") -> str:
    """Create stable hash from arbitrary data.

    Serializes to canonical JSON (sorted keys, compact separators) so
    logically-equal inputs always produce the same digest.
    """
    canonical = json.dumps(data, sort_keys=True, separators=(",", ":")).encode()
    digest = hashlib.new(algorithm)
    digest.update(canonical)
    return digest.hexdigest()
@dataclass
class BeatInfo:
    """
    Beat detection results.

    Attributes:
        beat_times: List of beat positions in seconds
        tempo: Estimated tempo in BPM
        confidence: Tempo detection confidence (0-1)
        downbeat_times: First beat of each bar (if detected)
        time_signature: Detected or assumed time signature (e.g., 4)
    """
    beat_times: List[float]
    tempo: float
    confidence: float = 1.0
    downbeat_times: Optional[List[float]] = None
    time_signature: int = 4

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict (field order preserved)."""
        return {name: getattr(self, name) for name in (
            "beat_times", "tempo", "confidence", "downbeat_times", "time_signature",
        )}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "BeatInfo":
        """Rebuild from a dict produced by to_dict; absent optionals get defaults."""
        optional_defaults = {"confidence": 1.0, "downbeat_times": None, "time_signature": 4}
        return cls(
            beat_times=data["beat_times"],
            tempo=data["tempo"],
            **{key: data.get(key, default) for key, default in optional_defaults.items()},
        )
@dataclass
class EnergyEnvelope:
    """
    Energy (loudness) over time.

    Attributes:
        times: Time points in seconds (ascending; required by at_time)
        values: Energy values (0-1, normalized), parallel to `times`
        window_ms: Analysis window size in milliseconds
    """
    times: List[float]
    values: List[float]
    window_ms: float = 50.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict."""
        return {
            "times": self.times,
            "values": self.values,
            "window_ms": self.window_ms,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EnergyEnvelope":
        """Rebuild from a dict produced by to_dict."""
        return cls(
            times=data["times"],
            values=data["values"],
            window_ms=data.get("window_ms", 50.0),
        )

    def at_time(self, t: float) -> float:
        """Linearly interpolate the energy value at time `t`.

        Outside the sampled range the value is clamped to the first/last
        sample; an empty envelope yields 0.0.
        """
        if not self.times:
            return 0.0
        if t <= self.times[0]:
            return self.values[0]
        if t >= self.times[-1]:
            return self.values[-1]
        # bisect_right returns the first index whose time is > t, so
        # (hi - 1, hi) brackets t. Replaces the hand-rolled binary search
        # with the stdlib equivalent.
        hi = bisect.bisect_right(self.times, t)
        lo = hi - 1
        # Linear interpolation (guarding duplicate time points)
        t0, t1 = self.times[lo], self.times[hi]
        v0, v1 = self.values[lo], self.values[hi]
        alpha = (t - t0) / (t1 - t0) if t1 != t0 else 0
        return v0 + alpha * (v1 - v0)
@dataclass
class SpectrumBands:
    """
    Frequency band envelopes over time.

    Attributes:
        bass: Low frequency envelope (20-200 Hz typical)
        mid: Mid frequency envelope (200-2000 Hz typical)
        high: High frequency envelope (2000-20000 Hz typical)
        times: Time points in seconds
        band_ranges: Frequency ranges for each band in Hz, as
            (low_hz, high_hz) tuples
    """
    bass: List[float]
    mid: List[float]
    high: List[float]
    times: List[float]
    band_ranges: Dict[str, Tuple[float, float]] = field(default_factory=lambda: {
        "bass": (20, 200),
        "mid": (200, 2000),
        "high": (2000, 20000),
    })

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict.

        Note: JSON has no tuple type, so band_ranges tuples become lists
        when dumped; from_dict converts them back.
        """
        return {
            "bass": self.bass,
            "mid": self.mid,
            "high": self.high,
            "times": self.times,
            "band_ranges": self.band_ranges,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SpectrumBands":
        """Rebuild from a dict produced by to_dict.

        band_ranges values are coerced to tuples: a JSON round trip turns
        them into lists, which would break the declared
        Dict[str, Tuple[float, float]] contract and equality with
        freshly-constructed instances.
        """
        raw_ranges = data.get("band_ranges", {
            "bass": (20, 200),
            "mid": (200, 2000),
            "high": (2000, 20000),
        })
        return cls(
            bass=data["bass"],
            mid=data["mid"],
            high=data["high"],
            times=data["times"],
            band_ranges={name: tuple(rng) for name, rng in raw_ranges.items()},
        )
@dataclass
class AudioFeatures:
    """
    All extracted audio features.

    Attributes:
        duration: Audio duration in seconds
        sample_rate: Sample rate in Hz
        channels: Number of audio channels
        beats: Beat detection results
        energy: Energy envelope
        spectrum: Frequency band envelopes
        onsets: Note/sound onset times
    """
    duration: float
    sample_rate: int
    channels: int
    beats: Optional["BeatInfo"] = None
    energy: Optional["EnergyEnvelope"] = None
    spectrum: Optional["SpectrumBands"] = None
    onsets: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict; nested features serialize themselves."""
        def _maybe(feature):
            # Optional sub-features: None stays None.
            return feature.to_dict() if feature else None

        return {
            "duration": self.duration,
            "sample_rate": self.sample_rate,
            "channels": self.channels,
            "beats": _maybe(self.beats),
            "energy": _maybe(self.energy),
            "spectrum": _maybe(self.spectrum),
            "onsets": self.onsets,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AudioFeatures":
        """Rebuild from a dict produced by to_dict; absent sub-features stay None."""
        beats_data = data.get("beats")
        energy_data = data.get("energy")
        spectrum_data = data.get("spectrum")
        return cls(
            duration=data["duration"],
            sample_rate=data["sample_rate"],
            channels=data["channels"],
            beats=BeatInfo.from_dict(beats_data) if beats_data else None,
            energy=EnergyEnvelope.from_dict(energy_data) if energy_data else None,
            spectrum=SpectrumBands.from_dict(spectrum_data) if spectrum_data else None,
            onsets=data.get("onsets"),
        )
@dataclass
class VideoFeatures:
    """
    Extracted video features.

    Attributes:
        duration: Video duration in seconds
        frame_rate: Frames per second
        width: Frame width in pixels
        height: Frame height in pixels
        codec: Video codec name
        motion_tempo: Estimated tempo from motion analysis (optional)
        scene_changes: Times of detected scene changes
    """
    duration: float
    frame_rate: float
    width: int
    height: int
    codec: str = ""
    motion_tempo: Optional[float] = None
    scene_changes: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict (field order preserved)."""
        return {name: getattr(self, name) for name in (
            "duration", "frame_rate", "width", "height",
            "codec", "motion_tempo", "scene_changes",
        )}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "VideoFeatures":
        """Rebuild from a dict produced by to_dict; optional keys get defaults."""
        return cls(
            duration=data["duration"],
            frame_rate=data["frame_rate"],
            width=data["width"],
            height=data["height"],
            codec=data.get("codec", ""),
            motion_tempo=data.get("motion_tempo"),
            scene_changes=data.get("scene_changes"),
        )
@dataclass
class AnalysisResult:
    """
    Complete analysis result for an input.

    Combines audio and video features with the metadata needed for
    caching plus convenience accessors for downstream code.

    Attributes:
        input_hash: Content hash of the analyzed input
        features_requested: List of features that were requested
        audio: Audio features (if input has audio)
        video: Video features (if input has video)
        cache_id: Computed cache ID for this analysis
        analyzed_at: Timestamp of analysis
    """
    input_hash: str
    features_requested: List[str]
    audio: Optional["AudioFeatures"] = None
    video: Optional["VideoFeatures"] = None
    cache_id: Optional[str] = None
    analyzed_at: Optional[str] = None

    def __post_init__(self):
        """Fill in cache_id when the caller did not supply one."""
        if self.cache_id is None:
            self.cache_id = self._compute_cache_id()

    def _compute_cache_id(self) -> str:
        """
        Compute cache ID from input hash and requested features.

        cache_id = SHA3-256(input_hash + sorted(features_requested))
        """
        return _stable_hash({
            "input_hash": self.input_hash,
            "features": sorted(self.features_requested),
        })

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-compatible dict."""
        audio_dict = self.audio.to_dict() if self.audio else None
        video_dict = self.video.to_dict() if self.video else None
        return {
            "input_hash": self.input_hash,
            "features_requested": self.features_requested,
            "audio": audio_dict,
            "video": video_dict,
            "cache_id": self.cache_id,
            "analyzed_at": self.analyzed_at,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AnalysisResult":
        """Rebuild from a dict produced by to_dict."""
        audio_data = data.get("audio")
        video_data = data.get("video")
        return cls(
            input_hash=data["input_hash"],
            features_requested=data["features_requested"],
            audio=AudioFeatures.from_dict(audio_data) if audio_data else None,
            video=VideoFeatures.from_dict(video_data) if video_data else None,
            cache_id=data.get("cache_id"),
            analyzed_at=data.get("analyzed_at"),
        )

    def to_json(self) -> str:
        """Serialize to JSON string."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "AnalysisResult":
        """Deserialize from JSON string."""
        return cls.from_dict(json.loads(json_str))

    # Convenience accessors

    @property
    def tempo(self) -> Optional[float]:
        """Tempo in BPM, if beat analysis ran."""
        if self.audio and self.audio.beats:
            return self.audio.beats.tempo
        return None

    @property
    def beat_times(self) -> Optional[List[float]]:
        """Beat positions in seconds, if beat analysis ran."""
        if self.audio and self.audio.beats:
            return self.audio.beats.beat_times
        return None

    @property
    def downbeat_times(self) -> Optional[List[float]]:
        """Downbeat positions in seconds, if detected."""
        if self.audio and self.audio.beats:
            return self.audio.beats.downbeat_times
        return None

    @property
    def duration(self) -> float:
        """Duration in seconds, preferring video over audio; 0.0 if neither."""
        if self.video:
            return self.video.duration
        if self.audio:
            return self.audio.duration
        return 0.0

    @property
    def dimensions(self) -> Optional[Tuple[int, int]]:
        """(width, height) of the video, if video metadata is present."""
        if self.video:
            return (self.video.width, self.video.height)
        return None

266
artdag/analysis/video.py Normal file
View File

@@ -0,0 +1,266 @@
# artdag/analysis/video.py
"""
Video feature extraction.
Uses ffprobe for basic metadata and optional OpenCV for motion analysis.
"""
import json
import logging
import subprocess
from fractions import Fraction
from pathlib import Path
from typing import List, Optional
from .schema import VideoFeatures
logger = logging.getLogger(__name__)
# Feature names for requesting specific analysis (consumed by analyze_video).
FEATURE_METADATA = "metadata"  # ffprobe metadata via analyze_metadata (always extracted)
FEATURE_MOTION_TEMPO = "motion_tempo"  # motion-periodicity BPM via analyze_motion_tempo
FEATURE_SCENE_CHANGES = "scene_changes"  # scene-cut times via analyze_scene_changes
FEATURE_ALL = "all"  # expands to metadata + motion_tempo + scene_changes
def _parse_frame_rate(rate_str: str) -> float:
    """Parse an ffprobe frame-rate string such as '30000/1001' or '30'.

    Falls back to 30.0 for malformed input or a zero-denominator fraction.
    """
    try:
        # Rational rates ("30000/1001") go through Fraction; plain numbers
        # ("30", "25.0") parse directly as floats.
        return float(Fraction(rate_str)) if "/" in rate_str else float(rate_str)
    except (ValueError, ZeroDivisionError):
        return 30.0  # Default
def analyze_metadata(path: Path) -> "VideoFeatures":
    """
    Extract video metadata using ffprobe.

    Args:
        path: Path to video file

    Returns:
        VideoFeatures with basic metadata

    Raises:
        ValueError: If ffprobe is unavailable, fails, or the file has no
            video stream.
    """
    cmd = [
        "ffprobe", "-v", "quiet",
        "-print_format", "json",
        "-show_streams",
        "-show_format",
        "-select_streams", "v:0",
        str(path)
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)
    except (subprocess.CalledProcessError, json.JSONDecodeError, FileNotFoundError) as e:
        # FileNotFoundError added: a missing ffprobe binary should surface
        # as the documented ValueError, not leak an OSError to callers.
        raise ValueError(f"Could not read video info: {e}")
    if not data.get("streams"):
        raise ValueError("No video stream found")
    stream = data["streams"][0]
    fmt = data.get("format", {})
    # Duration may live on the container (format) or the stream; ffprobe
    # can also report non-numeric values (e.g. "N/A"), which float()
    # rejects — treat those as unknown (0.0) instead of crashing.
    raw_duration = fmt.get("duration", stream.get("duration", 0))
    try:
        duration = float(raw_duration)
    except (TypeError, ValueError):
        duration = 0.0
    # Parse frame rate (e.g. "30000/1001")
    frame_rate = _parse_frame_rate(stream.get("avg_frame_rate", "30"))
    return VideoFeatures(
        duration=duration,
        frame_rate=frame_rate,
        width=int(stream.get("width", 0)),
        height=int(stream.get("height", 0)),
        codec=stream.get("codec_name", ""),
    )
def analyze_scene_changes(path: Path, threshold: float = 0.3) -> List[float]:
    """
    Detect scene changes using ffmpeg scene detection.

    Args:
        path: Path to video file
        threshold: Scene change threshold (0-1, lower = more sensitive)

    Returns:
        List of scene change times in seconds (empty when ffmpeg is
        unavailable or produced no scene-change frames)
    """
    cmd = [
        "ffmpeg", "-i", str(path),
        "-vf", f"select='gt(scene,{threshold})',showinfo",
        "-f", "null", "-"
    ]
    try:
        # No check=True: ffmpeg's exit code is irrelevant here — we only
        # parse the showinfo lines it printed to stderr.
        result = subprocess.run(cmd, capture_output=True, text=True)
        stderr = result.stderr
    except OSError as e:
        # OSError covers a missing ffmpeg binary (FileNotFoundError).
        # The previous handler caught CalledProcessError, which run()
        # never raises without check=True, so it was dead code.
        logger.warning(f"Scene detection failed: {e}")
        return []
    # Parse scene change times from ffmpeg's showinfo output
    scene_times = []
    for line in stderr.split("\n"):
        if "pts_time:" in line:
            try:
                # Extract pts_time value
                for part in line.split():
                    if part.startswith("pts_time:"):
                        time_str = part.split(":")[1]
                        scene_times.append(float(time_str))
                        break
            except (ValueError, IndexError):
                continue
    return scene_times
def analyze_motion_tempo(path: Path, sample_duration: float = 30.0) -> Optional[float]:
    """
    Estimate tempo from video motion periodicity.

    Analyzes optical flow or frame differences to detect rhythmic motion.
    This is useful for matching video speed to audio tempo.

    Args:
        path: Path to video file
        sample_duration: Duration to analyze (seconds)

    Returns:
        Estimated motion tempo in BPM, or None if not detectable
        (OpenCV missing, unreadable video, too few frames, or no
        significant periodicity in the plausible 30-200 BPM range)
    """
    try:
        import cv2
        import numpy as np
    except ImportError:
        logger.warning("OpenCV not available, skipping motion tempo analysis")
        return None
    cap = cv2.VideoCapture(str(path))
    if not cap.isOpened():
        logger.warning(f"Could not open video: {path}")
        return None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            # Some containers report 0/invalid FPS; assume 30.
            fps = 30.0
        max_frames = int(sample_duration * fps)
        frame_diffs = []
        prev_gray = None
        frame_count = 0
        # Build a per-frame motion signal: mean absolute pixel difference
        # between consecutive downscaled grayscale frames.
        while frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break
            # Convert to grayscale and resize for speed
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray = cv2.resize(gray, (160, 90))
            if prev_gray is not None:
                # Calculate frame difference
                diff = cv2.absdiff(gray, prev_gray)
                frame_diffs.append(np.mean(diff))
            prev_gray = gray
            frame_count += 1
        if len(frame_diffs) < 60:  # Need at least 2 seconds at 30fps
            return None
        # Convert to numpy array
        motion = np.array(frame_diffs)
        # Normalize: zero-mean, then unit variance when possible
        motion = motion - motion.mean()
        if motion.std() > 0:
            motion = motion / motion.std()
        # Autocorrelation to find periodicity; keep only non-negative lags.
        # NOTE(review): if every frame is identical, acf[0] can be 0 and
        # this division yields NaN — confirm upstream frames always vary.
        n = len(motion)
        acf = np.correlate(motion, motion, mode="full")[n-1:]
        acf = acf / acf[0]  # Normalize
        # Find peaks in autocorrelation (potential beat periods)
        # Look for periods between 0.3s (200 BPM) and 2s (30 BPM)
        min_lag = int(0.3 * fps)
        max_lag = min(int(2.0 * fps), len(acf) - 1)
        if max_lag <= min_lag:
            return None
        # Find the highest peak in the valid range
        search_range = acf[min_lag:max_lag]
        if len(search_range) == 0:
            return None
        peak_idx = np.argmax(search_range) + min_lag
        peak_value = acf[peak_idx]
        # Only report if peak is significant
        if peak_value < 0.1:
            return None
        # Convert lag to BPM
        period_seconds = peak_idx / fps
        bpm = 60.0 / period_seconds
        # Sanity check: only report tempos in a musically plausible range
        if 30 <= bpm <= 200:
            return round(bpm, 1)
        return None
    finally:
        cap.release()
def analyze_video(
    path: Path,
    features: Optional[List[str]] = None,
) -> VideoFeatures:
    """
    Extract video features from file.

    Args:
        path: Path to video file
        features: List of features to extract. Options:
            - "metadata": Basic video info (always included)
            - "motion_tempo": Estimated tempo from motion
            - "scene_changes": Scene change detection
            - "all": All features

    Returns:
        VideoFeatures with requested analysis. Optional-analysis failures
        are logged and leave that field at its default.
    """
    if features is None:
        features = [FEATURE_METADATA]
    if FEATURE_ALL in features:
        features = [FEATURE_METADATA, FEATURE_MOTION_TEMPO, FEATURE_SCENE_CHANGES]

    # Metadata extraction is unconditional; it also builds the
    # VideoFeatures object the optional analyses attach their results to.
    result = analyze_metadata(path)

    # (feature flag, result attribute, analysis function, log label)
    optional_analyses = (
        (FEATURE_MOTION_TEMPO, "motion_tempo", analyze_motion_tempo, "Motion tempo analysis"),
        (FEATURE_SCENE_CHANGES, "scene_changes", analyze_scene_changes, "Scene change detection"),
    )
    for feature_name, attr, runner, label in optional_analyses:
        if feature_name not in features:
            continue
        try:
            setattr(result, attr, runner(path))
        except Exception as e:
            logger.warning(f"{label} failed: {e}")
    return result