Squashed 'core/' content from commit 4957443
git-subtree-dir: core git-subtree-split: 4957443184ae0eb6323635a90a19acffb3e01d07
This commit is contained in:
26
artdag/analysis/__init__.py
Normal file
26
artdag/analysis/__init__.py
Normal file
@@ -0,0 +1,26 @@
|
||||
# artdag/analysis - Audio and video feature extraction
|
||||
#
|
||||
# Provides the Analysis phase of the 3-phase execution model:
|
||||
# 1. ANALYZE - Extract features from inputs
|
||||
# 2. PLAN - Generate execution plan with cache IDs
|
||||
# 3. EXECUTE - Run steps with caching
|
||||
|
||||
from .schema import (
|
||||
AnalysisResult,
|
||||
AudioFeatures,
|
||||
VideoFeatures,
|
||||
BeatInfo,
|
||||
EnergyEnvelope,
|
||||
SpectrumBands,
|
||||
)
|
||||
from .analyzer import Analyzer
|
||||
|
||||
__all__ = [
|
||||
"Analyzer",
|
||||
"AnalysisResult",
|
||||
"AudioFeatures",
|
||||
"VideoFeatures",
|
||||
"BeatInfo",
|
||||
"EnergyEnvelope",
|
||||
"SpectrumBands",
|
||||
]
|
||||
282
artdag/analysis/analyzer.py
Normal file
282
artdag/analysis/analyzer.py
Normal file
@@ -0,0 +1,282 @@
|
||||
# artdag/analysis/analyzer.py
|
||||
"""
|
||||
Main Analyzer class for the Analysis phase.
|
||||
|
||||
Coordinates audio and video feature extraction with caching.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from .schema import AnalysisResult, AudioFeatures, VideoFeatures
|
||||
from .audio import analyze_audio, FEATURE_ALL as AUDIO_ALL
|
||||
from .video import analyze_video, FEATURE_ALL as VIDEO_ALL
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class AnalysisCache:
    """
    Simple file-based cache for analysis results.

    Each result is stored as a standalone JSON file named after its
    analysis cache_id, inside cache_dir.
    """

    def __init__(self, cache_dir: Path):
        # Accept either str or Path; make sure the directory exists up front.
        self.cache_dir = Path(cache_dir)
        self.cache_dir.mkdir(parents=True, exist_ok=True)

    def _path_for(self, cache_id: str) -> Path:
        """Map a cache_id to its on-disk JSON file."""
        return self.cache_dir / f"{cache_id}.json"

    def get(self, cache_id: str) -> Optional[AnalysisResult]:
        """Return the cached AnalysisResult, or None if absent/corrupt."""
        entry = self._path_for(cache_id)
        if not entry.exists():
            return None

        try:
            payload = json.loads(entry.read_text())
            return AnalysisResult.from_dict(payload)
        except (json.JSONDecodeError, KeyError) as e:
            # A corrupt or schema-incompatible file is treated as a miss.
            logger.warning(f"Failed to load analysis cache {cache_id}: {e}")
            return None

    def put(self, result: AnalysisResult) -> None:
        """Persist an analysis result under its cache_id."""
        payload = json.dumps(result.to_dict(), indent=2)
        self._path_for(result.cache_id).write_text(payload)

    def has(self, cache_id: str) -> bool:
        """True if a cached file exists for cache_id."""
        return self._path_for(cache_id).exists()

    def remove(self, cache_id: str) -> bool:
        """Delete the cached entry; report whether anything was removed."""
        target = self._path_for(cache_id)
        if not target.exists():
            return False
        target.unlink()
        return True
|
||||
|
||||
|
||||
class Analyzer:
    """
    Analyzes media inputs to extract features.

    The Analyzer is the first phase of the 3-phase execution model.
    It extracts features from inputs that inform downstream processing.

    Example:
        analyzer = Analyzer(cache_dir=Path("./analysis_cache"))

        # Analyze a music file for beats
        result = analyzer.analyze(
            input_path=Path("/path/to/music.mp3"),
            input_hash="abc123...",
            features=["beats", "energy"]
        )

        print(f"Tempo: {result.tempo} BPM")
        print(f"Beats: {result.beat_times}")
    """

    def __init__(
        self,
        cache_dir: Optional[Path] = None,
        content_cache: Optional["Cache"] = None,  # artdag.Cache for input lookup
    ):
        """
        Initialize the Analyzer.

        Args:
            cache_dir: Directory for analysis cache. If None, no caching.
            content_cache: artdag Cache for looking up inputs by hash
        """
        # With no cache_dir, every analyze() call recomputes from scratch.
        self.cache = AnalysisCache(cache_dir) if cache_dir else None
        self.content_cache = content_cache

    def get_input_path(self, input_hash: str, input_path: Optional[Path] = None) -> Path:
        """
        Resolve input to a file path.

        Args:
            input_hash: Content hash of the input
            input_path: Optional direct path to file

        Returns:
            Path to the input file

        Raises:
            ValueError: If input cannot be resolved
        """
        # An explicit, existing path always wins over the content cache.
        if input_path and input_path.exists():
            return input_path

        # Fall back to the content cache keyed by hash.
        # NOTE(review): assumes cache entries expose an output_path attribute
        # pointing at a file on disk — confirm against artdag.Cache.
        if self.content_cache:
            entry = self.content_cache.get(input_hash)
            if entry:
                return Path(entry.output_path)

        raise ValueError(f"Cannot resolve input {input_hash}: no path provided and not in cache")

    def analyze(
        self,
        input_hash: str,
        features: List[str],
        input_path: Optional[Path] = None,
        media_type: Optional[str] = None,
    ) -> AnalysisResult:
        """
        Analyze an input file and extract features.

        Args:
            input_hash: Content hash of the input (for cache key)
            features: List of features to extract:
                Audio: "beats", "tempo", "energy", "spectrum", "onsets"
                Video: "metadata", "motion_tempo", "scene_changes"
                Meta: "all" (extracts all relevant features)
            input_path: Optional direct path to file
            media_type: Optional hint ("audio", "video", or None for auto-detect)

        Returns:
            AnalysisResult with extracted features
        """
        # Compute cache ID
        # A throwaway AnalysisResult is built solely so __post_init__
        # derives the cache_id from input_hash + sorted(features).
        temp_result = AnalysisResult(
            input_hash=input_hash,
            features_requested=sorted(features),
        )
        cache_id = temp_result.cache_id

        # Check cache
        if self.cache and self.cache.has(cache_id):
            cached = self.cache.get(cache_id)
            # get() may return None if the file is corrupt; fall through then.
            if cached:
                logger.info(f"Analysis cache hit: {cache_id[:16]}...")
                return cached

        # Resolve input path
        path = self.get_input_path(input_hash, input_path)
        logger.info(f"Analyzing {path} for features: {features}")

        # Detect media type if not specified
        if media_type is None:
            media_type = self._detect_media_type(path)

        # Extract features
        audio_features = None
        video_features = None

        # Normalize features
        # "all" expands to the module-level FEATURE_ALL sentinel for each
        # domain; otherwise split the request into audio vs. video features.
        if "all" in features:
            audio_features_list = [AUDIO_ALL]
            video_features_list = [VIDEO_ALL]
        else:
            audio_features_list = [f for f in features if f in ("beats", "tempo", "energy", "spectrum", "onsets")]
            video_features_list = [f for f in features if f in ("metadata", "motion_tempo", "scene_changes")]

        # Videos may carry an audio track, so audio analysis runs for both.
        if media_type in ("audio", "video") and audio_features_list:
            try:
                audio_features = analyze_audio(path, features=audio_features_list)
            except Exception as e:
                # Best-effort: a failed extraction degrades the result
                # rather than aborting the analysis.
                logger.warning(f"Audio analysis failed: {e}")

        if media_type == "video" and video_features_list:
            try:
                video_features = analyze_video(path, features=video_features_list)
            except Exception as e:
                logger.warning(f"Video analysis failed: {e}")

        result = AnalysisResult(
            input_hash=input_hash,
            features_requested=sorted(features),
            audio=audio_features,
            video=video_features,
            analyzed_at=datetime.now(timezone.utc).isoformat(),
        )

        # Cache result
        # NOTE(review): a result whose extraction failed (audio/video None)
        # is cached too and will be served on the next call — confirm intended.
        if self.cache:
            self.cache.put(result)

        return result

    def analyze_multiple(
        self,
        inputs: Dict[str, Path],
        features: List[str],
    ) -> Dict[str, AnalysisResult]:
        """
        Analyze multiple inputs.

        Args:
            inputs: Dict mapping input_hash to file path
            features: Features to extract from all inputs

        Returns:
            Dict mapping input_hash to AnalysisResult

        Raises:
            Exception: Re-raises the first per-input failure; inputs after
                the failing one are not analyzed.
        """
        results = {}
        for input_hash, input_path in inputs.items():
            try:
                results[input_hash] = self.analyze(
                    input_hash=input_hash,
                    features=features,
                    input_path=input_path,
                )
            except Exception as e:
                # Log for context, then propagate — partial results are
                # discarded by the caller when this raises.
                logger.error(f"Analysis failed for {input_hash}: {e}")
                raise

        return results

    def _detect_media_type(self, path: Path) -> str:
        """
        Detect if file is audio or video.

        Uses ffprobe stream inspection first; falls back to the file
        extension if ffprobe is unusable.

        Args:
            path: Path to media file

        Returns:
            "audio" or "video" (or "unknown" if neither can be determined)
        """
        import subprocess
        import json

        cmd = [
            "ffprobe", "-v", "quiet",
            "-print_format", "json",
            "-show_streams",
            str(path)
        ]

        try:
            result = subprocess.run(cmd, capture_output=True, text=True, check=True)
            data = json.loads(result.stdout)
            streams = data.get("streams", [])

            has_video = any(s.get("codec_type") == "video" for s in streams)
            has_audio = any(s.get("codec_type") == "audio" for s in streams)

            # Video takes precedence: a file with both streams is "video".
            if has_video:
                return "video"
            elif has_audio:
                return "audio"
            else:
                return "unknown"

        except (subprocess.CalledProcessError, json.JSONDecodeError):
            # Fall back to extension-based detection
            ext = path.suffix.lower()
            if ext in (".mp4", ".mov", ".avi", ".mkv", ".webm"):
                return "video"
            elif ext in (".mp3", ".wav", ".flac", ".ogg", ".m4a", ".aac"):
                return "audio"
            return "unknown"
|
||||
336
artdag/analysis/audio.py
Normal file
336
artdag/analysis/audio.py
Normal file
@@ -0,0 +1,336 @@
|
||||
# artdag/analysis/audio.py
|
||||
"""
|
||||
Audio feature extraction.
|
||||
|
||||
Uses librosa for beat detection, energy analysis, and spectral features.
|
||||
Falls back to basic ffprobe if librosa is not available.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
from .schema import AudioFeatures, BeatInfo, EnergyEnvelope, SpectrumBands
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Feature names for requesting specific analysis
|
||||
FEATURE_BEATS = "beats"
|
||||
FEATURE_TEMPO = "tempo"
|
||||
FEATURE_ENERGY = "energy"
|
||||
FEATURE_SPECTRUM = "spectrum"
|
||||
FEATURE_ONSETS = "onsets"
|
||||
FEATURE_ALL = "all"
|
||||
|
||||
|
||||
def _get_audio_info_ffprobe(path: Path) -> Tuple[float, int, int]:
    """
    Get basic audio info using ffprobe.

    Args:
        path: Path to a media file with at least one audio stream

    Returns:
        Tuple of (duration_seconds, sample_rate_hz, channel_count)

    Raises:
        ValueError: If ffprobe fails, its output is unparseable, or the
            file has no audio stream.
    """
    cmd = [
        "ffprobe", "-v", "quiet",
        "-print_format", "json",
        "-show_streams",
        "-select_streams", "a:0",  # first audio stream only
        str(path)
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        data = json.loads(result.stdout)
        if not data.get("streams"):
            # Plain ValueError is not caught by the except below
            # (it only lists JSONDecodeError), so this propagates as-is.
            raise ValueError("No audio stream found")

        stream = data["streams"][0]
        # Defaults cover containers that omit these fields; a missing
        # duration is reported as 0 rather than treated as an error.
        duration = float(stream.get("duration", 0))
        sample_rate = int(stream.get("sample_rate", 44100))
        channels = int(stream.get("channels", 2))
        return duration, sample_rate, channels
    except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError) as e:
        logger.warning(f"ffprobe failed: {e}")
        # Chain the underlying error so the root cause survives in tracebacks.
        raise ValueError(f"Could not read audio info: {e}") from e
|
||||
|
||||
|
||||
def _extract_audio_to_wav(path: Path, duration: Optional[float] = None) -> Path:
    """
    Extract audio to a temporary WAV file for librosa processing.

    The caller is responsible for deleting the returned file once done.

    Args:
        path: Source media file
        duration: If given, only the first `duration` seconds are extracted

    Returns:
        Path to a mono, 16-bit, 22050 Hz temporary WAV file

    Raises:
        ValueError: If ffmpeg fails to extract the audio
    """
    import os
    import tempfile
    # mkstemp, unlike the deprecated and race-prone mktemp, actually creates
    # the file so no other process can claim the name. We only need the path,
    # so close the descriptor immediately; ffmpeg overwrites the file via -y.
    fd, tmp_name = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    wav_path = Path(tmp_name)

    cmd = ["ffmpeg", "-y", "-i", str(path)]
    if duration:
        cmd.extend(["-t", str(duration)])
    cmd.extend([
        "-vn",  # No video
        "-acodec", "pcm_s16le",
        "-ar", "22050",  # Resample to 22050 Hz for librosa
        "-ac", "1",  # Mono
        str(wav_path)
    ])

    try:
        subprocess.run(cmd, capture_output=True, check=True)
        return wav_path
    except subprocess.CalledProcessError as e:
        # Remove the temp file we created — on failure no caller will.
        wav_path.unlink(missing_ok=True)
        logger.error(f"Audio extraction failed: {e.stderr}")
        raise ValueError(f"Could not extract audio: {e}") from e
|
||||
|
||||
|
||||
def analyze_beats(path: Path, sample_rate: int = 22050) -> BeatInfo:
    """
    Detect beats and tempo using librosa.

    Args:
        path: Path to audio file (or pre-extracted WAV)
        sample_rate: Sample rate for analysis

    Returns:
        BeatInfo with beat times, tempo, and confidence

    Raises:
        ImportError: If librosa is not installed
    """
    try:
        import librosa
    except ImportError:
        raise ImportError("librosa required for beat detection. Install with: pip install librosa")

    # Load audio (mono, resampled to sample_rate)
    y, sr = librosa.load(str(path), sr=sample_rate, mono=True)

    # Detect tempo and beats
    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)

    # Convert frames to times
    beat_times = librosa.frames_to_time(beat_frames, sr=sr).tolist()

    # Estimate confidence from onset strength consistency:
    # mean onset strength at the detected beats, relative to the global peak.
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    beat_strength = onset_env[beat_frames] if len(beat_frames) > 0 else []
    # 0.5 is the neutral fallback when there are no beats or no onset energy.
    confidence = float(beat_strength.mean() / onset_env.max()) if len(beat_strength) > 0 and onset_env.max() > 0 else 0.5

    # Detect downbeats (first beat of each bar)
    # Use beat phase to estimate bar positions
    downbeat_times = None
    if len(beat_times) >= 4:
        # Assume 4/4 time signature, downbeats every 4 beats
        downbeat_times = [beat_times[i] for i in range(0, len(beat_times), 4)]

    return BeatInfo(
        beat_times=beat_times,
        # librosa may return tempo as a scalar or a 1-element ndarray
        # depending on version; coerce either shape to a plain float.
        # NOTE(review): a multi-element tempo array would make float(tempo)
        # raise here — confirm against the pinned librosa version.
        tempo=float(tempo) if hasattr(tempo, '__float__') else float(tempo[0]) if len(tempo) > 0 else 120.0,
        # Clamp the heuristic confidence into the documented 0-1 range.
        confidence=min(1.0, max(0.0, confidence)),
        downbeat_times=downbeat_times,
        time_signature=4,
    )
|
||||
|
||||
|
||||
def analyze_energy(path: Path, window_ms: float = 50.0, sample_rate: int = 22050) -> EnergyEnvelope:
    """
    Extract the energy (loudness) envelope of an audio file.

    Computes RMS energy per analysis window and scales the curve so the
    loudest window is 1.0.

    Args:
        path: Path to audio file
        window_ms: Analysis window size in milliseconds
        sample_rate: Sample rate for analysis

    Returns:
        EnergyEnvelope with time points and normalized values

    Raises:
        ImportError: If librosa or numpy is not installed
    """
    try:
        import librosa
        import numpy as np
    except ImportError:
        raise ImportError("librosa and numpy required. Install with: pip install librosa numpy")

    samples, sr = librosa.load(str(path), sr=sample_rate, mono=True)

    # Hop size in samples corresponding to the requested window.
    hop = int(sr * window_ms / 1000)

    # Root-mean-square energy per frame (rms returns shape (1, n_frames)).
    envelope = librosa.feature.rms(y=samples, hop_length=hop)[0]

    # Normalize so the loudest frame is 1.0; an all-silent signal is left as-is.
    peak = envelope.max()
    normalized = envelope / peak if peak > 0 else envelope

    frame_times = librosa.frames_to_time(np.arange(len(envelope)), sr=sr, hop_length=hop)

    return EnergyEnvelope(
        times=frame_times.tolist(),
        values=normalized.tolist(),
        window_ms=window_ms,
    )
|
||||
|
||||
|
||||
def analyze_spectrum(
    path: Path,
    band_ranges: Optional[dict] = None,
    window_ms: float = 50.0,
    sample_rate: int = 22050
) -> SpectrumBands:
    """
    Extract frequency band envelopes.

    Args:
        path: Path to audio file
        band_ranges: Dict mapping band name to (low_hz, high_hz); must
            contain "bass", "mid", and "high" keys. Defaults to the
            standard 20-200 / 200-2000 / 2000-20000 Hz split.
        window_ms: Analysis window size
        sample_rate: Sample rate

    Returns:
        SpectrumBands with bass, mid, high envelopes (each normalized to
        its own per-band peak)

    Raises:
        ImportError: If librosa or numpy is not installed
    """
    try:
        import librosa
        import numpy as np
    except ImportError:
        raise ImportError("librosa and numpy required")

    if band_ranges is None:
        band_ranges = {
            "bass": (20, 200),
            "mid": (200, 2000),
            "high": (2000, 20000),
        }

    y, sr = librosa.load(str(path), sr=sample_rate, mono=True)
    hop_length = int(sr * window_ms / 1000)

    # Compute STFT (magnitudes only; phase is irrelevant for band energy)
    n_fft = 2048
    stft = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))

    # Frequency bins (center frequency of each STFT row)
    freqs = librosa.fft_frequencies(sr=sr, n_fft=n_fft)

    def band_energy(low_hz: float, high_hz: float) -> List[float]:
        """Sum energy in frequency band, normalized to the band's own peak."""
        mask = (freqs >= low_hz) & (freqs <= high_hz)
        if not mask.any():
            # Band falls entirely between/outside the FFT bins: all zeros.
            return [0.0] * stft.shape[1]
        band = stft[mask, :].sum(axis=0)
        # Normalize per band so each envelope peaks at 1.0 independently —
        # band values are not comparable across bands in absolute terms.
        band_max = band.max()
        if band_max > 0:
            band = band / band_max
        return band.tolist()

    times = librosa.frames_to_time(np.arange(stft.shape[1]), sr=sr, hop_length=hop_length)

    return SpectrumBands(
        bass=band_energy(*band_ranges["bass"]),
        mid=band_energy(*band_ranges["mid"]),
        high=band_energy(*band_ranges["high"]),
        times=times.tolist(),
        band_ranges=band_ranges,
    )
|
||||
|
||||
|
||||
def analyze_onsets(path: Path, sample_rate: int = 22050) -> List[float]:
    """
    Detect onset times (note/sound starts) in an audio file.

    Args:
        path: Path to audio file
        sample_rate: Sample rate for analysis

    Returns:
        Onset times in seconds

    Raises:
        ImportError: If librosa is not installed
    """
    try:
        import librosa
    except ImportError:
        raise ImportError("librosa required")

    samples, sr = librosa.load(str(path), sr=sample_rate, mono=True)

    # Convert detected onset frame indices straight to seconds.
    frames = librosa.onset.onset_detect(y=samples, sr=sr)
    return librosa.frames_to_time(frames, sr=sr).tolist()
|
||||
|
||||
|
||||
def analyze_audio(
    path: Path,
    features: Optional[List[str]] = None,
) -> AudioFeatures:
    """
    Extract audio features from file.

    Basic metadata (duration, sample rate, channels) always comes from
    ffprobe; advanced features require librosa and are skipped (with a
    warning) when it is not installed. Each advanced feature is
    best-effort: an individual failure is logged, not raised.

    Args:
        path: Path to audio/video file
        features: List of features to extract. Options:
            - "beats": Beat detection (tempo, beat times)
            - "energy": Loudness envelope
            - "spectrum": Frequency band envelopes
            - "onsets": Note onset times
            - "all": All features

    Returns:
        AudioFeatures with requested analysis

    Raises:
        ValueError: If ffprobe cannot read the file or audio extraction fails
    """
    if features is None:
        features = [FEATURE_ALL]

    # Normalize features: "all" expands to every concrete feature.
    if FEATURE_ALL in features:
        features = [FEATURE_BEATS, FEATURE_ENERGY, FEATURE_SPECTRUM, FEATURE_ONSETS]

    # Get basic info via ffprobe
    duration, sample_rate, channels = _get_audio_info_ffprobe(path)

    result = AudioFeatures(
        duration=duration,
        sample_rate=sample_rate,
        channels=channels,
    )

    # Check if librosa is available for advanced features
    try:
        import librosa  # noqa: F401
        has_librosa = True
    except ImportError:
        has_librosa = False
        if any(f in features for f in [FEATURE_BEATS, FEATURE_ENERGY, FEATURE_SPECTRUM, FEATURE_ONSETS]):
            logger.warning("librosa not available, skipping advanced audio features")

    # Without librosa only the ffprobe metadata is returned.
    if not has_librosa:
        return result

    # Extract audio to WAV for librosa (one decode shared by all features)
    wav_path = None
    try:
        wav_path = _extract_audio_to_wav(path)

        # "tempo" alone also triggers beat analysis, since tempo is a
        # by-product of beat tracking.
        if FEATURE_BEATS in features or FEATURE_TEMPO in features:
            try:
                result.beats = analyze_beats(wav_path)
            except Exception as e:
                logger.warning(f"Beat detection failed: {e}")

        if FEATURE_ENERGY in features:
            try:
                result.energy = analyze_energy(wav_path)
            except Exception as e:
                logger.warning(f"Energy analysis failed: {e}")

        if FEATURE_SPECTRUM in features:
            try:
                result.spectrum = analyze_spectrum(wav_path)
            except Exception as e:
                logger.warning(f"Spectrum analysis failed: {e}")

        if FEATURE_ONSETS in features:
            try:
                result.onsets = analyze_onsets(wav_path)
            except Exception as e:
                logger.warning(f"Onset detection failed: {e}")

    finally:
        # Clean up temporary WAV file even if extraction/analysis raised.
        if wav_path and wav_path.exists():
            wav_path.unlink()

    return result
|
||||
352
artdag/analysis/schema.py
Normal file
352
artdag/analysis/schema.py
Normal file
@@ -0,0 +1,352 @@
|
||||
# artdag/analysis/schema.py
|
||||
"""
|
||||
Data structures for analysis results.
|
||||
|
||||
Analysis extracts features from input media that inform downstream processing.
|
||||
Results are cached by: analysis_cache_id = SHA3-256(input_hash + sorted(features))
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
|
||||
def _stable_hash(data: Any, algorithm: str = "sha3_256") -> str:
    """
    Create a deterministic hash of arbitrary JSON-serializable data.

    Keys are sorted and separators minimized so the same logical content
    always yields the same digest, regardless of dict insertion order.
    """
    canonical = json.dumps(data, sort_keys=True, separators=(",", ":"))
    return hashlib.new(algorithm, canonical.encode()).hexdigest()
|
||||
|
||||
|
||||
@dataclass
class BeatInfo:
    """
    Beat detection results.

    Attributes:
        beat_times: List of beat positions in seconds
        tempo: Estimated tempo in BPM
        confidence: Tempo detection confidence (0-1)
        downbeat_times: First beat of each bar (if detected)
        time_signature: Detected or assumed time signature (e.g., 4)
    """
    beat_times: List[float]
    tempo: float
    confidence: float = 1.0
    downbeat_times: Optional[List[float]] = None
    time_signature: int = 4

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-safe dict."""
        keys = ("beat_times", "tempo", "confidence", "downbeat_times", "time_signature")
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "BeatInfo":
        """Reconstruct from a dict produced by to_dict()."""
        defaults = {"confidence": 1.0, "downbeat_times": None, "time_signature": 4}
        optional = {key: data.get(key, fallback) for key, fallback in defaults.items()}
        return cls(
            beat_times=data["beat_times"],
            tempo=data["tempo"],
            **optional,
        )
|
||||
|
||||
|
||||
@dataclass
class EnergyEnvelope:
    """
    Energy (loudness) over time.

    Attributes:
        times: Time points in seconds
        values: Energy values (0-1, normalized)
        window_ms: Analysis window size in milliseconds
    """
    times: List[float]
    values: List[float]
    window_ms: float = 50.0

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-safe dict."""
        return {
            "times": self.times,
            "values": self.values,
            "window_ms": self.window_ms,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "EnergyEnvelope":
        """Reconstruct from a dict produced by to_dict()."""
        return cls(
            times=data["times"],
            values=data["values"],
            window_ms=data.get("window_ms", 50.0),
        )

    def at_time(self, t: float) -> float:
        """
        Linearly interpolate the energy value at time t (seconds).

        Clamps to the first/last sample outside the envelope's time range;
        an empty envelope yields 0.0.
        """
        if not self.times:
            return 0.0
        if t <= self.times[0]:
            return self.values[0]
        if t >= self.times[-1]:
            return self.values[-1]

        # Binary search for the two samples that bracket t.
        left, right = 0, len(self.times) - 1
        while right - left > 1:
            middle = (left + right) // 2
            if self.times[middle] <= t:
                left = middle
            else:
                right = middle

        # Linear interpolation between the bracketing samples.
        t0, t1 = self.times[left], self.times[right]
        v0, v1 = self.values[left], self.values[right]
        frac = (t - t0) / (t1 - t0) if t1 != t0 else 0
        return v0 + frac * (v1 - v0)
|
||||
|
||||
|
||||
@dataclass
class SpectrumBands:
    """
    Frequency band envelopes over time.

    Attributes:
        bass: Low frequency envelope (20-200 Hz typical)
        mid: Mid frequency envelope (200-2000 Hz typical)
        high: High frequency envelope (2000-20000 Hz typical)
        times: Time points in seconds
        band_ranges: Frequency ranges for each band in Hz
    """
    bass: List[float]
    mid: List[float]
    high: List[float]
    times: List[float]
    band_ranges: Dict[str, Tuple[float, float]] = field(default_factory=lambda: {
        "bass": (20, 200),
        "mid": (200, 2000),
        "high": (2000, 20000),
    })

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-safe dict (tuples become lists in JSON)."""
        return {
            "bass": self.bass,
            "mid": self.mid,
            "high": self.high,
            "times": self.times,
            "band_ranges": self.band_ranges,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "SpectrumBands":
        """
        Reconstruct from a dict produced by to_dict().

        JSON serialization turns the (low_hz, high_hz) tuples in band_ranges
        into lists; coerce them back to tuples so round-tripped instances
        compare equal to freshly-constructed ones and match the declared
        Dict[str, Tuple[float, float]] field type.
        """
        raw_ranges = data.get("band_ranges", {
            "bass": (20, 200),
            "mid": (200, 2000),
            "high": (2000, 20000),
        })
        return cls(
            bass=data["bass"],
            mid=data["mid"],
            high=data["high"],
            times=data["times"],
            band_ranges={name: tuple(rng) for name, rng in raw_ranges.items()},
        )
|
||||
|
||||
|
||||
@dataclass
class AudioFeatures:
    """
    All extracted audio features.

    Attributes:
        duration: Audio duration in seconds
        sample_rate: Sample rate in Hz
        channels: Number of audio channels
        beats: Beat detection results
        energy: Energy envelope
        spectrum: Frequency band envelopes
        onsets: Note/sound onset times
    """
    duration: float
    sample_rate: int
    channels: int
    beats: Optional[BeatInfo] = None
    energy: Optional[EnergyEnvelope] = None
    spectrum: Optional[SpectrumBands] = None
    onsets: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-safe dict; absent features serialize as None."""
        def dump(feature):
            # Nested feature objects know how to serialize themselves.
            return feature.to_dict() if feature else None

        return {
            "duration": self.duration,
            "sample_rate": self.sample_rate,
            "channels": self.channels,
            "beats": dump(self.beats),
            "energy": dump(self.energy),
            "spectrum": dump(self.spectrum),
            "onsets": self.onsets,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AudioFeatures":
        """Reconstruct from a dict produced by to_dict()."""
        def load(key, feature_cls):
            raw = data.get(key)
            return feature_cls.from_dict(raw) if raw else None

        return cls(
            duration=data["duration"],
            sample_rate=data["sample_rate"],
            channels=data["channels"],
            beats=load("beats", BeatInfo),
            energy=load("energy", EnergyEnvelope),
            spectrum=load("spectrum", SpectrumBands),
            onsets=data.get("onsets"),
        )
|
||||
|
||||
|
||||
@dataclass
class VideoFeatures:
    """
    Extracted video features.

    Attributes:
        duration: Video duration in seconds
        frame_rate: Frames per second
        width: Frame width in pixels
        height: Frame height in pixels
        codec: Video codec name
        motion_tempo: Estimated tempo from motion analysis (optional)
        scene_changes: Times of detected scene changes
    """
    duration: float
    frame_rate: float
    width: int
    height: int
    codec: str = ""
    motion_tempo: Optional[float] = None
    scene_changes: Optional[List[float]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-safe dict."""
        keys = ("duration", "frame_rate", "width", "height",
                "codec", "motion_tempo", "scene_changes")
        return {key: getattr(self, key) for key in keys}

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "VideoFeatures":
        """Reconstruct from a dict produced by to_dict()."""
        optional = {
            "codec": data.get("codec", ""),
            "motion_tempo": data.get("motion_tempo"),
            "scene_changes": data.get("scene_changes"),
        }
        return cls(
            duration=data["duration"],
            frame_rate=data["frame_rate"],
            width=data["width"],
            height=data["height"],
            **optional,
        )
|
||||
|
||||
|
||||
@dataclass
class AnalysisResult:
    """
    Complete analysis result for an input.

    Combines audio and video features with metadata for caching.

    Attributes:
        input_hash: Content hash of the analyzed input
        features_requested: List of features that were requested
        audio: Audio features (if input has audio)
        video: Video features (if input has video)
        cache_id: Computed cache ID for this analysis
        analyzed_at: Timestamp of analysis
    """
    input_hash: str
    features_requested: List[str]
    audio: Optional[AudioFeatures] = None
    video: Optional[VideoFeatures] = None
    cache_id: Optional[str] = None
    analyzed_at: Optional[str] = None

    def __post_init__(self):
        """Derive cache_id from the inputs unless one was supplied."""
        if self.cache_id is None:
            self.cache_id = self._compute_cache_id()

    def _compute_cache_id(self) -> str:
        """
        Compute cache ID from input hash and requested features.

        cache_id = SHA3-256(input_hash + sorted(features_requested))
        """
        # Sorting makes the id independent of request order.
        return _stable_hash({
            "input_hash": self.input_hash,
            "features": sorted(self.features_requested),
        })

    def to_dict(self) -> Dict[str, Any]:
        """Serialize to a JSON-safe dict; absent features serialize as None."""
        audio_dict = self.audio.to_dict() if self.audio else None
        video_dict = self.video.to_dict() if self.video else None
        return {
            "input_hash": self.input_hash,
            "features_requested": self.features_requested,
            "audio": audio_dict,
            "video": video_dict,
            "cache_id": self.cache_id,
            "analyzed_at": self.analyzed_at,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "AnalysisResult":
        """Reconstruct from a dict produced by to_dict()."""
        audio_raw = data.get("audio")
        video_raw = data.get("video")
        return cls(
            input_hash=data["input_hash"],
            features_requested=data["features_requested"],
            audio=AudioFeatures.from_dict(audio_raw) if audio_raw else None,
            video=VideoFeatures.from_dict(video_raw) if video_raw else None,
            cache_id=data.get("cache_id"),
            analyzed_at=data.get("analyzed_at"),
        )

    def to_json(self) -> str:
        """Serialize to a JSON string."""
        return json.dumps(self.to_dict(), indent=2)

    @classmethod
    def from_json(cls, json_str: str) -> "AnalysisResult":
        """Deserialize from a JSON string."""
        return cls.from_dict(json.loads(json_str))

    # Convenience accessors

    @property
    def tempo(self) -> Optional[float]:
        """Tempo in BPM, if beats were analyzed."""
        if self.audio and self.audio.beats:
            return self.audio.beats.tempo
        return None

    @property
    def beat_times(self) -> Optional[List[float]]:
        """Beat positions in seconds, if beats were analyzed."""
        if self.audio and self.audio.beats:
            return self.audio.beats.beat_times
        return None

    @property
    def downbeat_times(self) -> Optional[List[float]]:
        """Downbeat positions in seconds, if analyzed."""
        if self.audio and self.audio.beats:
            return self.audio.beats.downbeat_times
        return None

    @property
    def duration(self) -> float:
        """Duration in seconds, preferring video over audio; 0.0 if neither."""
        if self.video:
            return self.video.duration
        if self.audio:
            return self.audio.duration
        return 0.0

    @property
    def dimensions(self) -> Optional[Tuple[int, int]]:
        """(width, height) of the video, if video was analyzed."""
        if self.video:
            return (self.video.width, self.video.height)
        return None
|
||||
266
artdag/analysis/video.py
Normal file
266
artdag/analysis/video.py
Normal file
@@ -0,0 +1,266 @@
|
||||
# artdag/analysis/video.py
|
||||
"""
|
||||
Video feature extraction.
|
||||
|
||||
Uses ffprobe for basic metadata and optional OpenCV for motion analysis.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import subprocess
|
||||
from fractions import Fraction
|
||||
from pathlib import Path
|
||||
from typing import List, Optional
|
||||
|
||||
from .schema import VideoFeatures
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Feature names accepted by analyze_video()'s `features` argument.
FEATURE_METADATA = "metadata"  # basic stream info via ffprobe (always extracted)
FEATURE_MOTION_TEMPO = "motion_tempo"  # BPM estimate from frame-difference periodicity
FEATURE_SCENE_CHANGES = "scene_changes"  # cut timestamps from ffmpeg's scene filter
FEATURE_ALL = "all"  # sentinel: expands to every feature above
|
||||
|
||||
|
||||
def _parse_frame_rate(rate_str: str) -> float:
|
||||
"""Parse frame rate string like '30000/1001' or '30'."""
|
||||
try:
|
||||
if "/" in rate_str:
|
||||
frac = Fraction(rate_str)
|
||||
return float(frac)
|
||||
return float(rate_str)
|
||||
except (ValueError, ZeroDivisionError):
|
||||
return 30.0 # Default
|
||||
|
||||
|
||||
def analyze_metadata(path: Path) -> VideoFeatures:
|
||||
"""
|
||||
Extract video metadata using ffprobe.
|
||||
|
||||
Args:
|
||||
path: Path to video file
|
||||
|
||||
Returns:
|
||||
VideoFeatures with basic metadata
|
||||
"""
|
||||
cmd = [
|
||||
"ffprobe", "-v", "quiet",
|
||||
"-print_format", "json",
|
||||
"-show_streams",
|
||||
"-show_format",
|
||||
"-select_streams", "v:0",
|
||||
str(path)
|
||||
]
|
||||
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
data = json.loads(result.stdout)
|
||||
except (subprocess.CalledProcessError, json.JSONDecodeError) as e:
|
||||
raise ValueError(f"Could not read video info: {e}")
|
||||
|
||||
if not data.get("streams"):
|
||||
raise ValueError("No video stream found")
|
||||
|
||||
stream = data["streams"][0]
|
||||
fmt = data.get("format", {})
|
||||
|
||||
# Get duration from format or stream
|
||||
duration = float(fmt.get("duration", stream.get("duration", 0)))
|
||||
|
||||
# Parse frame rate
|
||||
frame_rate = _parse_frame_rate(stream.get("avg_frame_rate", "30"))
|
||||
|
||||
return VideoFeatures(
|
||||
duration=duration,
|
||||
frame_rate=frame_rate,
|
||||
width=int(stream.get("width", 0)),
|
||||
height=int(stream.get("height", 0)),
|
||||
codec=stream.get("codec_name", ""),
|
||||
)
|
||||
|
||||
|
||||
def analyze_scene_changes(path: Path, threshold: float = 0.3) -> List[float]:
    """
    Detect scene changes using ffmpeg scene detection.

    Args:
        path: Path to video file
        threshold: Scene change threshold (0-1, lower = more sensitive)

    Returns:
        List of scene change times in seconds; empty on failure (best-effort).
    """
    cmd = [
        "ffmpeg", "-i", str(path),
        "-vf", f"select='gt(scene,{threshold})',showinfo",
        "-f", "null", "-"
    ]

    try:
        # No check=True here on purpose: ffmpeg may exit nonzero even when the
        # showinfo lines we parse are present, so only launch failures abort.
        # (The old `except CalledProcessError` was dead code for that reason,
        # while a missing ffmpeg binary crashed the caller.)
        result = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as e:  # ffmpeg binary missing or not executable
        logger.warning(f"Scene detection failed: {e}")
        return []
    stderr = result.stderr

    # showinfo logs one line per selected frame; extract its pts_time field.
    scene_times = []
    for line in stderr.split("\n"):
        if "pts_time:" not in line:
            continue
        try:
            for part in line.split():
                if part.startswith("pts_time:"):
                    time_str = part.split(":")[1]
                    scene_times.append(float(time_str))
                    break
        except (ValueError, IndexError):
            continue

    return scene_times
|
||||
|
||||
|
||||
def analyze_motion_tempo(path: Path, sample_duration: float = 30.0) -> Optional[float]:
    """
    Estimate tempo from video motion periodicity.

    Computes a per-frame difference-energy series and searches its
    autocorrelation for a dominant period. Useful for matching video speed
    to audio tempo.

    Args:
        path: Path to video file
        sample_duration: Duration to analyze (seconds)

    Returns:
        Estimated motion tempo in BPM, or None if not detectable
    """
    try:
        import cv2
        import numpy as np
    except ImportError:
        logger.warning("OpenCV not available, skipping motion tempo analysis")
        return None

    cap = cv2.VideoCapture(str(path))
    if not cap.isOpened():
        logger.warning(f"Could not open video: {path}")
        return None

    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        if fps <= 0:
            fps = 30.0  # Some containers report 0; fall back to a common rate

        max_frames = int(sample_duration * fps)
        frame_diffs = []
        prev_gray = None

        frame_count = 0
        while frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            # Convert to grayscale and resize for speed
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray = cv2.resize(gray, (160, 90))

            if prev_gray is not None:
                # Mean absolute frame difference as a motion-energy proxy
                diff = cv2.absdiff(gray, prev_gray)
                frame_diffs.append(np.mean(diff))

            prev_gray = gray
            frame_count += 1

        if len(frame_diffs) < 60:  # Need at least 2 seconds at 30fps
            return None

        motion = np.array(frame_diffs)

        # Normalize to zero mean, unit variance
        motion = motion - motion.mean()
        std = motion.std()
        if std == 0:
            # BUG FIX: a static video yields an all-zero series. Previously
            # execution continued, acf[0] was 0, the normalization below
            # produced NaNs, `NaN < 0.1` evaluated False, and a bogus BPM at
            # min_lag was returned. There is no motion signal: report None.
            return None
        motion = motion / std

        # Autocorrelation to find periodicity
        n = len(motion)
        acf = np.correlate(motion, motion, mode="full")[n-1:]
        acf = acf / acf[0]  # acf[0] == n > 0 after unit-variance normalization

        # Find peaks in autocorrelation (potential beat periods)
        # Look for periods between 0.3s (200 BPM) and 2s (30 BPM)
        min_lag = int(0.3 * fps)
        max_lag = min(int(2.0 * fps), len(acf) - 1)

        if max_lag <= min_lag:
            return None

        # Find the highest peak in the valid range
        search_range = acf[min_lag:max_lag]
        if len(search_range) == 0:
            return None

        peak_idx = np.argmax(search_range) + min_lag
        peak_value = acf[peak_idx]

        # Only report if peak is significant
        if peak_value < 0.1:
            return None

        # Convert lag to BPM
        period_seconds = peak_idx / fps
        bpm = 60.0 / period_seconds

        # Sanity check: discard implausible tempi
        if 30 <= bpm <= 200:
            return round(bpm, 1)

        return None

    finally:
        cap.release()
|
||||
|
||||
|
||||
def analyze_video(
    path: Path,
    features: Optional[List[str]] = None,
) -> VideoFeatures:
    """
    Extract video features from file.

    Args:
        path: Path to video file
        features: Feature names to extract — "metadata" (basic video info,
            always included), "motion_tempo", "scene_changes", or "all".
            Defaults to metadata only.

    Returns:
        VideoFeatures with requested analysis
    """
    requested = list(features) if features is not None else [FEATURE_METADATA]
    if FEATURE_ALL in requested:
        requested = [FEATURE_METADATA, FEATURE_MOTION_TEMPO, FEATURE_SCENE_CHANGES]

    # Metadata is the baseline result; the optional passes decorate it.
    result = analyze_metadata(path)

    optional_passes = (
        (FEATURE_MOTION_TEMPO, "motion_tempo", analyze_motion_tempo, "Motion tempo analysis"),
        (FEATURE_SCENE_CHANGES, "scene_changes", analyze_scene_changes, "Scene change detection"),
    )
    for feature_name, attr, extractor, label in optional_passes:
        if feature_name not in requested:
            continue
        try:
            setattr(result, attr, extractor(path))
        except Exception as e:
            # Best-effort: optional features never abort the whole analysis.
            logger.warning(f"{label} failed: {e}")

    return result
|
||||
Reference in New Issue
Block a user